Merge branch 'params' of github.com:SheffieldML/GPy into params

Ricardo committed on 2014-03-17 10:30:55 +00:00
commit 607ed98e51
8 changed files with 65 additions and 32 deletions

View file

@@ -42,7 +42,10 @@ class GP(Model):
         assert Y.shape[0] == self.num_data
         _, self.output_dim = self.Y.shape
-        self.Y_metadata = Y_metadata or {}
+        if Y_metadata is None:
+            self.Y_metadata = {}
+        else:
+            self.Y_metadata = Y_metadata
         assert isinstance(kernel, kern.Kern)
         #assert self.input_dim == kernel.input_dim
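
One thing the explicit None check changes: `Y_metadata or {}` falls back to a fresh dict for any falsy value, so an explicitly supplied empty dict was silently replaced. A minimal sketch of the difference (plain Python, no GPy needed):

    Y_metadata = {}  # caller explicitly passes empty metadata

    old = Y_metadata or {}                            # a brand-new dict
    new = {} if Y_metadata is None else Y_metadata    # the caller's dict

    assert old is not Y_metadata
    assert new is Y_metadata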

View file

@@ -3,6 +3,7 @@
 from posterior import Posterior
 from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv
+from ...util import diag
 import numpy as np
 log_2_pi = np.log(2*np.pi)
@@ -14,8 +15,7 @@ class FITC(object):
     the posterior.
     """
-    def __init__(self):
-        self.const_jitter = 1e-6
+    const_jitter = 1e-6

     def inference(self, kern, X, Z, likelihood, Y):
@@ -33,6 +33,7 @@ class FITC(object):
         U = Knm

         #factor Kmm
+        diag.add(Kmm, self.const_jitter)
         Kmmi, L, Li, _ = pdinv(Kmm)

         #compute beta_star, the effective noise precision
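
The added `diag.add(Kmm, self.const_jitter)` bumps the diagonal of Kmm before `pdinv` factorizes it, so a numerically rank-deficient covariance no longer breaks the Cholesky. A minimal sketch of the same trick, assuming only numpy (`add_jitter` is a stand-in for GPy's `util.diag.add`):

    import numpy as np

    def add_jitter(K, jitter=1e-6):
        # stand-in for GPy's util.diag.add: add jitter to the diagonal, in place
        K.flat[::K.shape[0] + 1] += jitter
        return K

    X = np.random.randn(10, 2)
    K = X.dot(X.T)                              # PSD but only rank 2
    try:
        np.linalg.cholesky(K)                   # typically raises LinAlgError
    except np.linalg.LinAlgError:
        L = np.linalg.cholesky(add_jitter(K))   # succeeds after the bump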

View file

@@ -8,7 +8,7 @@ import itertools
 def index_to_slices(index):
     """
     take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.

     e.g.
     >>> index = np.asarray([0,0,0,1,1,1,2,2,2])
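
As a reference for the contract the docstring describes, a hedged sketch of one way to compute such slices (the name `runs_to_slices` is hypothetical; this is not GPy's implementation):

    import numpy as np

    def runs_to_slices(index):
        # split the array at every value change, then group the runs by value
        breaks = np.where(np.diff(index) != 0)[0] + 1
        bounds = np.concatenate(([0], breaks, [len(index)]))
        out = {}
        for start, stop in zip(bounds[:-1], bounds[1:]):
            out.setdefault(int(index[start]), []).append(slice(int(start), int(stop)))
        return [out[k] for k in sorted(out)]

    print(runs_to_slices(np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2])))
    # [[slice(0, 3, None)], [slice(3, 6, None)], [slice(6, 9, None)]]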
@@ -40,6 +40,7 @@ class IndependentOutputs(CombinationKernel):
     The index of the functions is given by the last column in the input X
     the rest of the columns of X are passed to the underlying kernel for computation (in blocks).
+    Kern is wrapped with a slicer metaclass
     """
     def __init__(self, kern, index_dim=-1, name='independ'):
         assert isinstance(index_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the indices"

View file

@@ -15,21 +15,21 @@ class Stationary(Kern):
     """
     Stationary kernels (covariance functions).

     Stationary covariance functions depend only on r, where r is defined as

         r = \sqrt{ \sum_{q=1}^Q (x_q - x'_q)^2 }

     The covariance function k(x, x') can then be written k(r).

     In this implementation, r is scaled by the lengthscale parameter(s):

         r = \sqrt{ \sum_{q=1}^Q \frac{(x_q - x'_q)^2}{\ell_q^2} }.

     By default, there's only one lengthscale: separate lengthscales for each
     dimension can be enabled by setting ARD=True.

     To implement a stationary covariance function using this class, one need
     only define the covariance function k(r), and its derivative.

         ...

         def K_of_r(self, r):
@@ -37,10 +37,10 @@ class Stationary(Kern):
         def dK_dr(self, r):
             return bar

     The lengthscale(s) and variance parameters are added to the structure automatically.
     """

     def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name):
         super(Stationary, self).__init__(input_dim, active_dims, name)
         self.ARD = ARD
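
The contract sketched in the docstring is small; assuming the constructor signature shown above, a hedged example of a subclass written from scratch (the import path and class name are assumptions, not taken from GPy):

    import numpy as np
    from GPy.kern._src.stationary import Stationary   # import path is an assumption

    class MyExponential(Stationary):
        # k(r) = variance * exp(-r); only k(r) and dk/dr need defining
        def __init__(self, input_dim, variance=1., lengthscale=None,
                     ARD=False, active_dims=None, name='my_expo'):
            super(MyExponential, self).__init__(input_dim, variance, lengthscale,
                                                ARD, active_dims, name)

        def K_of_r(self, r):
            return self.variance * np.exp(-r)

        def dK_dr(self, r):
            return -self.variance * np.exp(-r)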
@@ -57,7 +57,7 @@ class Stationary(Kern):
             if lengthscale.size != input_dim:
                 lengthscale = np.ones(input_dim)*lengthscale
         else:
             lengthscale = np.ones(self.input_dim)
         self.lengthscale = Param('lengthscale', lengthscale, Logexp())
         self.variance = Param('variance', variance, Logexp())
         assert self.variance.size==1
@@ -95,7 +95,9 @@ class Stationary(Kern):
         #X2, = self._slice_X(X2)
         X1sq = np.sum(np.square(X),1)
         X2sq = np.sum(np.square(X2),1)
-        return np.sqrt(-2.*np.dot(X, X2.T) + (X1sq[:,None] + X2sq[None,:]))
+        r2 = -2.*np.dot(X, X2.T) + X1sq[:,None] + X2sq[None,:]
+        r2[r2<0] = 0. # A bit hacky
+        return np.sqrt(r2)

     @Cache_this(limit=5, ignore_args=())
     def _scaled_dist(self, X, X2=None):
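
The clamp exists because the expanded form of the squared distance, x·x - 2 x·x' + x'·x', can come out a hair below zero in floating point for (nearly) coincident points, and np.sqrt then yields NaN. The effect is easy to reproduce:

    import numpy as np

    np.random.seed(0)
    X = 100. * np.random.randn(200, 5)
    Xsq = np.sum(np.square(X), 1)
    r2 = -2. * np.dot(X, X.T) + Xsq[:, None] + Xsq[None, :]

    print(r2.min())                        # usually a tiny negative number
    r2[r2 < 0] = 0.                        # the guard added in this commit
    print(np.isnan(np.sqrt(r2)).any())     # False once clamped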
@@ -133,7 +135,7 @@ class Stationary(Kern):
         if self.ARD:
             #rinv = self._inv_dist(X, X2) # this is rather high memory? Should we loop instead?
             #d = X[:, None, :] - X2[None, :, :]
             #x_xl3 = np.square(d)
             #self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
             tmp = dL_dr*self._inv_dist(X, X2)
             if X2 is None: X2 = X
@@ -247,7 +249,7 @@ class Matern52(Stationary):
     .. math::

        k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac{5}{3} r^2) \exp(- \sqrt{5} r)

     """
     def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'):
         super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

View file

@@ -142,7 +142,12 @@ class Likelihood(Parameterized):
         """
         #conditional_mean: the expected value of y given some f, under this likelihood
         def int_mean(f,m,v):
-            return self.conditional_mean(f)*np.exp(-(0.5/v)*np.square(f - m))
+            p = np.exp(-(0.5/v)*np.square(f - m))
+            #If p is zero then conditional_mean will overflow
+            if p < 1e-10:
+                return 0.
+            else:
+                return self.conditional_mean(f)*p
         scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
         mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
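
Why the early return: quad probes the integrand far from m, where the unnormalized Gaussian weight p underflows to exactly 0.0 while conditional_mean(f) can overflow to inf, and 0 * inf = NaN poisons the whole quadrature. A small demonstration of the failure mode, assuming a log link so that the conditional mean is exp(f):

    import numpy as np

    f, m, v = 800., 0., 1.                        # deep in the Gaussian tail
    p = np.exp(-(0.5 / v) * np.square(f - m))     # underflows to 0.0
    g = np.exp(f)                                 # conditional mean: inf

    print(g * p)                                  # nan
    print(0. if p < 1e-10 else g * p)             # 0., as the patch returns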
@@ -165,7 +170,12 @@
         # E( V(Y_star|f_star) )
         def int_var(f,m,v):
-            return self.conditional_variance(f)*np.exp(-(0.5/v)*np.square(f - m))
+            p = np.exp(-(0.5/v)*np.square(f - m))
+            #If p is zero then conditional_variance will overflow
+            if p < 1e-10:
+                return 0.
+            else:
+                return self.conditional_variance(f)*p
         scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
         exp_var = np.array(scaled_exp_variance)[:,None] / normalizer
@@ -178,7 +188,13 @@
         #E( E(Y_star|f_star)**2 )
         def int_pred_mean_sq(f,m,v,predictive_mean_sq):
-            return self.conditional_mean(f)**2*np.exp(-(0.5/v)*np.square(f - m))
+            p = np.exp(-(0.5/v)*np.square(f - m))
+            #If p is zero then conditional_mean**2 will overflow
+            if p < 1e-10:
+                return 0.
+            else:
+                return self.conditional_mean(f)**2*p
         scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
         exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer

View file

@@ -6,6 +6,9 @@ from scipy import stats
 import scipy as sp
 from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf
+
+_exp_lim_val = np.finfo(np.float64).max
+_lim_val = np.log(_exp_lim_val)

 class GPTransformation(object):
     """
     Link function class for doing non-Gaussian likelihoods approximation
@@ -92,16 +95,16 @@ class Log(GPTransformation):
     """
     def transf(self,f):
-        return np.exp(f)
+        return np.exp(np.clip(f, -_lim_val, _lim_val))

     def dtransf_df(self,f):
-        return np.exp(f)
+        return np.exp(np.clip(f, -_lim_val, _lim_val))

     def d2transf_df2(self,f):
-        return np.exp(f)
+        return np.exp(np.clip(f, -_lim_val, _lim_val))

     def d3transf_df3(self,f):
-        return np.exp(f)
+        return np.exp(np.clip(f, -_lim_val, _lim_val))

 class Log_ex_1(GPTransformation):
     """

View file

@@ -21,7 +21,7 @@ class Poisson(Likelihood):
     """
     def __init__(self, gp_link=None):
         if gp_link is None:
-            gp_link = link_functions.Log_ex_1()
+            gp_link = link_functions.Log()

         super(Poisson, self).__init__(gp_link, name='Poisson')
@@ -143,7 +143,7 @@ class Poisson(Likelihood):
         """
         return self.gp_link.transf(gp)

-    def samples(self, gp):
+    def samples(self, gp, Y_metadata=None):
         """
         Returns a set of samples of observations based on a given value of the latent variable.
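
The samples signature gains Y_metadata to match the rest of the likelihood API. A hedged sketch of what a Poisson samples method of this shape plausibly does (the body is an assumption, not necessarily GPy's exact code):

    import numpy as np

    def samples(self, gp, Y_metadata=None):
        # latent values -> rates via the link, then one Poisson draw per rate
        rate = self.gp_link.transf(gp)
        return np.random.poisson(rate)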

View file

@@ -120,6 +120,8 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
     if verbose:
         print("Checking covariance function is positive definite.")
+    #if isinstance(kern, GPy.kern.IndependentOutputs):
+        #import ipdb; ipdb.set_trace() # XXX BREAKPOINT
     result = Kern_check_model(kern, X=X).is_positive_semi_definite()
     if result and verbose:
         print("Check passed.")
@@ -306,17 +308,22 @@ class KernelTestsNonContinuous(unittest.TestCase):
         D = self.D
         self.X = np.random.randn(N,D)
         self.X2 = np.random.randn(N1,D)
-        self.X_block = np.zeros((N+N1, D+D+1))
+        #self.X_block = np.zeros((N+N1, D+D+1))
+        #self.X_block[0:N, 0:D] = self.X
+        #self.X_block[N:N+N1, D:D+D] = self.X2
+        #self.X_block[0:N, -1] = 0
+        #self.X_block[N:N+N1, -1] = 1
+        self.X_block = np.zeros((N+N1, D+1))
         self.X_block[0:N, 0:D] = self.X
-        self.X_block[N:N+N1, D:D+D] = self.X2
-        self.X_block[0:N, -1] = 1
-        self.X_block[N:N+1, -1] = 2
+        self.X_block[N:N+N1, 0:D] = self.X2
+        self.X_block[0:N, -1] = 0
+        self.X_block[N:N+N1, -1] = 1
         self.X_block = self.X_block[self.X_block.argsort(0)[:, -1], :]

     def test_IndependentOutputs(self):
         k = GPy.kern.RBF(self.D)
         kern = GPy.kern.IndependentOutputs(k, -1)
-        self.assertTrue(check_kernel_gradient_functions(kern, X=self.X_block, X2=self.X_block, verbose=verbose))
+        self.assertTrue(check_kernel_gradient_functions(kern, X=self.X_block, verbose=verbose))

 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."