gp merge, grad dict is property of self + Y_metadata being passed through

This commit is contained in:
Max Zwiessele 2014-03-13 09:10:57 +00:00
commit 6530af3e46
14 changed files with 204 additions and 59 deletions

View file

@ -27,7 +27,7 @@ class GP(Model):
""" """
def __init__(self, X, Y, kernel, likelihood, inference_method=None, Y_metadata=None, name='gp'): def __init__(self, X, Y, kernel, likelihood, inference_method=None, name='gp', **Y_metadata):
super(GP, self).__init__(name) super(GP, self).__init__(name)
assert X.ndim == 2 assert X.ndim == 2
@ -43,7 +43,7 @@ class GP(Model):
_, self.output_dim = self.Y.shape _, self.output_dim = self.Y.shape
if Y_metadata is not None: if Y_metadata is not None:
self.Y_metadata = ObservableArray(Y_metadata) self.Y_metadata = Y_metadata
else: else:
self.Y_metadata = None self.Y_metadata = None
@ -56,7 +56,7 @@ class GP(Model):
#find a sensible inference method #find a sensible inference method
if inference_method is None: if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian): if isinstance(likelihood, likelihoods.Gaussian) or isinstance(likelihood, likelihoods.MixedNoise):
inference_method = exact_gaussian_inference.ExactGaussianInference() inference_method = exact_gaussian_inference.ExactGaussianInference()
else: else:
inference_method = expectation_propagation inference_method = expectation_propagation
@ -67,8 +67,8 @@ class GP(Model):
self.add_parameter(self.likelihood) self.add_parameter(self.likelihood)
def parameters_changed(self): def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, Y_metadata=self.Y_metadata) self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, **self.Y_metadata)
self.likelihood.update_gradients(np.diag(self.grad_dict['dL_dK'])) self.likelihood.update_gradients(np.diag(self.grad_dict['dL_dK']), **self.Y_metadata)
self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X) self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X)
def log_likelihood(self): def log_likelihood(self):

View file

@ -19,7 +19,7 @@ class DTC(object):
def __init__(self): def __init__(self):
self.const_jitter = 1e-6 self.const_jitter = 1e-6
def inference(self, kern, X, X_variance, Z, likelihood, Y): def inference(self, kern, X, Z, likelihood, Y):
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC." assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
#TODO: MAX! fix this! #TODO: MAX! fix this!
@ -80,10 +80,6 @@ class DTC(object):
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn), 'dL_dKnm':dL_dU.T} grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn), 'dL_dKnm':dL_dU.T}
#update gradients
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
likelihood.update_gradients(dL_dR)
#construct a posterior object #construct a posterior object
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L) post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)

View file

@ -33,7 +33,7 @@ class ExactGaussianInference(object):
#if Y in self.cache, return self.Cache[Y], else store Y in cache and return L. #if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
raise NotImplementedError, 'TODO' #TODO raise NotImplementedError, 'TODO' #TODO
def inference(self, kern, X, likelihood, Y, Y_metadata=None): def inference(self, kern, X, likelihood, Y, **Y_metadata):
""" """
Returns a Posterior class containing essential quantities of the posterior Returns a Posterior class containing essential quantities of the posterior
""" """
@ -41,7 +41,7 @@ class ExactGaussianInference(object):
K = kern.K(X) K = kern.K(X)
Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, Y_metadata)) Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, **Y_metadata))
alpha, _ = dpotrs(LW, YYT_factor, lower=1) alpha, _ = dpotrs(LW, YYT_factor, lower=1)
@ -50,5 +50,3 @@ class ExactGaussianInference(object):
dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi) dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK} return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK}

View file

@ -17,8 +17,7 @@ class FITC(object):
def __init__(self): def __init__(self):
self.const_jitter = 1e-6 self.const_jitter = 1e-6
def inference(self, kern, X, X_variance, Z, likelihood, Y): def inference(self, kern, X, Z, likelihood, Y):
assert X_variance is None, "cannot use X_variance with FITC. Try varDTC."
#TODO: MAX! fix this! #TODO: MAX! fix this!
from ...util.misc import param_to_array from ...util.misc import param_to_array
@ -81,11 +80,7 @@ class FITC(object):
dL_dU *= beta_star dL_dU *= beta_star
dL_dU -= 2.*KiU*dL_dR dL_dU -= 2.*KiU*dL_dR
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':dL_dR, 'dL_dKnm':dL_dU.T} grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':dL_dR, 'dL_dKnm':dL_dU.T, 'partial_for_likelihood':dL_dR}
#update gradients
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
likelihood.update_gradients(dL_dR)
#construct a posterior object #construct a posterior object
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L) post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)

View file

@ -34,8 +34,8 @@ class Coregionalize(Kern):
.. note: see coregionalization examples in GPy.examples.regression for some usage. .. note: see coregionalization examples in GPy.examples.regression for some usage.
""" """
def __init__(self, output_dim, rank=1, W=None, kappa=None, name='coregion'): def __init__(self, input_dim, output_dim, rank=1, W=None, kappa=None, name='coregion'):
super(Coregionalize, self).__init__(input_dim=1, name=name) super(Coregionalize, self).__init__(input_dim, name=name)
self.output_dim = output_dim self.output_dim = output_dim
self.rank = rank self.rank = rank
if self.rank>output_dim: if self.rank>output_dim:

View file

@ -40,7 +40,7 @@ class Prod(CombinationKernel):
def update_gradients_full(self, dL_dK, X): def update_gradients_full(self, dL_dK, X):
for k1,k2 in itertools.combinations(self.parts, 2): for k1,k2 in itertools.combinations(self.parts, 2):
k1._sliced_X = k1._sliced_X2 = k2._sliced_X = k2._sliced_X2 = True k1._sliced_X = k1._sliced_X2 = k2._sliced_X = k2._sliced_X2 = True
k1.update_gradients_full(dL_dK*k2.K(X, X) k1.update_gradients_full(dL_dK*k2.K(X, X))
self.k2.update_gradients_full(dL_dK*self.k1.K(X[:,self.slice1]), X[:,self.slice2]) self.k2.update_gradients_full(dL_dK*self.k1.K(X[:,self.slice1]), X[:,self.slice2])
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):

View file

@ -5,3 +5,4 @@ from gamma import Gamma
from poisson import Poisson from poisson import Poisson
from student_t import StudentT from student_t import StudentT
from likelihood import Likelihood from likelihood import Likelihood
from mixed_noise import MixedNoise

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
#TODO #TODO
""" """
A lot of this code assumes that the link function is the identity. A lot of this code assumes that the link function is the identity.
I think laplace code is okay, but I'm quite sure that the EP moments will only work if the link is identity. I think laplace code is okay, but I'm quite sure that the EP moments will only work if the link is identity.
@ -49,7 +49,7 @@ class Gaussian(Likelihood):
if isinstance(gp_link, link_functions.Identity): if isinstance(gp_link, link_functions.Identity):
self.log_concave = True self.log_concave = True
def covariance_matrix(self, Y, Y_metadata=None): def covariance_matrix(self, Y, **Y_metadata):
return np.eye(Y.shape[0]) * self.variance return np.eye(Y.shape[0]) * self.variance
def update_gradients(self, partial): def update_gradients(self, partial):

View file

@ -0,0 +1,58 @@
import numpy as np
from scipy import stats, special
from GPy.util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions
from likelihood import Likelihood
from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp
from ..core.parameterization import Parameterized
import itertools
class MixedNoise(Likelihood):
def __init__(self, likelihoods_list, noise_index, variance = None, name='mixed_noise'):
Nlike = len(likelihoods_list)
self.order = np.unique(noise_index)
assert self.order.size == Nlike
if variance is None:
variance = np.ones(Nlike)
else:
assert variance.size == Nlike
super(Likelihood, self).__init__(name=name)
self.add_parameters(*likelihoods_list)
self.likelihoods_list = likelihoods_list
self.noise_index = noise_index
self.log_concave = False
self.likelihoods_indices = [noise_index.flatten()==j for j in self.order]
def covariance_matrix(self, Y, noise_index, **Y_metadata):
variance = np.zeros(Y.shape[0])
for lik, ind in itertools.izip(self.likelihoods_list, self.likelihoods_indices):
variance[ind] = lik.variance
return np.diag(variance)
def update_gradients(self, partial, noise_index, **Y_metadata):
[lik.update_gradients(partial[ind]) for lik,ind in itertools.izip(self.likelihoods_list, self.likelihoods_indices)]
def predictive_values(self, mu, var, full_cov=False, noise_index=None, **Y_metadata):
_variance = np.array([ self.likelihoods_list[j].variance for j in noise_index ])
if full_cov:
var += np.eye(var.shape[0])*_variance
d = 2*np.sqrt(np.diag(var))
low, up = mu - d, mu + d
else:
var += _variance
d = 2*np.sqrt(var)
low, up = mu - d, mu + d
return mu, var, low, up
def predictive_variance(self, mu, sigma, noise_index, predictive_mean=None, **Y_metadata):
if isinstance(noise_index,int):
_variance = self.variance[noise_index]
else:
_variance = np.array([ self.variance[j] for j in noise_index ])[:,None]
return _variance + sigma**2

View file

@ -13,6 +13,6 @@ from warped_gp import WarpedGP
from bayesian_gplvm import BayesianGPLVM from bayesian_gplvm import BayesianGPLVM
from mrd import MRD from mrd import MRD
from gradient_checker import GradientChecker from gradient_checker import GradientChecker
from gp_multioutput_regression import GPMultioutputRegression from ss_gplvm import SSGPLVM
from sparse_gp_multioutput_regression import SparseGPMultioutputRegression from gp_coregionalized_regression import GPCoregionalizedRegression
from ss_gplvm import SSGPLVM #.py file not included!!! #from sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression

View file

@ -0,0 +1,44 @@
# Copyright (c) 2012 - 2014 the GPy Austhors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import GP
from .. import likelihoods
from .. import kern
from .. import util
class GPCoregionalizedRegression(GP):
"""
Gaussian Process model for heteroscedastic multioutput regression
This is a thin wrapper around the models.GP class, with a set of sensible defaults
:param X_list: list of input observations corresponding to each output
:type X_list: list of numpy arrays
:param Y_list: list of observed values related to the different noise models
:type Y_list: list of numpy arrays
:param kernel: a GPy kernel, defaults to RBF ** Coregionalized
:type kernel: None | GPy.kernel defaults
:likelihoods_list: a list of likelihoods, defaults to list of Gaussian likelihoods
:type likelihoods_list: None | a list GPy.likelihoods
:param name: model name
:type name: string
:param W_rank: number tuples of the corregionalization parameters 'W' (see coregionalize kernel documentation)
:type W_rank: integer
:param kernel_name: name of the kernel
:type kernel_name: string
"""
def __init__(self, X_list, Y_list, kernel=None, likelihoods_list=None, name='GPCR',W_rank=1,kernel_name='X'):
#Input and Output
X,Y,self.noise_index = util.multioutput.build_XY(X_list,Y_list)
Ny = len(Y_list)
#Kernel
if kernel is None:
kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=GPy.kern.rbf(X.shape[1]-1), W_rank=1,name=kernel_name)
#Likelihood
likelihood = util.multioutput.build_likelihood(Y_list,self.noise_index,likelihoods_list)
super(GPCoregionalizedRegression, self).__init__(X,Y,kernel,likelihood, noise_index=self.noise_index)

View file

@ -57,7 +57,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
fig = pb.figure(num=fignum) fig = pb.figure(num=fignum)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs(): if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
X = model.X.mean X = model.X.mean
X_variance = param_to_array(model.X.variance) X_variance = param_to_array(model.X.variance)
else: else:
@ -86,7 +86,14 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
upper = m + 2*np.sqrt(v) upper = m + 2*np.sqrt(v)
Y = Y Y = Y
else: else:
m, v, lower, upper = model.predict(Xgrid) if 'noise_index' in model.Y_metadata.keys():
if np.unique(model.Y_metadata['noise_index'][which_data_rows]).size > 1:
print "Data slices choosen have different noise models. Just one will be used."
noise_index = np.repeat(model.Y_metadata['noise_index'][which_data_rows][0], Xgrid.shape[0])[:,None]
m, v, lower, upper = model.predict(Xgrid,full_cov=False,noise_index=noise_index)
else:
noise_index = None
m, v, lower, upper = model.predict(Xgrid,full_cov=False)
Y = Y Y = Y
for d in which_data_ycols: for d in which_data_ycols:
plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol) plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)

View file

@ -14,6 +14,7 @@ import subarray_and_sorting
import caching import caching
import diag import diag
import initialization import initialization
import multioutput
try: try:
import sympy import sympy

View file

@ -1,12 +1,17 @@
import numpy as np import numpy as np
import warnings import warnings
from .. import kern import GPy
def build_XY(input_list,output_list=None,index=None):
def get_slices(input_list):
num_outputs = len(input_list) num_outputs = len(input_list)
_s = [0] + [ _x.shape[0] for _x in input_list ] _s = [0] + [ _x.shape[0] for _x in input_list ]
_s = np.cumsum(_s) _s = np.cumsum(_s)
slices = [slice(a,b) for a,b in zip(_s[:-1],_s[1:])] slices = [slice(a,b) for a,b in zip(_s[:-1],_s[1:])]
return slices
def build_XY(input_list,output_list=None,index=None):
num_outputs = len(input_list)
if output_list is not None: if output_list is not None:
assert num_outputs == len(output_list) assert num_outputs == len(output_list)
Y = np.vstack(output_list) Y = np.vstack(output_list)
@ -15,42 +20,82 @@ def build_XY(input_list,output_list=None,index=None):
if index is not None: if index is not None:
assert len(index) == num_outputs assert len(index) == num_outputs
I = np.vstack( [j*np.ones((_x.shape[0],1)) for _x,j in zip(input_list,index)] ) I = np.hstack( [np.repeat(j,_x.shape[0]) for _x,j in zip(input_list,index)] )
else: else:
I = np.vstack( [j*np.ones((_x.shape[0],1)) for _x,j in zip(input_list,range(num_outputs))] ) I = np.hstack( [np.repeat(j,_x.shape[0]) for _x,j in zip(input_list,range(num_outputs))] )
X = np.vstack(input_list) X = np.vstack(input_list)
X = np.hstack([X,I]) X = np.hstack([X,I[:,None]])
return X,Y,slices
def build_lcm(input_dim, num_outputs, CK = [], NC = [], W_columns=1,W=None,kappa=None): return X,Y,I[:,None]#slices
#TODO build_icm or build_lcm
def build_likelihood(Y_list,noise_index,likelihoods_list=None):
Ny = len(Y_list)
if likelihoods_list is None:
likelihoods_list = [GPy.likelihoods.Gaussian(name="Gaussian_noise_%s" %j) for y,j in zip(Y_list,range(Ny))]
else:
assert len(likelihoods_list) == Ny
likelihood = GPy.likelihoods.mixed_noise.MixedNoise(likelihoods_list=likelihoods_list, noise_index=noise_index)
return likelihood
def ICM(input_dim, num_outputs, kernel, W_rank=1,W=None,kappa=None,name='X'):
""" """
Builds a kernel for a linear coregionalization model Builds a kernel for an Intrinsic Coregionalization Model
:input_dim: Input dimensionality :input_dim: Input dimensionality
:num_outputs: Number of outputs :num_outputs: Number of outputs
:param CK: List of coregionalized kernels (i.e., this will be multiplied by a coregionalize kernel). :param kernel: kernel that will be multiplied by the coregionalize kernel (matrix B).
:param K: List of kernels that will be added up together with CK, but won't be multiplied by a coregionalize kernel :type kernel: a GPy kernel
:param W_columns: number tuples of the corregionalization parameters 'coregion_W' :param W_rank: number tuples of the corregionalization parameters 'W'
:type W_columns: integer :type W_rank: integer
""" """
if kernel.input_dim <> input_dim:
kernel.input_dim = input_dim
warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
for k in CK: #K = kernel.prod(GPy.kern.Coregionalize(input_dim, num_outputs,W_rank,W,kappa,name='B'),tensor=True,name=name)
if k.input_dim <> input_dim: K = kernel.prod(GPy.kern.Coregionalize(input_dim, num_outputs,W_rank,W,kappa,name='B'),name=name)
k.input_dim = input_dim K['.*variance'] = 1.
warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.") K['.*variance'].fix()
return K
for k in NC:
if k.input_dim <> input_dim + 1:
k.input_dim = input_dim + 1
warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
kernel = CK[0].prod(kern.Coregionalize(num_outputs,W_columns,W,kappa),tensor=True) def LCM(input_dim, num_outputs, kernels_list, W_rank=1,name='X'):
for k in CK[1:]: """
k_coreg = kern.Coregionalize(num_outputs,W_columns,W,kappa) Builds a kernel for an Linear Coregionalization Model
kernel += k.prod(k_coreg,tensor=True)
for k in NC:
kernel += k
return kernel :input_dim: Input dimensionality
:num_outputs: Number of outputs
:param kernel: kernel that will be multiplied by the coregionalize kernel (matrix B).
:type kernel: a GPy kernel
:param W_rank: number tuples of the corregionalization parameters 'W'
:type W_rank: integer
"""
Nk = len(kernels_list)
K = ICM(input_dim,num_outputs,kernels_list[0],W_rank,name='%s%s' %(name,0))
j = 1
for kernel in kernels_list[1:]:
K += ICM(input_dim,num_outputs,kernel,W_rank,name='%s%s' %(name,j))
return K
def Private(input_dim, num_outputs, kernel, output, kappa=None,name='X'):
"""
Builds a kernel for an Intrinsic Coregionalization Model
:input_dim: Input dimensionality
:num_outputs: Number of outputs
:param kernel: kernel that will be multiplied by the coregionalize kernel (matrix B).
:type kernel: a GPy kernel
:param W_rank: number tuples of the corregionalization parameters 'W'
:type W_rank: integer
"""
K = ICM(input_dim,num_outputs,kernel,W_rank=1,kappa=kappa,name=name)
K.B.W.fix(0)
_range = range(num_outputs)
_range.pop(output)
for j in _range:
K.B.kappa[j] = 0
K.B.kappa[j].fix()
return K