mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-24 14:15:14 +02:00
gp merge, grad dict is property of self + Y_metadata being passed through
This commit is contained in:
commit
6530af3e46
14 changed files with 204 additions and 59 deletions
|
|
@ -27,7 +27,7 @@ class GP(Model):
|
|||
|
||||
|
||||
"""
|
||||
def __init__(self, X, Y, kernel, likelihood, inference_method=None, Y_metadata=None, name='gp'):
|
||||
def __init__(self, X, Y, kernel, likelihood, inference_method=None, name='gp', **Y_metadata):
|
||||
super(GP, self).__init__(name)
|
||||
|
||||
assert X.ndim == 2
|
||||
|
|
@ -43,7 +43,7 @@ class GP(Model):
|
|||
_, self.output_dim = self.Y.shape
|
||||
|
||||
if Y_metadata is not None:
|
||||
self.Y_metadata = ObservableArray(Y_metadata)
|
||||
self.Y_metadata = Y_metadata
|
||||
else:
|
||||
self.Y_metadata = None
|
||||
|
||||
|
|
@ -56,7 +56,7 @@ class GP(Model):
|
|||
|
||||
#find a sensible inference method
|
||||
if inference_method is None:
|
||||
if isinstance(likelihood, likelihoods.Gaussian):
|
||||
if isinstance(likelihood, likelihoods.Gaussian) or isinstance(likelihood, likelihoods.MixedNoise):
|
||||
inference_method = exact_gaussian_inference.ExactGaussianInference()
|
||||
else:
|
||||
inference_method = expectation_propagation
|
||||
|
|
@ -67,8 +67,8 @@ class GP(Model):
|
|||
self.add_parameter(self.likelihood)
|
||||
|
||||
def parameters_changed(self):
|
||||
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, Y_metadata=self.Y_metadata)
|
||||
self.likelihood.update_gradients(np.diag(self.grad_dict['dL_dK']))
|
||||
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, **self.Y_metadata)
|
||||
self.likelihood.update_gradients(np.diag(self.grad_dict['dL_dK']), **self.Y_metadata)
|
||||
self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X)
|
||||
|
||||
def log_likelihood(self):
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ class DTC(object):
|
|||
def __init__(self):
|
||||
self.const_jitter = 1e-6
|
||||
|
||||
def inference(self, kern, X, X_variance, Z, likelihood, Y):
|
||||
def inference(self, kern, X, Z, likelihood, Y):
|
||||
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
|
||||
|
||||
#TODO: MAX! fix this!
|
||||
|
|
@ -80,10 +80,6 @@ class DTC(object):
|
|||
|
||||
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn), 'dL_dKnm':dL_dU.T}
|
||||
|
||||
#update gradients
|
||||
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
|
||||
likelihood.update_gradients(dL_dR)
|
||||
|
||||
#construct a posterior object
|
||||
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
|
||||
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ class ExactGaussianInference(object):
|
|||
#if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
|
||||
raise NotImplementedError, 'TODO' #TODO
|
||||
|
||||
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
|
||||
def inference(self, kern, X, likelihood, Y, **Y_metadata):
|
||||
"""
|
||||
Returns a Posterior class containing essential quantities of the posterior
|
||||
"""
|
||||
|
|
@ -41,7 +41,7 @@ class ExactGaussianInference(object):
|
|||
|
||||
K = kern.K(X)
|
||||
|
||||
Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, Y_metadata))
|
||||
Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, **Y_metadata))
|
||||
|
||||
alpha, _ = dpotrs(LW, YYT_factor, lower=1)
|
||||
|
||||
|
|
@ -50,5 +50,3 @@ class ExactGaussianInference(object):
|
|||
dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
|
||||
|
||||
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -17,8 +17,7 @@ class FITC(object):
|
|||
def __init__(self):
|
||||
self.const_jitter = 1e-6
|
||||
|
||||
def inference(self, kern, X, X_variance, Z, likelihood, Y):
|
||||
assert X_variance is None, "cannot use X_variance with FITC. Try varDTC."
|
||||
def inference(self, kern, X, Z, likelihood, Y):
|
||||
|
||||
#TODO: MAX! fix this!
|
||||
from ...util.misc import param_to_array
|
||||
|
|
@ -81,11 +80,7 @@ class FITC(object):
|
|||
dL_dU *= beta_star
|
||||
dL_dU -= 2.*KiU*dL_dR
|
||||
|
||||
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':dL_dR, 'dL_dKnm':dL_dU.T}
|
||||
|
||||
#update gradients
|
||||
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
|
||||
likelihood.update_gradients(dL_dR)
|
||||
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':dL_dR, 'dL_dKnm':dL_dU.T, 'partial_for_likelihood':dL_dR}
|
||||
|
||||
#construct a posterior object
|
||||
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
|
||||
|
|
|
|||
|
|
@ -34,8 +34,8 @@ class Coregionalize(Kern):
|
|||
|
||||
.. note: see coregionalization examples in GPy.examples.regression for some usage.
|
||||
"""
|
||||
def __init__(self, output_dim, rank=1, W=None, kappa=None, name='coregion'):
|
||||
super(Coregionalize, self).__init__(input_dim=1, name=name)
|
||||
def __init__(self, input_dim, output_dim, rank=1, W=None, kappa=None, name='coregion'):
|
||||
super(Coregionalize, self).__init__(input_dim, name=name)
|
||||
self.output_dim = output_dim
|
||||
self.rank = rank
|
||||
if self.rank>output_dim:
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ class Prod(CombinationKernel):
|
|||
def update_gradients_full(self, dL_dK, X):
|
||||
for k1,k2 in itertools.combinations(self.parts, 2):
|
||||
k1._sliced_X = k1._sliced_X2 = k2._sliced_X = k2._sliced_X2 = True
|
||||
k1.update_gradients_full(dL_dK*k2.K(X, X)
|
||||
k1.update_gradients_full(dL_dK*k2.K(X, X))
|
||||
self.k2.update_gradients_full(dL_dK*self.k1.K(X[:,self.slice1]), X[:,self.slice2])
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2=None):
|
||||
|
|
|
|||
|
|
@ -5,3 +5,4 @@ from gamma import Gamma
|
|||
from poisson import Poisson
|
||||
from student_t import StudentT
|
||||
from likelihood import Likelihood
|
||||
from mixed_noise import MixedNoise
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ class Gaussian(Likelihood):
|
|||
if isinstance(gp_link, link_functions.Identity):
|
||||
self.log_concave = True
|
||||
|
||||
def covariance_matrix(self, Y, Y_metadata=None):
|
||||
def covariance_matrix(self, Y, **Y_metadata):
|
||||
return np.eye(Y.shape[0]) * self.variance
|
||||
|
||||
def update_gradients(self, partial):
|
||||
|
|
|
|||
58
GPy/likelihoods/mixed_noise.py
Normal file
58
GPy/likelihoods/mixed_noise.py
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
import numpy as np
|
||||
from scipy import stats, special
|
||||
from GPy.util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
|
||||
import link_functions
|
||||
from likelihood import Likelihood
|
||||
from ..core.parameterization import Param
|
||||
from ..core.parameterization.transformations import Logexp
|
||||
from ..core.parameterization import Parameterized
|
||||
import itertools
|
||||
|
||||
class MixedNoise(Likelihood):
    """
    Likelihood made of several noise models, one per group of observations.

    Each row of the data is assigned to one of the likelihoods in
    ``likelihoods_list`` through ``noise_index``.

    :param likelihoods_list: one likelihood per distinct value in noise_index
    :type likelihoods_list: list of likelihoods (each must expose ``variance``)
    :param noise_index: for every data row, the label of the likelihood that applies to it
    :type noise_index: numpy array
    :param variance: optional array with one entry per likelihood; it is only
        size-checked here and is not stored or used otherwise
    :type variance: None | numpy array
    :param name: name of the likelihood
    :type name: string
    """
    def __init__(self, likelihoods_list, noise_index, variance = None, name='mixed_noise'):

        Nlike = len(likelihoods_list)
        # Distinct labels found in noise_index, sorted by np.unique.
        self.order = np.unique(noise_index)

        # There must be exactly one likelihood per distinct label.
        assert self.order.size == Nlike

        if variance is None:
            variance = np.ones(Nlike)
        else:
            assert variance.size == Nlike

        # NOTE(review): super(Likelihood, self) deliberately starts the lookup
        # *after* Likelihood in the MRO, so Likelihood.__init__ is not run --
        # presumably because no single link function applies; confirm.
        super(Likelihood, self).__init__(name=name)

        self.add_parameters(*likelihoods_list)
        self.likelihoods_list = likelihoods_list
        self.noise_index = noise_index
        self.log_concave = False
        # One boolean row mask per label, in the same order as self.order.
        self.likelihoods_indices = [noise_index.flatten()==j for j in self.order]

    def covariance_matrix(self, Y, noise_index, **Y_metadata):
        """
        Return the diagonal noise covariance for the training data.

        The masks computed in the constructor are used; the ``noise_index``
        argument is accepted but not read here.

        :param Y: observations, only ``Y.shape[0]`` is used
        :rtype: square numpy array with the per-row noise variance on its diagonal
        """
        variance = np.zeros(Y.shape[0])
        for lik, ind in zip(self.likelihoods_list, self.likelihoods_indices):
            variance[ind] = lik.variance
        return np.diag(variance)

    def update_gradients(self, partial, noise_index, **Y_metadata):
        """
        Forward to each likelihood the slice of ``partial`` that belongs to its rows.

        The ``noise_index`` argument is accepted but not read here; the masks
        computed in the constructor are used.
        """
        for lik, ind in zip(self.likelihoods_list, self.likelihoods_indices):
            lik.update_gradients(partial[ind])

    def predictive_values(self, mu, var, full_cov=False, noise_index=None, **Y_metadata):
        """
        Add the observation noise to a latent prediction and build 2-sigma bounds.

        ``var`` is modified in place.

        :param mu: predictive mean of the latent function
        :param var: predictive variance (or full covariance if full_cov is True)
        :param full_cov: whether ``var`` is a full covariance matrix
        :param noise_index: for every predicted row, the position in
            likelihoods_list of the noise model to use; despite the default it
            must be supplied, since it is iterated over
        :returns: mu, var (with noise added), lower bound, upper bound
        """
        _variance = np.array([ self.likelihoods_list[j].variance for j in noise_index ])
        if full_cov:
            var += np.eye(var.shape[0])*_variance
            d = 2*np.sqrt(np.diag(var))
            low, up = mu - d, mu + d
        else:
            var += _variance
            d = 2*np.sqrt(var)
            low, up = mu - d, mu + d
        return mu, var, low, up

    def predictive_variance(self, mu, sigma, noise_index, predictive_mean=None, **Y_metadata):
        """
        Return the predictive variance of the observations: noise variance + sigma**2.

        :param sigma: standard deviation of the latent prediction
        :param noise_index: either a single int or an iterable with, for every
            predicted row, the position in likelihoods_list of the noise model to use
        """
        # This class never defines self.variance; the noise variances live on
        # the individual likelihoods (as in predictive_values above).
        if isinstance(noise_index,int):
            _variance = self.likelihoods_list[noise_index].variance
        else:
            # reshape(-1, 1) yields one column whether each variance is a
            # scalar or a one-element array.
            _variance = np.array([ self.likelihoods_list[j].variance for j in noise_index ], dtype=float).reshape(-1, 1)
        return _variance + sigma**2
|
||||
|
|
@ -13,6 +13,6 @@ from warped_gp import WarpedGP
|
|||
from bayesian_gplvm import BayesianGPLVM
|
||||
from mrd import MRD
|
||||
from gradient_checker import GradientChecker
|
||||
from gp_multioutput_regression import GPMultioutputRegression
|
||||
from sparse_gp_multioutput_regression import SparseGPMultioutputRegression
|
||||
from ss_gplvm import SSGPLVM
|
||||
from gp_coregionalized_regression import GPCoregionalizedRegression
|
||||
#.py file not included!!! #from sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
|
||||
|
|
|
|||
44
GPy/models/gp_coregionalized_regression.py
Normal file
44
GPy/models/gp_coregionalized_regression.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
# Copyright (c) 2012 - 2014 the GPy Authors (see AUTHORS.txt)
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from ..core import GP
|
||||
from .. import likelihoods
|
||||
from .. import kern
|
||||
from .. import util
|
||||
|
||||
class GPCoregionalizedRegression(GP):
    """
    Gaussian Process model for heteroscedastic multioutput regression

    This is a thin wrapper around the models.GP class, with a set of sensible defaults

    :param X_list: list of input observations corresponding to each output
    :type X_list: list of numpy arrays
    :param Y_list: list of observed values related to the different noise models
    :type Y_list: list of numpy arrays
    :param kernel: a GPy kernel, defaults to an ICM kernel built from an RBF kernel
    :type kernel: None | GPy.kernel defaults
    :param likelihoods_list: a list of likelihoods, defaults to list of Gaussian likelihoods
    :type likelihoods_list: None | a list GPy.likelihoods
    :param name: model name (NOTE(review): accepted but not forwarded to GP.__init__ -- confirm whether that is intended)
    :type name: string
    :param W_rank: number tuples of the coregionalization parameters 'W' (see coregionalize kernel documentation); only used when the default kernel is built
    :type W_rank: integer
    :param kernel_name: name of the kernel; only used when the default kernel is built
    :type kernel_name: string
    """
    def __init__(self, X_list, Y_list, kernel=None, likelihoods_list=None, name='GPCR',W_rank=1,kernel_name='X'):

        #Input and Output
        X,Y,self.noise_index = util.multioutput.build_XY(X_list,Y_list)
        Ny = len(Y_list)

        #Kernel
        if kernel is None:
            # The last column of X is the output index appended by build_XY,
            # hence the "- 1" on the input dimension.
            # `kern` is the package imported at the top of this module; the
            # name `GPy` is not imported here. The caller's W_rank is passed
            # through rather than a fixed 1.
            kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=kern.rbf(X.shape[1]-1), W_rank=W_rank,name=kernel_name)

        #Likelihood
        likelihood = util.multioutput.build_likelihood(Y_list,self.noise_index,likelihoods_list)

        # noise_index travels as Y_metadata so the inference method and the
        # likelihood receive it as a keyword argument.
        super(GPCoregionalizedRegression, self).__init__(X,Y,kernel,likelihood, noise_index=self.noise_index)
|
||||
|
|
@ -86,7 +86,14 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
|||
upper = m + 2*np.sqrt(v)
|
||||
Y = Y
|
||||
else:
|
||||
m, v, lower, upper = model.predict(Xgrid)
|
||||
if 'noise_index' in model.Y_metadata.keys():
|
||||
if np.unique(model.Y_metadata['noise_index'][which_data_rows]).size > 1:
|
||||
print "Data slices choosen have different noise models. Just one will be used."
|
||||
noise_index = np.repeat(model.Y_metadata['noise_index'][which_data_rows][0], Xgrid.shape[0])[:,None]
|
||||
m, v, lower, upper = model.predict(Xgrid,full_cov=False,noise_index=noise_index)
|
||||
else:
|
||||
noise_index = None
|
||||
m, v, lower, upper = model.predict(Xgrid,full_cov=False)
|
||||
Y = Y
|
||||
for d in which_data_ycols:
|
||||
plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ import subarray_and_sorting
|
|||
import caching
|
||||
import diag
|
||||
import initialization
|
||||
import multioutput
|
||||
|
||||
try:
|
||||
import sympy
|
||||
|
|
|
|||
|
|
@ -1,12 +1,17 @@
|
|||
import numpy as np
|
||||
import warnings
|
||||
from .. import kern
|
||||
import GPy
|
||||
|
||||
def build_XY(input_list,output_list=None,index=None):
|
||||
|
||||
def get_slices(input_list):
    """
    Return, for each array in input_list, the slice of rows it would occupy
    if all the arrays were stacked vertically in order.

    :param input_list: arrays (anything with a ``shape`` attribute); only ``shape[0]`` is read
    :type input_list: iterable of numpy arrays
    :returns: one slice per input array, consecutive and non-overlapping
    :rtype: list of slices
    """
    # Cumulative row boundaries: 0, n0, n0+n1, ...
    bounds = np.cumsum([0] + [_x.shape[0] for _x in input_list])
    return [slice(start, stop) for start, stop in zip(bounds[:-1], bounds[1:])]
|
||||
|
||||
def build_XY(input_list,output_list=None,index=None):
|
||||
num_outputs = len(input_list)
|
||||
if output_list is not None:
|
||||
assert num_outputs == len(output_list)
|
||||
Y = np.vstack(output_list)
|
||||
|
|
@ -15,42 +20,82 @@ def build_XY(input_list,output_list=None,index=None):
|
|||
|
||||
if index is not None:
|
||||
assert len(index) == num_outputs
|
||||
I = np.vstack( [j*np.ones((_x.shape[0],1)) for _x,j in zip(input_list,index)] )
|
||||
I = np.hstack( [np.repeat(j,_x.shape[0]) for _x,j in zip(input_list,index)] )
|
||||
else:
|
||||
I = np.vstack( [j*np.ones((_x.shape[0],1)) for _x,j in zip(input_list,range(num_outputs))] )
|
||||
I = np.hstack( [np.repeat(j,_x.shape[0]) for _x,j in zip(input_list,range(num_outputs))] )
|
||||
|
||||
X = np.vstack(input_list)
|
||||
X = np.hstack([X,I])
|
||||
return X,Y,slices
|
||||
X = np.hstack([X,I[:,None]])
|
||||
|
||||
def build_lcm(input_dim, num_outputs, CK = [], NC = [], W_columns=1,W=None,kappa=None):
|
||||
#TODO build_icm or build_lcm
|
||||
return X,Y,I[:,None]#slices
|
||||
|
||||
def build_likelihood(Y_list,noise_index,likelihoods_list=None):
    """
    Wrap one likelihood per output into a single MixedNoise likelihood.

    :param Y_list: observed values for each output; only its length is used
    :type Y_list: list
    :param noise_index: passed unchanged to MixedNoise
    :param likelihoods_list: one likelihood per output; when None, a Gaussian
        likelihood named "Gaussian_noise_<j>" is created for every output j
    :type likelihoods_list: None | list of likelihoods
    :returns: the MixedNoise likelihood
    """
    n_outputs = len(Y_list)
    if likelihoods_list is not None:
        # A user-supplied list must provide exactly one likelihood per output.
        assert len(likelihoods_list) == n_outputs
    else:
        likelihoods_list = [
            GPy.likelihoods.Gaussian(name="Gaussian_noise_%s" %j)
            for j in range(n_outputs)
        ]
    return GPy.likelihoods.mixed_noise.MixedNoise(
        likelihoods_list=likelihoods_list,
        noise_index=noise_index,
    )
|
||||
|
||||
|
||||
def ICM(input_dim, num_outputs, kernel, W_rank=1,W=None,kappa=None,name='X'):
    """
    Builds a kernel for an Intrinsic Coregionalization Model

    :input_dim: Input dimensionality
    :num_outputs: Number of outputs
    :param kernel: kernel that will be multiplied by the coregionalize kernel (matrix B).
    :type kernel: a GPy kernel
    :param W_rank: number tuples of the coregionalization parameters 'W'
    :type W_rank: integer
    :param W: passed on to the Coregionalize kernel
    :param kappa: passed on to the Coregionalize kernel
    :param name: name given to the product kernel
    :type name: string
    :returns: the product of ``kernel`` and a Coregionalize kernel named 'B'
    """
    # NOTE: the input kernel is modified in place when its dimension disagrees.
    if kernel.input_dim != input_dim:
        kernel.input_dim = input_dim
        warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")

    K = kernel.prod(GPy.kern.Coregionalize(input_dim, num_outputs,W_rank,W,kappa,name='B'),name=name)
    # Pin every parameter matching '.*variance' to 1 and fix it.
    # NOTE(review): presumably so that the overall scale is carried by B only -- confirm.
    K['.*variance'] = 1.
    K['.*variance'].fix()
    return K
|
||||
def LCM(input_dim, num_outputs, kernels_list, W_rank=1,name='X'):
    """
    Builds a kernel for a Linear Coregionalization Model

    The result is the sum of one ICM kernel per entry of kernels_list; the
    j-th ICM kernel is named ``name`` followed by j (e.g. 'X0', 'X1', ...).

    :input_dim: Input dimensionality
    :num_outputs: Number of outputs
    :param kernels_list: kernels that will each be multiplied by their own coregionalize kernel (matrix B); must not be empty
    :type kernels_list: list of GPy kernels
    :param W_rank: number tuples of the coregionalization parameters 'W'
    :type W_rank: integer
    :param name: prefix for the names of the ICM kernels
    :type name: string
    """
    K = ICM(input_dim,num_outputs,kernels_list[0],W_rank,name='%s%s' %(name,0))
    # Number the remaining kernels 1, 2, ... so that each ICM part gets its own name.
    for j, kernel in enumerate(kernels_list[1:], 1):
        K += ICM(input_dim,num_outputs,kernel,W_rank,name='%s%s' %(name,j))
    return K
|
||||
|
||||
|
||||
def Private(input_dim, num_outputs, kernel, output, kappa=None,name='X'):
    """
    Builds an ICM kernel that is restricted to a single output

    The kernel is built with ICM (W_rank=1); W is then fixed to 0 and the
    kappa entries of every output other than ``output`` are set to 0 and fixed.

    :input_dim: Input dimensionality
    :num_outputs: Number of outputs
    :param kernel: kernel that will be multiplied by the coregionalize kernel (matrix B).
    :type kernel: a GPy kernel
    :param output: index of the output whose kappa entry is left untouched
    :type output: integer
    :param kappa: passed on to ICM
    :param name: name given to the kernel
    :type name: string
    """
    K = ICM(input_dim,num_outputs,kernel,W_rank=1,kappa=kappa,name=name)
    K.B.W.fix(0)
    # list(...) so that pop() also works where range() is not a list.
    _range = list(range(num_outputs))
    _range.pop(output)
    for j in _range:
        K.B.kappa[j] = 0
        K.B.kappa[j].fix()
    return K
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue