merge with commit of dgplvm

frb-yousefi 2015-04-20 16:02:19 +01:00
commit 401374d068
152 changed files with 4272 additions and 1875 deletions

View file

@@ -1,24 +1,24 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from gp_regression import GPRegression
from gp_classification import GPClassification
from sparse_gp_regression import SparseGPRegression, SparseGPRegressionUncertainInput
from sparse_gp_classification import SparseGPClassification
from gplvm import GPLVM
from bcgplvm import BCGPLVM
from sparse_gplvm import SparseGPLVM
from warped_gp import WarpedGP
from bayesian_gplvm import BayesianGPLVM
from mrd import MRD
from gradient_checker import GradientChecker
from ss_gplvm import SSGPLVM
from gp_coregionalized_regression import GPCoregionalizedRegression
from sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
from gp_heteroscedastic_regression import GPHeteroscedasticRegression
from ss_mrd import SSMRD
from gp_kronecker_gaussian_regression import GPKroneckerGaussianRegression
from gp_var_gauss import GPVariationalGaussianApproximation
from one_vs_all_classification import OneVsAllClassification
from one_vs_all_sparse_classification import OneVsAllSparseClassification
from dpgplvm import DPBayesianGPLVM
from .gp_regression import GPRegression
from .gp_classification import GPClassification
from .sparse_gp_regression import SparseGPRegression, SparseGPRegressionUncertainInput
from .sparse_gp_classification import SparseGPClassification
from .gplvm import GPLVM
from .bcgplvm import BCGPLVM
from .sparse_gplvm import SparseGPLVM
from .warped_gp import WarpedGP
from .bayesian_gplvm import BayesianGPLVM
from .mrd import MRD
from .gradient_checker import GradientChecker, HessianChecker, SkewChecker
from .ss_gplvm import SSGPLVM
from .gp_coregionalized_regression import GPCoregionalizedRegression
from .sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
from .gp_heteroscedastic_regression import GPHeteroscedasticRegression
from .ss_mrd import SSMRD
from .gp_kronecker_gaussian_regression import GPKroneckerGaussianRegression
from .gp_var_gauss import GPVariationalGaussianApproximation
from .one_vs_all_classification import OneVsAllClassification
from .one_vs_all_sparse_classification import OneVsAllSparseClassification
from .dpgplvm import DPBayesianGPLVM
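A note on the import block above: the old form (from gp_regression import GPRegression) relies on Python 2's implicit relative imports, which Python 3 rejects; the explicit relative form (from .gp_regression import ...) works on both. A minimal, self-contained sketch of the same migration, using a throwaway package with hypothetical names:

import os, sys, tempfile

# Build a tiny throwaway package on disk to contrast the two import styles.
pkg_dir = os.path.join(tempfile.mkdtemp(), "demo_pkg")
os.makedirs(pkg_dir)
with open(os.path.join(pkg_dir, "regression.py"), "w") as fh:
    fh.write("class Regression(object):\n    pass\n")
with open(os.path.join(pkg_dir, "__init__.py"), "w") as fh:
    # 'from regression import Regression' works only on Python 2 (implicit
    # relative import); the explicit relative form below works on 2 and 3.
    fh.write("from .regression import Regression\n")

sys.path.insert(0, os.path.dirname(pkg_dir))
from demo_pkg import Regression
print(Regression)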

View file

@@ -24,7 +24,7 @@ class BayesianGPLVM(SparseGP_MPI):
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None,
name='bayesian gplvm', mpi_comm=None, normalizer=None,
missing_data=False, stochastic=False, batchsize=1):
missing_data=False, stochastic=False, batchsize=1, Y_metadata=None):
self.logger = logging.getLogger(self.__class__.__name__)
if X is None:
@@ -69,6 +69,7 @@ class BayesianGPLVM(SparseGP_MPI):
name=name, inference_method=inference_method,
normalizer=normalizer, mpi_comm=mpi_comm,
variational_prior=self.variational_prior,
Y_metadata=Y_metadata
)
self.link_parameter(self.X, index=0)
@@ -83,7 +84,7 @@ class BayesianGPLVM(SparseGP_MPI):
def parameters_changed(self):
super(BayesianGPLVM,self).parameters_changed()
if isinstance(self.inference_method, VarDTC_minibatch):
return
return
kl_fctr = 1.
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
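The only functional change in this file is the new Y_metadata keyword, which is now forwarded to the SparseGP_MPI constructor. A minimal usage sketch, assuming a GPy build that contains this commit and the standard Model.optimize interface; the contents of the metadata dict depend on the likelihood and are simply left as None here:

import numpy as np
import GPy

Y = np.random.randn(40, 5)                      # toy high-dimensional observations
m = GPy.models.BayesianGPLVM(Y, input_dim=2, num_inducing=10,
                             Y_metadata=None)   # per-output metadata for non-Gaussian likelihoods, if any
m.optimize(max_iters=10)
print(m.log_likelihood())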

View file

@@ -5,6 +5,8 @@ from ..core.model import Model
import itertools
import numpy
from ..core.parameterization import Param
np = numpy
from ..util.block_matrices import get_blocks, get_block_shapes, unblock, get_blocks_3d, get_block_shapes_3d
def get_shape(x):
if isinstance(x, numpy.ndarray):
@@ -111,3 +113,261 @@ class GradientChecker(Model):
#for name, shape in zip(self.names, self.shapes):
#_param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
#return _param_names
class HessianChecker(GradientChecker):
def __init__(self, f, df, ddf, x0, names=None, *args, **kwargs):
"""
:param f: Function (only used for numerical hessian gradient)
:param df: Gradient of function to check
:param ddf: Analytical gradient function
:param x0:
Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
Can be a list of arrays, if takes a list of arrays. This list will be passed
to f and df in the same order as given here.
If only one argument, make sure not to pass a list!!!
:type x0: [array-like] | array-like | float | int
:param names:
Names to print, when performing gradcheck. If a list was passed to x0
a list of names with the same length is expected.
:param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
"""
super(HessianChecker, self).__init__(df, ddf, x0, names=names, *args, **kwargs)
self._f = f
self._df = df
self._ddf = ddf
def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False):
"""
Overwrite checkgrad method to check whole block instead of looping through
Shows diagnostics using matshow instead
:param verbose: If True, print a "full" checking of each parameter
:type verbose: bool
:param step: The size of the step around which to linearise the objective
:type step: float (default 1e-6)
:param tolerance: the tolerance allowed (see note)
:type tolerance: float (default 1e-3)
Note:-
The gradient is considered correct if the ratio of the analytical
and numerical gradients is within <tolerance> of unity.
"""
try:
import numdifftools as nd
except:
raise ImportError("Don't have numdifftools package installed, it is not a GPy dependency as of yet, it is only used for hessian tests")
if target_param:
raise NotImplementedError('Only basic functionality is provided with this gradchecker')
#Repeat for each parameter, not the nicest but shouldn't be many cases where there are many
#variables
current_index = 0
for name, shape in zip(self.names, self.shapes):
current_size = numpy.prod(shape)
x = self.optimizer_array.copy()
#x = self._get_params_transformed().copy()
x = x[current_index:current_index + current_size].reshape(shape)
# Check gradients
analytic_hess = self._ddf(x)
if analytic_hess.shape[1] == 1:
analytic_hess = numpy.diagflat(analytic_hess)
#From the docs:
#x0 : vector location
#at which to differentiate fun
#If x0 is an N x M array, then fun is assumed to be a function
#of N*M variables., thus we must have it flat, not (N,1), but just (N,)
#numeric_hess_partial = nd.Hessian(self._f, vectorized=False)
numeric_hess_partial = nd.Jacobian(self._df, vectorized=False)
#numeric_hess_partial = nd.Derivative(self._df, vectorized=True)
numeric_hess = numeric_hess_partial(x)
check_passed = self.checkgrad_block(analytic_hess, numeric_hess, verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=plot)
current_index += current_size
return check_passed
def checkgrad_block(self, analytic_hess, numeric_hess, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False):
"""
Checkgrad a block matrix
"""
if analytic_hess.dtype is np.dtype('object'):
#Make numeric hessian also into a block matrix
real_size = get_block_shapes(analytic_hess)
num_elements = np.sum(real_size)
if (num_elements, num_elements) == numeric_hess.shape:
#If the sizes are the same we assume they are the same
#(we have not fixed any values so the numeric is the whole hessian)
numeric_hess = get_blocks(numeric_hess, real_size)
else:
#Make a fake empty matrix and fill out the correct block
tmp_numeric_hess = get_blocks(np.zeros((num_elements, num_elements)), real_size)
tmp_numeric_hess[block_indices] = numeric_hess.copy()
numeric_hess = tmp_numeric_hess
if block_indices is not None:
#Extract the right block
analytic_hess = analytic_hess[block_indices]
numeric_hess = numeric_hess[block_indices]
else:
#Unblock them if they are in blocks and you aren't checking a single block (checking whole hessian)
if analytic_hess.dtype is np.dtype('object'):
analytic_hess = unblock(analytic_hess)
numeric_hess = unblock(numeric_hess)
ratio = numeric_hess / (numpy.where(analytic_hess==0, 1e-10, analytic_hess))
difference = numpy.abs(analytic_hess - numeric_hess)
check_passed = numpy.all((numpy.abs(1 - ratio)) < tolerance) or numpy.allclose(numeric_hess, analytic_hess, atol = tolerance)
if verbose:
if block_indices:
print "\nBlock {}".format(block_indices)
else:
print "\nAll blocks"
header = ['Checked', 'Max-Ratio', 'Min-Ratio', 'Min-Difference', 'Max-Difference']
header_string = map(lambda x: ' | '.join(header), [header])
separator = '-' * len(header_string[0])
print '\n'.join([header_string[0], separator])
min_r = '%.6f' % float(numpy.min(ratio))
max_r = '%.6f' % float(numpy.max(ratio))
max_d = '%.6f' % float(numpy.max(difference))
min_d = '%.6f' % float(numpy.min(difference))
cols = [max_r, min_r, min_d, max_d]
if check_passed:
checked = "\033[92m True \033[0m"
else:
checked = "\033[91m False \033[0m"
grad_string = "{} | {} | {} | {} | {} ".format(checked, cols[0], cols[1], cols[2], cols[3])
print grad_string
if plot:
import pylab as pb
fig, axes = pb.subplots(2, 2)
max_lim = numpy.max(numpy.vstack((analytic_hess, numeric_hess)))
min_lim = numpy.min(numpy.vstack((analytic_hess, numeric_hess)))
msa = axes[0,0].matshow(analytic_hess, vmin=min_lim, vmax=max_lim)
axes[0,0].set_title('Analytic hessian')
axes[0,0].xaxis.set_ticklabels([None])
axes[0,0].yaxis.set_ticklabels([None])
axes[0,0].xaxis.set_ticks([None])
axes[0,0].yaxis.set_ticks([None])
msn = axes[0,1].matshow(numeric_hess, vmin=min_lim, vmax=max_lim)
pb.colorbar(msn, ax=axes[0,1])
axes[0,1].set_title('Numeric hessian')
axes[0,1].xaxis.set_ticklabels([None])
axes[0,1].yaxis.set_ticklabels([None])
axes[0,1].xaxis.set_ticks([None])
axes[0,1].yaxis.set_ticks([None])
msr = axes[1,0].matshow(ratio)
pb.colorbar(msr, ax=axes[1,0])
axes[1,0].set_title('Ratio')
axes[1,0].xaxis.set_ticklabels([None])
axes[1,0].yaxis.set_ticklabels([None])
axes[1,0].xaxis.set_ticks([None])
axes[1,0].yaxis.set_ticks([None])
msd = axes[1,1].matshow(difference)
pb.colorbar(msd, ax=axes[1,1])
axes[1,1].set_title('difference')
axes[1,1].xaxis.set_ticklabels([None])
axes[1,1].yaxis.set_ticklabels([None])
axes[1,1].xaxis.set_ticks([None])
axes[1,1].yaxis.set_ticks([None])
if block_indices:
fig.suptitle("Block: {}".format(block_indices))
pb.show()
return check_passed
class SkewChecker(HessianChecker):
def __init__(self, df, ddf, dddf, x0, names=None, *args, **kwargs):
"""
:param df: gradient of function
:param ddf: Gradient of function to check (hessian)
:param dddf: Analytical gradient function (third derivative)
:param x0:
Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
Can be a list of arrays, if takes a list of arrays. This list will be passed
to f and df in the same order as given here.
If only one argument, make sure not to pass a list!!!
:type x0: [array-like] | array-like | float | int
:param names:
Names to print, when performing gradcheck. If a list was passed to x0
a list of names with the same length is expected.
:param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
"""
super(SkewChecker, self).__init__(df, ddf, dddf, x0, names=names, *args, **kwargs)
def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False, super_plot=False):
"""
Gradient checker that just checks each hessian individually
super_plot will plot the hessian wrt every parameter, plot will just do the first one
"""
try:
import numdifftools as nd
except:
raise ImportError("Don't have numdifftools package installed, it is not a GPy dependency as of yet, it is only used for hessian tests")
if target_param:
raise NotImplementedError('Only basic functionality is provided with this gradchecker')
#Repeat for each parameter, not the nicest but shouldn't be many cases where there are many
#variables
current_index = 0
for name, n_shape in zip(self.names, self.shapes):
current_size = numpy.prod(n_shape)
x = self.optimizer_array.copy()
#x = self._get_params_transformed().copy()
x = x[current_index:current_index + current_size].reshape(n_shape)
# Check gradients
#Actually the third derivative
analytic_hess = self._ddf(x)
#Can only calculate jacobian for one variable at a time
#From the docs:
#x0 : vector location
#at which to differentiate fun
#If x0 is an N x M array, then fun is assumed to be a function
#of N*M variables., thus we must have it flat, not (N,1), but just (N,)
#numeric_hess_partial = nd.Hessian(self._f, vectorized=False)
#Actually _df is already the hessian
numeric_hess_partial = nd.Jacobian(self._df, vectorized=True)
numeric_hess = numeric_hess_partial(x)
print "Done making numerical hessian"
if analytic_hess.dtype is np.dtype('object'):
#Blockify numeric_hess aswell
blocksizes, pagesizes = get_block_shapes_3d(analytic_hess)
#HACK
real_block_size = np.sum(blocksizes)
numeric_hess = numeric_hess.reshape(real_block_size, real_block_size, pagesizes)
#numeric_hess = get_blocks_3d(numeric_hess, blocksizes)#, pagesizes)
else:
numeric_hess = numeric_hess.reshape(*analytic_hess.shape)
#Check every block individually (for ease)
check_passed = [False]*numeric_hess.shape[2]
for block_ind in xrange(numeric_hess.shape[2]):
#Unless super_plot is set, just plot the first one
p = True if (plot and block_ind == numeric_hess.shape[2]-1) or super_plot else False
if verbose:
print "Checking derivative of hessian wrt parameter number {}".format(block_ind)
check_passed[block_ind] = self.checkgrad_block(analytic_hess[:,:,block_ind], numeric_hess[:,:,block_ind], verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=p)
current_index += current_size
return np.all(check_passed)
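HessianChecker and SkewChecker, exported from GPy.models by the __init__.py change above, extend GradientChecker so that whole Hessian blocks are compared against a numdifftools Jacobian of the supplied gradient instead of looping element by element. A minimal, hypothetical sketch on a quadratic whose Hessian is a constant matrix; it assumes numdifftools is installed:

import numpy as np
from GPy.models import HessianChecker

A = np.array([[3.0, 0.5],
              [0.5, 2.0]])                       # symmetric, so the Hessian is exactly A
f   = lambda x: 0.5 * x.dot(A).dot(x)            # objective (kept only for reference)
df  = lambda x: A.dot(x)                         # analytic gradient
ddf = lambda x: A                                # analytic Hessian (constant)

hc = HessianChecker(f, df, ddf, x0=np.random.randn(2))
print(hc.checkgrad())                            # compares A with the numeric Jacobian of df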

View file

@@ -74,6 +74,8 @@ class MRD(BayesianGPLVMMiniBatch):
self.logger.debug("creating observable arrays")
self.Ylist = [ObsAr(Y) for Y in Ylist]
#The next line is a fix for Python 3. It replicates the python 2 behaviour from the above comprehension
Y = Ylist[-1]
if Ynames is None:
self.logger.debug("creating Ynames")
@@ -82,7 +84,7 @@ class MRD(BayesianGPLVMMiniBatch):
assert len(self.names) == len(self.Ylist), "one name per dataset, or None if Ylist is a dict"
if inference_method is None:
self.inference_method = InferenceMethodList([VarDTC() for _ in xrange(len(self.Ylist))])
self.inference_method = InferenceMethodList([VarDTC() for _ in range(len(self.Ylist))])
else:
assert isinstance(inference_method, InferenceMethodList), "please provide one inference method per Y in the list and provide it as InferenceMethodList, inference_method given: {}".format(inference_method)
self.inference_method = inference_method
@@ -137,7 +139,7 @@ class MRD(BayesianGPLVMMiniBatch):
self.bgplvms = []
for i, n, k, l, Y, im, bs in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist, self.inference_method, batchsize):
for i, n, k, l, Y, im, bs in zip(itertools.count(), Ynames, kernels, likelihoods, Ylist, self.inference_method, batchsize):
assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"
md = np.isnan(Y).any()
spgp = BayesianGPLVMMiniBatch(Y, input_dim, X, X_variance,
@@ -164,7 +166,7 @@ class MRD(BayesianGPLVMMiniBatch):
self._log_marginal_likelihood = 0
self.Z.gradient[:] = 0.
self.X.gradient[:] = 0.
for b, i in itertools.izip(self.bgplvms, self.inference_method):
for b, i in zip(self.bgplvms, self.inference_method):
self._log_marginal_likelihood += b._log_marginal_likelihood
self.logger.info('working on im <{}>'.format(hex(id(i))))
@@ -195,7 +197,7 @@ class MRD(BayesianGPLVMMiniBatch):
elif init in "PCA_single":
X = np.zeros((Ylist[0].shape[0], self.input_dim))
fracs = []
for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist):
for qs, Y in zip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist):
x,frcs = initialize_latent('PCA', len(qs), Y)
X[:, qs] = x
fracs.append(frcs)
@@ -327,9 +329,9 @@ class MRD(BayesianGPLVMMiniBatch):
def __getstate__(self):
state = super(MRD, self).__getstate__()
if state.has_key('kern'):
if 'kern' in state:
del state['kern']
if state.has_key('likelihood'):
if 'likelihood' in state:
del state['likelihood']
return state
@@ -338,4 +340,4 @@ class MRD(BayesianGPLVMMiniBatch):
super(MRD, self).__setstate__(state)
self.kern = self.bgplvms[0].kern
self.likelihood = self.bgplvms[0].likelihood
self.parameters_changed()
self.parameters_changed()
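The MRD edits above are pure Python-2-to-3 compatibility changes (has_key, itertools.izip, xrange, plus the comprehension-variable fix noted in the comment). The replaced idioms and their portable equivalents, as a small runnable side-by-side:

state = {'kern': 1, 'likelihood': 2}
# state.has_key('kern')            # Python 2 only
print('kern' in state)             # works on Python 2 and 3

a, b = [1, 2, 3], ['x', 'y', 'z']
# itertools.izip(a, b)             # Python 2 only; izip was removed in Python 3
print(list(zip(a, b)))             # zip exists on both (lazy on Python 3)

# xrange(3)                        # Python 2 only
print([i * i for i in range(3)])   # range exists on both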

View file

@@ -30,7 +30,7 @@ class OneVsAllSparseClassification(object):
self.results = {}
for yj in labels:
print 'Class %s vs all' %yj
print('Class %s vs all' %yj)
Ynew = Y.copy()
Ynew[Y.flatten()!=yj] = 0
Ynew[Y.flatten()==yj] = 1

View file

@@ -1,6 +1,7 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from __future__ import print_function
import numpy as np
from ..core.parameterization.param import Param
from ..core.sparse_gp import SparseGP
@@ -43,14 +44,15 @@ class SparseGPMiniBatch(SparseGP):
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
name='sparse gp', Y_metadata=None, normalizer=False,
missing_data=False, stochastic=False, batchsize=1):
#pick a sensible inference method
# pick a sensible inference method
if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian):
inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1])
inference_method = var_dtc.VarDTC(limit=1 if not missing_data else Y.shape[1])
else:
#inference_method = ??
raise NotImplementedError, "what to do what to do?"
print "defaulting to ", inference_method, "for latent function inference"
raise NotImplementedError("what to do what to do?")
print("defaulting to ", inference_method, "for latent function inference")
self.kl_factr = 1.
self.Z = Param('inducing inputs', Z)
@@ -80,13 +82,13 @@ class SparseGPMiniBatch(SparseGP):
overall = self.Y_normalized.shape[1]
m_f = lambda i: "Precomputing Y for missing data: {: >7.2%}".format(float(i+1)/overall)
message = m_f(-1)
print message,
for d in xrange(overall):
print(message, end=' ')
for d in range(overall):
self.Ylist.append(self.Y_normalized[self.ninan[:, d], d][:, None])
print ' '*(len(message)+1) + '\r',
print(' '*(len(message)+1) + '\r', end=' ')
message = m_f(d)
print message,
print ''
print(message, end=' ')
print('')
self.posterior = None
@@ -181,11 +183,11 @@ class SparseGPMiniBatch(SparseGP):
full_values[key][value_indices[key]] += current_values[key]
"""
for key in current_values.keys():
if value_indices is not None and value_indices.has_key(key):
if value_indices is not None and key in value_indices:
index = value_indices[key]
else:
index = slice(None)
if full_values.has_key(key):
if key in full_values:
full_values[key][index] += current_values[key]
else:
full_values[key] = current_values[key]
@@ -241,15 +243,15 @@ class SparseGPMiniBatch(SparseGP):
if not self.stochastics:
m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim)
message = m_f(-1)
print message,
print(message, end=' ')
for d in self.stochastics.d:
ninan = self.ninan[:, d]
if not self.stochastics:
print ' '*(len(message)) + '\r',
print(' '*(len(message)) + '\r', end=' ')
message = m_f(d)
print message,
print(message, end=' ')
posterior, log_marginal_likelihood, \
grad_dict, current_values, value_indices = self._inner_parameters_changed(
@@ -268,7 +270,7 @@ class SparseGPMiniBatch(SparseGP):
woodbury_vector[:, d:d+1] = posterior.woodbury_vector
self._log_marginal_likelihood += log_marginal_likelihood
if not self.stochastics:
print ''
print('')
if self.posterior is None:
self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
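The print changes in this file depend on the "from __future__ import print_function" line added at the top of the module: the Python 2 statement form "print message," (trailing comma suppresses the newline) becomes the function form print(message, end=' '). A toy version of the same overwrite-in-place progress display, with illustrative names only:

from __future__ import print_function   # required on Python 2 for print(..., end=...)
import sys

total = 5
for d in range(total):
    message = "Precomputing: {: >7.2%}".format(float(d + 1) / total)
    print(message, end='\r')             # '\r' rewinds to the line start instead of adding a newline
    sys.stdout.flush()
print('')                                # final newline once the loop is done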

View file

@@ -39,7 +39,10 @@ class SSGPLVM(SparseGP_MPI):
X_variance = np.random.uniform(0,.1,X.shape)
if Gamma is None:
gamma = np.random.randn(X.shape[0], input_dim)
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
gamma[gamma>1.-1e-9] = 1.-1e-9
gamma[gamma<1e-9] = 1e-9
else:
gamma = Gamma.copy()
@@ -71,7 +74,7 @@ class SSGPLVM(SparseGP_MPI):
self.link_parameter(self.X, index=0)
if self.group_spike:
[self.X.gamma[:,i].tie('tieGamma'+str(i)) for i in xrange(self.X.gamma.shape[1])] # Tie columns together
[self.X.gamma[:,i].tie('tieGamma'+str(i)) for i in range(self.X.gamma.shape[1])] # Tie columns together
def set_X_gradients(self, X, X_grad):
"""Set the gradients of the posterior distribution of X in its specific form."""

View file

@@ -19,10 +19,10 @@ class SSMRD(Model):
name='model_'+str(i)) for i,y in enumerate(Ylist)]
self.add_parameters(*(self.models))
[[[self.models[m].X.mean[i,j:j+1].tie('mean_'+str(i)+'_'+str(j)) for m in xrange(len(self.models))] for j in xrange(self.models[0].X.mean.shape[1])]
for i in xrange(self.models[0].X.mean.shape[0])]
[[[self.models[m].X.variance[i,j:j+1].tie('var_'+str(i)+'_'+str(j)) for m in xrange(len(self.models))] for j in xrange(self.models[0].X.variance.shape[1])]
for i in xrange(self.models[0].X.variance.shape[0])]
[[[self.models[m].X.mean[i,j:j+1].tie('mean_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.mean.shape[1])]
for i in range(self.models[0].X.mean.shape[0])]
[[[self.models[m].X.variance[i,j:j+1].tie('var_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.variance.shape[1])]
for i in range(self.models[0].X.variance.shape[0])]
self.updates = True
@@ -31,4 +31,4 @@ class SSMRD(Model):
self._log_marginal_likelihood = sum([m._log_marginal_likelihood for m in self.models])
def log_likelihood(self):
return self._log_marginal_likelihood
return self._log_marginal_likelihood

View file

@@ -1,7 +1,6 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..util.warping_functions import *
from ..core import GP
@@ -10,14 +9,16 @@ from GPy.util.warping_functions import TanhWarpingFunction_d
from GPy import kern
class WarpedGP(GP):
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalize_X=False, normalize_Y=False):
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3):
if kernel is None:
kernel = kern.rbf(X.shape[1])
kernel = kern.RBF(X.shape[1])
if warping_function == None:
self.warping_function = TanhWarpingFunction_d(warping_terms)
self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1,) * 1)
else:
self.warping_function = warping_function
self.scale_data = False
if self.scale_data:
@@ -25,10 +26,10 @@ class WarpedGP(GP):
self.has_uncertain_inputs = False
self.Y_untransformed = Y.copy()
self.predict_in_warped_space = False
likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y)
likelihood = likelihoods.Gaussian()
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params())
GP.__init__(self, X, self.transform_data(), likelihood=likelihood, kernel=kernel)
self.link_parameter(self.warping_function)
def _scale_data(self, Y):
self._Ymax = Y.max()
@@ -38,62 +39,55 @@ class WarpedGP(GP):
def _unscale_data(self, Y):
return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin
def _set_params(self, x):
self.warping_params = x[:self.warping_function.num_parameters]
Y = self.transform_data()
self.likelihood.set_data(Y)
GP._set_params(self, x[self.warping_function.num_parameters:].copy())
def parameters_changed(self):
self.Y[:] = self.transform_data()
super(WarpedGP, self).parameters_changed()
def _get_params(self):
return np.hstack((self.warping_params.flatten().copy(), GP._get_params(self).copy()))
Kiy = self.posterior.woodbury_vector.flatten()
def _get_param_names(self):
warping_names = self.warping_function._get_param_names()
param_names = GP._get_param_names(self)
return warping_names + param_names
def transform_data(self):
Y = self.warping_function.f(self.Y_untransformed.copy(), self.warping_params).copy()
return Y
def log_likelihood(self):
ll = GP.log_likelihood(self)
jacobian = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
return ll + np.log(jacobian).sum()
def _log_likelihood_gradients(self):
ll_grads = GP._log_likelihood_gradients(self)
alpha = np.dot(self.Ki, self.likelihood.Y.flatten())
warping_grads = self.warping_function_gradients(alpha)
warping_grads = np.append(warping_grads[:, :-1].flatten(), warping_grads[0, -1])
return np.hstack((warping_grads.flatten(), ll_grads.flatten()))
def warping_function_gradients(self, Kiy):
grad_y = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed, self.warping_params,
grad_y = self.warping_function.fgrad_y(self.Y_untransformed)
grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed,
return_covar_chain=True)
djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0)
dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0)
return -dquad_dpsi + djac_dpsi
warping_grads = -dquad_dpsi + djac_dpsi
self.warping_function.psi.gradient[:] = warping_grads[:, :-1]
self.warping_function.d.gradient[:] = warping_grads[0, -1]
def transform_data(self):
Y = self.warping_function.f(self.Y_untransformed.copy()).copy()
return Y
def log_likelihood(self):
ll = GP.log_likelihood(self)
jacobian = self.warping_function.fgrad_y(self.Y_untransformed)
return ll + np.log(jacobian).sum()
def plot_warping(self):
self.warping_function.plot(self.warping_params, self.Y_untransformed.min(), self.Y_untransformed.max())
self.warping_function.plot(self.Y_untransformed.min(), self.Y_untransformed.max())
def predict(self, Xnew, which_parts='all', full_cov=False, pred_init=None):
def predict(self, Xnew, which_parts='all', pred_init=None):
# normalize X values
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
mu, var = GP._raw_predict(self, Xnew, full_cov=full_cov, which_parts=which_parts)
# Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
mu, var = GP._raw_predict(self, Xnew)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
mean, var = self.likelihood.predictive_values(mu, var)
if self.predict_in_warped_space:
mean = self.warping_function.f_inv(mean, self.warping_params, y=pred_init)
var = self.warping_function.f_inv(var, self.warping_params)
mean = self.warping_function.f_inv(mean, y=pred_init)
var = self.warping_function.f_inv(var)
if self.scale_data:
mean = self._unscale_data(mean)
return mean, var, _025pm, _975pm
return mean, var
if __name__ == '__main__':
X = np.random.randn(100, 1)
Y = np.sin(X) + np.random.randn(100, 1)*0.05
m = WarpedGP(X, Y)
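The refactored WarpedGP keeps the same model as before: the observations y are pushed through a monotonic warping f, a standard GP is fitted to f(y), and log_likelihood adds the log-Jacobian correction sum_n log f'(y_n), which is the "ll + np.log(jacobian).sum()" term above. A self-contained numeric sketch of that correction with a hypothetical cubic warping (the class itself uses TanhWarpingFunction_d):

import numpy as np

np.random.seed(0)
X = np.random.randn(20, 1)
Y = np.sin(X) + 0.05 * np.random.randn(20, 1)

f     = lambda y: y + y ** 3                 # hypothetical monotonic warping
fgrad = lambda y: 1.0 + 3.0 * y ** 2         # its derivative, positive everywhere

K = np.exp(-0.5 * (X - X.T) ** 2) + 0.1 * np.eye(20)   # RBF Gram matrix plus a noise term
z = f(Y)
_, logdet = np.linalg.slogdet(K)
gp_ll = -0.5 * (float(z.T.dot(np.linalg.solve(K, z))) + logdet + 20 * np.log(2 * np.pi))
warped_ll = gp_ll + np.log(fgrad(Y)).sum()   # log p(y) = log N(f(y) | 0, K) + sum_n log f'(y_n)
print(warped_ll)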