Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-13 22:12:38 +02:00)

Commit 401374d068: merge with commit of dgplvm
152 changed files with 4272 additions and 1875 deletions
GPy/models/__init__.py
@@ -1,24 +1,24 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-from gp_regression import GPRegression
-from gp_classification import GPClassification
-from sparse_gp_regression import SparseGPRegression, SparseGPRegressionUncertainInput
-from sparse_gp_classification import SparseGPClassification
-from gplvm import GPLVM
-from bcgplvm import BCGPLVM
-from sparse_gplvm import SparseGPLVM
-from warped_gp import WarpedGP
-from bayesian_gplvm import BayesianGPLVM
-from mrd import MRD
-from gradient_checker import GradientChecker
-from ss_gplvm import SSGPLVM
-from gp_coregionalized_regression import GPCoregionalizedRegression
-from sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
-from gp_heteroscedastic_regression import GPHeteroscedasticRegression
-from ss_mrd import SSMRD
-from gp_kronecker_gaussian_regression import GPKroneckerGaussianRegression
-from gp_var_gauss import GPVariationalGaussianApproximation
-from one_vs_all_classification import OneVsAllClassification
-from one_vs_all_sparse_classification import OneVsAllSparseClassification
-from dpgplvm import DPBayesianGPLVM
+from .gp_regression import GPRegression
+from .gp_classification import GPClassification
+from .sparse_gp_regression import SparseGPRegression, SparseGPRegressionUncertainInput
+from .sparse_gp_classification import SparseGPClassification
+from .gplvm import GPLVM
+from .bcgplvm import BCGPLVM
+from .sparse_gplvm import SparseGPLVM
+from .warped_gp import WarpedGP
+from .bayesian_gplvm import BayesianGPLVM
+from .mrd import MRD
+from .gradient_checker import GradientChecker, HessianChecker, SkewChecker
+from .ss_gplvm import SSGPLVM
+from .gp_coregionalized_regression import GPCoregionalizedRegression
+from .sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
+from .gp_heteroscedastic_regression import GPHeteroscedasticRegression
+from .ss_mrd import SSMRD
+from .gp_kronecker_gaussian_regression import GPKroneckerGaussianRegression
+from .gp_var_gauss import GPVariationalGaussianApproximation
+from .one_vs_all_classification import OneVsAllClassification
+from .one_vs_all_sparse_classification import OneVsAllSparseClassification
+from .dpgplvm import DPBayesianGPLVM
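Note: this hunk swaps Python 2 implicit relative imports for the explicit relative form, which is the only spelling Python 3 accepts inside a package; the gradient_checker line also re-exports the new HessianChecker and SkewChecker added further down in this commit. A minimal sketch with a hypothetical package `pkg` (not part of this diff):

    # pkg/__init__.py -- hypothetical two-module package
    from .module_a import ClassA    # explicit relative import: Python 2.5+ and Python 3
    # from module_a import ClassA  # implicit relative import: Python 2 only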
GPy/models/bayesian_gplvm.py
@@ -24,7 +24,7 @@ class BayesianGPLVM(SparseGP_MPI):
     def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
                  Z=None, kernel=None, inference_method=None, likelihood=None,
                  name='bayesian gplvm', mpi_comm=None, normalizer=None,
-                 missing_data=False, stochastic=False, batchsize=1):
+                 missing_data=False, stochastic=False, batchsize=1, Y_metadata=None):
 
         self.logger = logging.getLogger(self.__class__.__name__)
         if X is None:
@@ -69,6 +69,7 @@ class BayesianGPLVM(SparseGP_MPI):
                               name=name, inference_method=inference_method,
                               normalizer=normalizer, mpi_comm=mpi_comm,
                               variational_prior=self.variational_prior,
+                              Y_metadata=Y_metadata
                               )
         self.link_parameter(self.X, index=0)
 
@@ -83,7 +84,7 @@ class BayesianGPLVM(SparseGP_MPI):
     def parameters_changed(self):
         super(BayesianGPLVM,self).parameters_changed()
         if isinstance(self.inference_method, VarDTC_minibatch):
-            return
+            return
 
         kl_fctr = 1.
         self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
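Note: the new `Y_metadata` keyword is only threaded through to the parent `SparseGP_MPI` constructor; the `return`/`return` pair in the last hunk is a whitespace-only change that this rendering cannot show. A hedged usage sketch (the shapes and the metadata key below are illustrative, not taken from the diff):

    import numpy as np
    import GPy

    Y = np.random.randn(50, 3)                             # 50 points, 3 output dimensions
    meta = {'output_index': np.zeros((50, 1), dtype=int)}  # illustrative metadata dict
    m = GPy.models.BayesianGPLVM(Y, input_dim=2, Y_metadata=meta)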
GPy/models/gradient_checker.py
@@ -5,6 +5,8 @@ from ..core.model import Model
 import itertools
 import numpy
+from ..core.parameterization import Param
+np = numpy
 from ..util.block_matrices import get_blocks, get_block_shapes, unblock, get_blocks_3d, get_block_shapes_3d
 
 def get_shape(x):
     if isinstance(x, numpy.ndarray):
@@ -111,3 +113,261 @@ class GradientChecker(Model):
     #for name, shape in zip(self.names, self.shapes):
         #_param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
     #return _param_names
+
+
+class HessianChecker(GradientChecker):
+
+    def __init__(self, f, df, ddf, x0, names=None, *args, **kwargs):
+        """
+        :param f: Function (only used for numerical hessian gradient)
+        :param df: Gradient of function to check
+        :param ddf: Analytical gradient function
+        :param x0:
+            Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
+            Can be a list of arrays, if takes a list of arrays. This list will be passed
+            to f and df in the same order as given here.
+            If only one argument, make sure not to pass a list!!!
+
+        :type x0: [array-like] | array-like | float | int
+        :param names:
+            Names to print, when performing gradcheck. If a list was passed to x0
+            a list of names with the same length is expected.
+        :param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
+
+        """
+        super(HessianChecker, self).__init__(df, ddf, x0, names=names, *args, **kwargs)
+        self._f = f
+        self._df = df
+        self._ddf = ddf
+
+    def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False):
+        """
+        Overwrite checkgrad method to check whole block instead of looping through
+
+        Shows diagnostics using matshow instead
+
+        :param verbose: If True, print a "full" checking of each parameter
+        :type verbose: bool
+        :param step: The size of the step around which to linearise the objective
+        :type step: float (default 1e-6)
+        :param tolerance: the tolerance allowed (see note)
+        :type tolerance: float (default 1e-3)
+
+        Note:-
+            The gradient is considered correct if the ratio of the analytical
+            and numerical gradients is within <tolerance> of unity.
+        """
+        try:
+            import numdifftools as nd
+        except:
+            raise ImportError("Don't have numdifftools package installed, it is not a GPy dependency as of yet, it is only used for hessian tests")
+
+        if target_param:
+            raise NotImplementedError('Only basic functionality is provided with this gradchecker')
+
+        #Repeat for each parameter, not the nicest but shouldn't be many cases where there are many
+        #variables
+        current_index = 0
+        for name, shape in zip(self.names, self.shapes):
+            current_size = numpy.prod(shape)
+            x = self.optimizer_array.copy()
+            #x = self._get_params_transformed().copy()
+            x = x[current_index:current_index + current_size].reshape(shape)
+
+            # Check gradients
+            analytic_hess = self._ddf(x)
+            if analytic_hess.shape[1] == 1:
+                analytic_hess = numpy.diagflat(analytic_hess)
+
+            #From the docs:
+            #x0 : vector location
+            #at which to differentiate fun
+            #If x0 is an N x M array, then fun is assumed to be a function
+            #of N*M variables., thus we must have it flat, not (N,1), but just (N,)
+            #numeric_hess_partial = nd.Hessian(self._f, vectorized=False)
+            numeric_hess_partial = nd.Jacobian(self._df, vectorized=False)
+            #numeric_hess_partial = nd.Derivative(self._df, vectorized=True)
+            numeric_hess = numeric_hess_partial(x)
+
+            check_passed = self.checkgrad_block(analytic_hess, numeric_hess, verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=plot)
+            current_index += current_size
+        return check_passed
+
+    def checkgrad_block(self, analytic_hess, numeric_hess, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False):
+        """
+        Checkgrad a block matrix
+        """
+        if analytic_hess.dtype is np.dtype('object'):
+            #Make numeric hessian also into a block matrix
+            real_size = get_block_shapes(analytic_hess)
+            num_elements = np.sum(real_size)
+            if (num_elements, num_elements) == numeric_hess.shape:
+                #If the sizes are the same we assume they are the same
+                #(we have not fixed any values so the numeric is the whole hessian)
+                numeric_hess = get_blocks(numeric_hess, real_size)
+            else:
+                #Make a fake empty matrix and fill out the correct block
+                tmp_numeric_hess = get_blocks(np.zeros((num_elements, num_elements)), real_size)
+                tmp_numeric_hess[block_indices] = numeric_hess.copy()
+                numeric_hess = tmp_numeric_hess
+
+        if block_indices is not None:
+            #Extract the right block
+            analytic_hess = analytic_hess[block_indices]
+            numeric_hess = numeric_hess[block_indices]
+        else:
+            #Unblock them if they are in blocks and you aren't checking a single block (checking whole hessian)
+            if analytic_hess.dtype is np.dtype('object'):
+                analytic_hess = unblock(analytic_hess)
+                numeric_hess = unblock(numeric_hess)
+
+        ratio = numeric_hess / (numpy.where(analytic_hess==0, 1e-10, analytic_hess))
+        difference = numpy.abs(analytic_hess - numeric_hess)
+
+        check_passed = numpy.all((numpy.abs(1 - ratio)) < tolerance) or numpy.allclose(numeric_hess, analytic_hess, atol = tolerance)
+
+        if verbose:
+            if block_indices:
+                print "\nBlock {}".format(block_indices)
+            else:
+                print "\nAll blocks"
+
+            header = ['Checked', 'Max-Ratio', 'Min-Ratio', 'Min-Difference', 'Max-Difference']
+            header_string = map(lambda x: ' | '.join(header), [header])
+            separator = '-' * len(header_string[0])
+            print '\n'.join([header_string[0], separator])
+            min_r = '%.6f' % float(numpy.min(ratio))
+            max_r = '%.6f' % float(numpy.max(ratio))
+            max_d = '%.6f' % float(numpy.max(difference))
+            min_d = '%.6f' % float(numpy.min(difference))
+            cols = [max_r, min_r, min_d, max_d]
+
+            if check_passed:
+                checked = "\033[92m True \033[0m"
+            else:
+                checked = "\033[91m False \033[0m"
+
+            grad_string = "{} | {} | {} | {} | {} ".format(checked, cols[0], cols[1], cols[2], cols[3])
+            print grad_string
+
+        if plot:
+            import pylab as pb
+            fig, axes = pb.subplots(2, 2)
+            max_lim = numpy.max(numpy.vstack((analytic_hess, numeric_hess)))
+            min_lim = numpy.min(numpy.vstack((analytic_hess, numeric_hess)))
+            msa = axes[0,0].matshow(analytic_hess, vmin=min_lim, vmax=max_lim)
+            axes[0,0].set_title('Analytic hessian')
+            axes[0,0].xaxis.set_ticklabels([None])
+            axes[0,0].yaxis.set_ticklabels([None])
+            axes[0,0].xaxis.set_ticks([None])
+            axes[0,0].yaxis.set_ticks([None])
+            msn = axes[0,1].matshow(numeric_hess, vmin=min_lim, vmax=max_lim)
+            pb.colorbar(msn, ax=axes[0,1])
+            axes[0,1].set_title('Numeric hessian')
+            axes[0,1].xaxis.set_ticklabels([None])
+            axes[0,1].yaxis.set_ticklabels([None])
+            axes[0,1].xaxis.set_ticks([None])
+            axes[0,1].yaxis.set_ticks([None])
+            msr = axes[1,0].matshow(ratio)
+            pb.colorbar(msr, ax=axes[1,0])
+            axes[1,0].set_title('Ratio')
+            axes[1,0].xaxis.set_ticklabels([None])
+            axes[1,0].yaxis.set_ticklabels([None])
+            axes[1,0].xaxis.set_ticks([None])
+            axes[1,0].yaxis.set_ticks([None])
+            msd = axes[1,1].matshow(difference)
+            pb.colorbar(msd, ax=axes[1,1])
+            axes[1,1].set_title('difference')
+            axes[1,1].xaxis.set_ticklabels([None])
+            axes[1,1].yaxis.set_ticklabels([None])
+            axes[1,1].xaxis.set_ticks([None])
+            axes[1,1].yaxis.set_ticks([None])
+            if block_indices:
+                fig.suptitle("Block: {}".format(block_indices))
+            pb.show()
+
+        return check_passed
+
+
+class SkewChecker(HessianChecker):
+
+    def __init__(self, df, ddf, dddf, x0, names=None, *args, **kwargs):
+        """
+        :param df: gradient of function
+        :param ddf: Gradient of function to check (hessian)
+        :param dddf: Analytical gradient function (third derivative)
+        :param x0:
+            Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
+            Can be a list of arrays, if takes a list of arrays. This list will be passed
+            to f and df in the same order as given here.
+            If only one argument, make sure not to pass a list!!!
+
+        :type x0: [array-like] | array-like | float | int
+        :param names:
+            Names to print, when performing gradcheck. If a list was passed to x0
+            a list of names with the same length is expected.
+        :param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
+
+        """
+        super(SkewChecker, self).__init__(df, ddf, dddf, x0, names=names, *args, **kwargs)
+
+    def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False, super_plot=False):
+        """
+        Gradient checker that just checks each hessian individually
+
+        super_plot will plot the hessian wrt every parameter, plot will just do the first one
+        """
+        try:
+            import numdifftools as nd
+        except:
+            raise ImportError("Don't have numdifftools package installed, it is not a GPy dependency as of yet, it is only used for hessian tests")
+
+        if target_param:
+            raise NotImplementedError('Only basic functionality is provided with this gradchecker')
+
+        #Repeat for each parameter, not the nicest but shouldn't be many cases where there are many
+        #variables
+        current_index = 0
+        for name, n_shape in zip(self.names, self.shapes):
+            current_size = numpy.prod(n_shape)
+            x = self.optimizer_array.copy()
+            #x = self._get_params_transformed().copy()
+            x = x[current_index:current_index + current_size].reshape(n_shape)
+
+            # Check gradients
+            #Actually the third derivative
+            analytic_hess = self._ddf(x)
+
+            #Can only calculate jacobian for one variable at a time
+            #From the docs:
+            #x0 : vector location
+            #at which to differentiate fun
+            #If x0 is an N x M array, then fun is assumed to be a function
+            #of N*M variables., thus we must have it flat, not (N,1), but just (N,)
+            #numeric_hess_partial = nd.Hessian(self._f, vectorized=False)
+            #Actually _df is already the hessian
+            numeric_hess_partial = nd.Jacobian(self._df, vectorized=True)
+            numeric_hess = numeric_hess_partial(x)
+
+            print "Done making numerical hessian"
+            if analytic_hess.dtype is np.dtype('object'):
+                #Blockify numeric_hess aswell
+                blocksizes, pagesizes = get_block_shapes_3d(analytic_hess)
+                #HACK
+                real_block_size = np.sum(blocksizes)
+                numeric_hess = numeric_hess.reshape(real_block_size, real_block_size, pagesizes)
+                #numeric_hess = get_blocks_3d(numeric_hess, blocksizes)#, pagesizes)
+            else:
+                numeric_hess = numeric_hess.reshape(*analytic_hess.shape)
+
+            #Check every block individually (for ease)
+            check_passed = [False]*numeric_hess.shape[2]
+            for block_ind in xrange(numeric_hess.shape[2]):
+                #Unless super_plot is set, just plot the first one
+                p = True if (plot and block_ind == numeric_hess.shape[2]-1) or super_plot else False
+                if verbose:
+                    print "Checking derivative of hessian wrt parameter number {}".format(block_ind)
+                check_passed[block_ind] = self.checkgrad_block(analytic_hess[:,:,block_ind], numeric_hess[:,:,block_ind], verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=p)
+
+            current_index += current_size
+        return np.all(check_passed)
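Note: `HessianChecker` builds the numeric Hessian as the numdifftools Jacobian of the supplied gradient, and accepts it when the elementwise ratio to the analytic Hessian is within `tolerance` of unity (or the two agree to `atol=tolerance`). A hedged usage sketch under those assumptions: for the quadratic f(x) = x^T A x the gradient is (A + A^T) x and the Hessian is the constant matrix A + A^T, so the check should pass.

    import numpy as np
    from GPy.models import HessianChecker

    A = np.random.randn(3, 3)
    f = lambda x: float(x.dot(A).dot(x))   # scalar objective f(x) = x^T A x
    df = lambda x: (A + A.T).dot(x)        # analytic gradient
    ddf = lambda x: A + A.T                # analytic Hessian (constant in x)

    checker = HessianChecker(f, df, ddf, x0=np.random.randn(3))
    assert checker.checkgrad(verbose=True)  # requires numdifftools

Note also that the code added in this hunk still uses Python 2 `print` statements and `xrange`, unlike the ports elsewhere in this commit, so the sketch assumes a Python 2 environment.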
GPy/models/mrd.py
@@ -74,6 +74,8 @@ class MRD(BayesianGPLVMMiniBatch):
 
         self.logger.debug("creating observable arrays")
         self.Ylist = [ObsAr(Y) for Y in Ylist]
+        #The next line is a fix for Python 3. It replicates the python 2 behaviour from the above comprehension
+        Y = Ylist[-1]
 
         if Ynames is None:
             self.logger.debug("creating Ynames")
@@ -82,7 +84,7 @@ class MRD(BayesianGPLVMMiniBatch):
         assert len(self.names) == len(self.Ylist), "one name per dataset, or None if Ylist is a dict"
 
         if inference_method is None:
-            self.inference_method = InferenceMethodList([VarDTC() for _ in xrange(len(self.Ylist))])
+            self.inference_method = InferenceMethodList([VarDTC() for _ in range(len(self.Ylist))])
         else:
             assert isinstance(inference_method, InferenceMethodList), "please provide one inference method per Y in the list and provide it as InferenceMethodList, inference_method given: {}".format(inference_method)
             self.inference_method = inference_method
@@ -137,7 +139,7 @@ class MRD(BayesianGPLVMMiniBatch):
 
         self.bgplvms = []
 
-        for i, n, k, l, Y, im, bs in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist, self.inference_method, batchsize):
+        for i, n, k, l, Y, im, bs in zip(itertools.count(), Ynames, kernels, likelihoods, Ylist, self.inference_method, batchsize):
             assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"
             md = np.isnan(Y).any()
             spgp = BayesianGPLVMMiniBatch(Y, input_dim, X, X_variance,
@@ -164,7 +166,7 @@ class MRD(BayesianGPLVMMiniBatch):
         self._log_marginal_likelihood = 0
         self.Z.gradient[:] = 0.
         self.X.gradient[:] = 0.
-        for b, i in itertools.izip(self.bgplvms, self.inference_method):
+        for b, i in zip(self.bgplvms, self.inference_method):
             self._log_marginal_likelihood += b._log_marginal_likelihood
 
             self.logger.info('working on im <{}>'.format(hex(id(i))))
@@ -195,7 +197,7 @@ class MRD(BayesianGPLVMMiniBatch):
         elif init in "PCA_single":
             X = np.zeros((Ylist[0].shape[0], self.input_dim))
             fracs = []
-            for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist):
+            for qs, Y in zip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist):
                 x,frcs = initialize_latent('PCA', len(qs), Y)
                 X[:, qs] = x
                 fracs.append(frcs)
@@ -327,9 +329,9 @@ class MRD(BayesianGPLVMMiniBatch):
 
     def __getstate__(self):
         state = super(MRD, self).__getstate__()
-        if state.has_key('kern'):
+        if 'kern' in state:
             del state['kern']
-        if state.has_key('likelihood'):
+        if 'likelihood' in state:
             del state['likelihood']
         return state
 
@@ -338,4 +340,4 @@ class MRD(BayesianGPLVMMiniBatch):
         super(MRD, self).__setstate__(state)
         self.kern = self.bgplvms[0].kern
         self.likelihood = self.bgplvms[0].likelihood
-        self.parameters_changed()
\ No newline at end of file
+        self.parameters_changed()
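Note: the changes in this file are a mechanical Python 3 port, and the final hunk's identical-looking remove/add pair is the commit adding a trailing newline at end of file (invisible in this rendering). A minimal sketch of the three idiom swaps (data illustrative):

    # Python 2 spellings removed in Python 3: itertools.izip, xrange, dict.has_key
    a, b = [1, 2], [3, 4]
    state = {'kern': None}

    pairs = list(zip(a, b))      # replaces itertools.izip(a, b); zip is lazy on Python 3
    idx = list(range(2))         # replaces xrange(2)
    has_kern = 'kern' in state   # replaces state.has_key('kern')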
GPy/models/one_vs_all_sparse_classification.py
@@ -30,7 +30,7 @@ class OneVsAllSparseClassification(object):
 
         self.results = {}
         for yj in labels:
-            print 'Class %s vs all' %yj
+            print('Class %s vs all' %yj)
             Ynew = Y.copy()
             Ynew[Y.flatten()!=yj] = 0
             Ynew[Y.flatten()==yj] = 1
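Note: the loop builds one binary problem per class label; a runnable sketch of just the relabelling step (data illustrative):

    import numpy as np

    Y = np.array([[0], [1], [2], [1]])   # multiclass labels
    for yj in np.unique(Y):
        Ynew = Y.copy()
        Ynew[Y.flatten() != yj] = 0      # everything else -> 0
        Ynew[Y.flatten() == yj] = 1      # class yj -> 1
        # ...fit one sparse GP classifier on (X, Ynew) per class...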
GPy/models/sparse_gp_minibatch.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
+from __future__ import print_function
 import numpy as np
 from ..core.parameterization.param import Param
 from ..core.sparse_gp import SparseGP
@@ -43,14 +44,15 @@ class SparseGPMiniBatch(SparseGP):
     def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
                  name='sparse gp', Y_metadata=None, normalizer=False,
                  missing_data=False, stochastic=False, batchsize=1):
-        #pick a sensible inference method
+
+        # pick a sensible inference method
         if inference_method is None:
             if isinstance(likelihood, likelihoods.Gaussian):
-                inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1])
+                inference_method = var_dtc.VarDTC(limit=1 if not missing_data else Y.shape[1])
             else:
                 #inference_method = ??
-                raise NotImplementedError, "what to do what to do?"
-                print "defaulting to ", inference_method, "for latent function inference"
+                raise NotImplementedError("what to do what to do?")
+                print("defaulting to ", inference_method, "for latent function inference")
 
         self.kl_factr = 1.
         self.Z = Param('inducing inputs', Z)
@@ -80,13 +82,13 @@ class SparseGPMiniBatch(SparseGP):
             overall = self.Y_normalized.shape[1]
             m_f = lambda i: "Precomputing Y for missing data: {: >7.2%}".format(float(i+1)/overall)
             message = m_f(-1)
-            print message,
-            for d in xrange(overall):
+            print(message, end=' ')
+            for d in range(overall):
                 self.Ylist.append(self.Y_normalized[self.ninan[:, d], d][:, None])
-                print ' '*(len(message)+1) + '\r',
+                print(' '*(len(message)+1) + '\r', end=' ')
                 message = m_f(d)
-                print message,
-            print ''
+                print(message, end=' ')
+            print('')
 
         self.posterior = None
 
@@ -181,11 +183,11 @@ class SparseGPMiniBatch(SparseGP):
             full_values[key][value_indices[key]] += current_values[key]
         """
         for key in current_values.keys():
-            if value_indices is not None and value_indices.has_key(key):
+            if value_indices is not None and key in value_indices:
                 index = value_indices[key]
             else:
                 index = slice(None)
-            if full_values.has_key(key):
+            if key in full_values:
                 full_values[key][index] += current_values[key]
             else:
                 full_values[key] = current_values[key]
@@ -241,15 +243,15 @@ class SparseGPMiniBatch(SparseGP):
         if not self.stochastics:
             m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim)
             message = m_f(-1)
-            print message,
+            print(message, end=' ')
 
         for d in self.stochastics.d:
            ninan = self.ninan[:, d]
 
            if not self.stochastics:
-                print ' '*(len(message)) + '\r',
+                print(' '*(len(message)) + '\r', end=' ')
                 message = m_f(d)
-                print message,
+                print(message, end=' ')
 
            posterior, log_marginal_likelihood, \
                grad_dict, current_values, value_indices = self._inner_parameters_changed(
@@ -268,7 +270,7 @@ class SparseGPMiniBatch(SparseGP):
             woodbury_vector[:, d:d+1] = posterior.woodbury_vector
             self._log_marginal_likelihood += log_marginal_likelihood
         if not self.stochastics:
-            print ''
+            print('')
 
         if self.posterior is None:
             self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
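Note: with `from __future__ import print_function`, `print(msg, end=' ')` reproduces the Python 2 `print msg,` trailing-comma behaviour (no newline, trailing space) that these in-place progress messages rely on. A minimal sketch (message text illustrative):

    from __future__ import print_function   # no-op on Python 3

    message = "Precomputing Y for missing data:  50.00%"
    print(message, end=' ')                           # stay on the current line
    print(' ' * (len(message) + 1) + '\r', end=' ')   # blank it out and return the cursor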
GPy/models/ss_gplvm.py
@@ -39,7 +39,10 @@ class SSGPLVM(SparseGP_MPI):
             X_variance = np.random.uniform(0,.1,X.shape)
 
         if Gamma is None:
-            gamma = np.random.randn(X.shape[0], input_dim)
+            gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
+            gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
+            gamma[gamma>1.-1e-9] = 1.-1e-9
+            gamma[gamma<1e-9] = 1e-9
         else:
             gamma = Gamma.copy()
 
@@ -71,7 +74,7 @@ class SSGPLVM(SparseGP_MPI):
         self.link_parameter(self.X, index=0)
 
         if self.group_spike:
-            [self.X.gamma[:,i].tie('tieGamma'+str(i)) for i in xrange(self.X.gamma.shape[1])] # Tie columns together
+            [self.X.gamma[:,i].tie('tieGamma'+str(i)) for i in range(self.X.gamma.shape[1])] # Tie columns together
 
     def set_X_gradients(self, X, X_grad):
         """Set the gradients of the posterior distribution of X in its specific form."""
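Note: the new initialisation draws the variational spike probabilities `gamma` around 0.5 and clamps them to the open interval (0, 1), so terms like log(gamma) and log(1 - gamma) in the Bernoulli KL stay finite. An equivalent sketch using np.clip (sizes illustrative):

    import numpy as np

    N, Q = 100, 5
    gamma = 0.5 + 0.1 * np.random.randn(N, Q)
    gamma = np.clip(gamma, 1e-9, 1. - 1e-9)   # same effect as the two masked assignments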
GPy/models/ss_mrd.py
@@ -19,10 +19,10 @@ class SSMRD(Model):
                                  name='model_'+str(i)) for i,y in enumerate(Ylist)]
         self.add_parameters(*(self.models))
 
-        [[[self.models[m].X.mean[i,j:j+1].tie('mean_'+str(i)+'_'+str(j)) for m in xrange(len(self.models))] for j in xrange(self.models[0].X.mean.shape[1])]
-         for i in xrange(self.models[0].X.mean.shape[0])]
-        [[[self.models[m].X.variance[i,j:j+1].tie('var_'+str(i)+'_'+str(j)) for m in xrange(len(self.models))] for j in xrange(self.models[0].X.variance.shape[1])]
-         for i in xrange(self.models[0].X.variance.shape[0])]
+        [[[self.models[m].X.mean[i,j:j+1].tie('mean_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.mean.shape[1])]
+         for i in range(self.models[0].X.mean.shape[0])]
+        [[[self.models[m].X.variance[i,j:j+1].tie('var_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.variance.shape[1])]
+         for i in range(self.models[0].X.variance.shape[0])]
 
         self.updates = True
 
@@ -31,4 +31,4 @@ class SSMRD(Model):
         self._log_marginal_likelihood = sum([m._log_marginal_likelihood for m in self.models])
 
     def log_likelihood(self):
-        return self._log_marginal_likelihood
\ No newline at end of file
+        return self._log_marginal_likelihood
GPy/models/warped_gp.py
@@ -1,7 +1,6 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-
 import numpy as np
 from ..util.warping_functions import *
 from ..core import GP
@@ -10,14 +9,16 @@ from GPy.util.warping_functions import TanhWarpingFunction_d
 from GPy import kern
 
 class WarpedGP(GP):
-    def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalize_X=False, normalize_Y=False):
+    def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3):
+
         if kernel is None:
-            kernel = kern.rbf(X.shape[1])
+            kernel = kern.RBF(X.shape[1])
+
         if warping_function == None:
             self.warping_function = TanhWarpingFunction_d(warping_terms)
             self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1,) * 1)
         else:
             self.warping_function = warping_function
 
         self.scale_data = False
         if self.scale_data:
@@ -25,10 +26,10 @@ class WarpedGP(GP):
         self.has_uncertain_inputs = False
         self.Y_untransformed = Y.copy()
         self.predict_in_warped_space = False
-        likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y)
+        likelihood = likelihoods.Gaussian()
 
-        GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
-        self._set_params(self._get_params())
+        GP.__init__(self, X, self.transform_data(), likelihood=likelihood, kernel=kernel)
+        self.link_parameter(self.warping_function)
 
     def _scale_data(self, Y):
         self._Ymax = Y.max()
@@ -38,62 +39,55 @@ class WarpedGP(GP):
     def _unscale_data(self, Y):
         return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin
 
-    def _set_params(self, x):
-        self.warping_params = x[:self.warping_function.num_parameters]
-        Y = self.transform_data()
-        self.likelihood.set_data(Y)
-        GP._set_params(self, x[self.warping_function.num_parameters:].copy())
+    def parameters_changed(self):
+        self.Y[:] = self.transform_data()
+        super(WarpedGP, self).parameters_changed()
 
-    def _get_params(self):
-        return np.hstack((self.warping_params.flatten().copy(), GP._get_params(self).copy()))
+        Kiy = self.posterior.woodbury_vector.flatten()
 
-    def _get_param_names(self):
-        warping_names = self.warping_function._get_param_names()
-        param_names = GP._get_param_names(self)
-        return warping_names + param_names
-
-    def transform_data(self):
-        Y = self.warping_function.f(self.Y_untransformed.copy(), self.warping_params).copy()
-        return Y
-
-    def log_likelihood(self):
-        ll = GP.log_likelihood(self)
-        jacobian = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
-        return ll + np.log(jacobian).sum()
-
-    def _log_likelihood_gradients(self):
-        ll_grads = GP._log_likelihood_gradients(self)
-        alpha = np.dot(self.Ki, self.likelihood.Y.flatten())
-        warping_grads = self.warping_function_gradients(alpha)
-
-        warping_grads = np.append(warping_grads[:, :-1].flatten(), warping_grads[0, -1])
-        return np.hstack((warping_grads.flatten(), ll_grads.flatten()))
-
-    def warping_function_gradients(self, Kiy):
-        grad_y = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
-        grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed, self.warping_params,
+        grad_y = self.warping_function.fgrad_y(self.Y_untransformed)
+        grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed,
                                                                  return_covar_chain=True)
         djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0)
         dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0)
 
-        return -dquad_dpsi + djac_dpsi
+        warping_grads = -dquad_dpsi + djac_dpsi
+
+        self.warping_function.psi.gradient[:] = warping_grads[:, :-1]
+        self.warping_function.d.gradient[:] = warping_grads[0, -1]
+
+
+    def transform_data(self):
+        Y = self.warping_function.f(self.Y_untransformed.copy()).copy()
+        return Y
+
+    def log_likelihood(self):
+        ll = GP.log_likelihood(self)
+        jacobian = self.warping_function.fgrad_y(self.Y_untransformed)
+        return ll + np.log(jacobian).sum()
 
     def plot_warping(self):
-        self.warping_function.plot(self.warping_params, self.Y_untransformed.min(), self.Y_untransformed.max())
+        self.warping_function.plot(self.Y_untransformed.min(), self.Y_untransformed.max())
 
-    def predict(self, Xnew, which_parts='all', full_cov=False, pred_init=None):
+    def predict(self, Xnew, which_parts='all', pred_init=None):
         # normalize X values
-        Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
-        mu, var = GP._raw_predict(self, Xnew, full_cov=full_cov, which_parts=which_parts)
+        # Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
+        mu, var = GP._raw_predict(self, Xnew)
 
         # now push through likelihood
-        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
+        mean, var = self.likelihood.predictive_values(mu, var)
 
         if self.predict_in_warped_space:
-            mean = self.warping_function.f_inv(mean, self.warping_params, y=pred_init)
-            var = self.warping_function.f_inv(var, self.warping_params)
+            mean = self.warping_function.f_inv(mean, y=pred_init)
+            var = self.warping_function.f_inv(var)
 
         if self.scale_data:
             mean = self._unscale_data(mean)
 
-        return mean, var, _025pm, _975pm
+        return mean, var
 
 if __name__ == '__main__':
     X = np.random.randn(100, 1)
     Y = np.sin(X) + np.random.randn(100, 1)*0.05
 
     m = WarpedGP(X, Y)
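Note: this refactor moves WarpedGP from the old `_set_params`/`_get_params` interface to the `parameters_changed` hook, and the likelihood keeps its change-of-variables correction: with a warping f applied to the observations, log p(y) = log N(f(y); 0, K) + sum_i log f'(y_i), which is exactly `ll + np.log(jacobian).sum()` above. A hedged usage sketch mirroring the `__main__` block (assumes the post-refactor API shown in this diff):

    import numpy as np
    import GPy

    X = np.random.randn(100, 1)
    Y = np.sin(X) + np.random.randn(100, 1) * 0.05

    m = GPy.models.WarpedGP(X, Y)   # defaults to a tanh warping with 3 terms
    m.optimize()
    mean, var = m.predict(X)        # predict now returns (mean, var) only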