Mirror of https://github.com/SheffieldML/GPy.git, synced 2026-05-08 11:32:39 +02:00
Merge branch 'devel' of https://github.com/SheffieldML/GPy into devel
This commit is contained in: commit 970df9b88e
52 changed files with 2183 additions and 772 deletions
@@ -5,6 +5,7 @@ import numpy as np
import sys
from .. import kern
from model import Model
from mapping import Mapping
from parameterization import ObsAr
from .. import likelihoods
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation

@@ -34,7 +35,7 @@ class GP(Model):
"""
def __init__(self, X, Y, kernel, likelihood, inference_method=None, name='gp', Y_metadata=None, normalizer=False):
def __init__(self, X, Y, kernel, likelihood, mean_function=None, inference_method=None, name='gp', Y_metadata=None, normalizer=False):
super(GP, self).__init__(name)
assert X.ndim == 2

@@ -75,6 +76,15 @@ class GP(Model):
assert isinstance(likelihood, likelihoods.Likelihood)
self.likelihood = likelihood

#handle the mean function
self.mean_function = mean_function
if mean_function is not None:
assert isinstance(self.mean_function, Mapping)
assert mean_function.input_dim == self.input_dim
assert mean_function.output_dim == self.output_dim
self.link_parameter(mean_function)

#find a sensible inference method
logger.info("initializing inference method")
if inference_method is None:

@@ -153,9 +163,11 @@ class GP(Model):
This method is not designed to be called manually; the framework is set up to call it automatically when parameters change. If you call
this method yourself, there may be unexpected consequences.
"""
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata)
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.mean_function, self.Y_metadata)
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X)
if self.mean_function is not None:
self.mean_function.update_gradients(self.grad_dict['dL_dm'], self.X)

def log_likelihood(self):
"""

@@ -192,6 +204,10 @@ class GP(Model):
#force mu to be a column vector
if len(mu.shape)==1: mu = mu[:,None]

#add the mean function in
if not self.mean_function is None:
mu += self.mean_function.f(_Xnew)
return mu, var

def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):

@@ -241,12 +257,14 @@ class GP(Model):

def predictive_gradients(self, Xnew):
"""
Compute the derivatives of the latent function with respect to X*
Compute the derivatives of the predicted latent function with respect to X*

Given a set of points at which to predict X* (size [N*,Q]), compute the
derivatives of the mean and variance. Resulting arrays are sized:
dmu_dX* -- [N*, Q, D], where D is the number of outputs in this GP (usually one).

Note that this is not the same as computing the mean and variance of the derivative of the function!

dv_dX* -- [N*, Q], (since all outputs have the same variance)
:param X: The points at which to get the predictive gradients
:type X: np.ndarray (Xnew x self.input_dim)
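For illustration only (this snippet is not part of the commit): a minimal sketch of calling predictive_gradients, with the toy GPRegression/RBF setup assumed by me; the returned shapes follow the docstring above.

import numpy as np
import GPy

X = np.linspace(0, 10, 50)[:, None]
Y = np.sin(X) + 0.1*np.random.randn(*X.shape)
m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(1))

Xstar = np.linspace(0, 10, 5)[:, None]
dmu_dX, dv_dX = m.predictive_gradients(Xstar)  # dmu_dX: [N*, Q, D], dv_dX: [N*, Q]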
@@ -1,4 +1,5 @@
# Copyright (c) 2013,2014, GPy authors (see AUTHORS.txt).
# Copyright (c) 2015, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import sys

@@ -7,7 +8,7 @@ import numpy as np

class Mapping(Parameterized):
"""
Base model for shared behavior between models that can act like a mapping.
Base model for shared mapping behaviours
"""

def __init__(self, input_dim, output_dim, name='mapping'):

@@ -18,49 +19,12 @@ class Mapping(Parameterized):
def f(self, X):
raise NotImplementedError

def df_dX(self, dL_df, X):
"""Evaluate derivatives of mapping outputs with respect to inputs.

:param dL_df: gradient of the objective with respect to the function.
:type dL_df: ndarray (num_data x output_dim)
:param X: the input locations where derivatives are to be evaluated.
:type X: ndarray (num_data x input_dim)
:returns: matrix containing gradients of the function with respect to the inputs.
"""
def gradients_X(self, dL_dF, X):
raise NotImplementedError

def df_dtheta(self, dL_df, X):
"""The gradient of the outputs of the mapping with respect to each of the parameters.

:param dL_df: gradient of the objective with respect to the function.
:type dL_df: ndarray (num_data x output_dim)
:param X: input locations where the function is evaluated.
:type X: ndarray (num_data x input_dim)
:returns: Matrix containing gradients with respect to parameters of each output for each input data.
:rtype: ndarray (num_params length)
"""

def update_gradients(self, dL_dF, X):
raise NotImplementedError

def plot(self, *args):
"""
Plots the mapping associated with the model.
- In one dimension, the function is plotted.
- In two dimensions, a contour-plot shows the function
- In higher dimensions, we've not implemented this yet !TODO!

Can plot only part of the data and part of the posterior functions
using which_data and which_functions

This is a convenience function: arguments are passed to
GPy.plotting.matplot_dep.models_plots.plot_mapping
"""

if "matplotlib" in sys.modules:
from ..plotting.matplot_dep import models_plots
mapping_plots.plot_mapping(self,*args)
else:
raise NameError, "matplotlib package has not been imported."

class Bijective_mapping(Mapping):
"""
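For illustration only (not from the GPy source): the Mapping interface above is what the new mean_function argument expects — f, update_gradients and gradients_X. A minimal hand-rolled subclass might look like the sketch below; the Constant name and offset attribute are my own, not GPy API.

import numpy as np
import GPy

class Constant(GPy.core.Mapping):
    """Toy mean function: returns a fixed offset, has no learnable parameters."""
    def __init__(self, input_dim, output_dim, offset=1.0):
        super(Constant, self).__init__(input_dim, output_dim, name='constant_map')
        self.offset = offset
    def f(self, X):
        return self.offset*np.ones((X.shape[0], self.output_dim))
    def update_gradients(self, dL_dF, X):
        pass  # no parameters to update
    def gradients_X(self, dL_dF, X):
        return np.zeros(X.shape)  # constant in X, so zero gradient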
@@ -50,31 +50,29 @@ class SpikeAndSlabPrior(VariationalPrior):
def KL_divergence(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma,gamma1 = variational_posterior.gamma_probabilities()
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
gamma = variational_posterior.gamma.values
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi

var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S))
var_gamma = (gamma*(log_gamma-np.log(pi))).sum()+(gamma1*(log_gamma1-np.log(1-pi))).sum()
var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
return var_gamma+ (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.

def update_gradients_KL(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma,gamma1 = variational_posterior.gamma_probabilities()
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
gamma = variational_posterior.gamma.values
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi

variational_posterior.binary_prob.gradient -= (np.log((1-pi)/pi)+log_gamma-log_gamma1+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.)*gamma*gamma1
variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
mu.gradient -= gamma*mu/self.variance
S.gradient -= (1./self.variance - 1./S) * gamma /2.
if self.learnPi:

@@ -161,24 +159,8 @@ class SpikeAndSlabPosterior(VariationalPosterior):
binary_prob : the probability of the distribution on the slab part.
"""
super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
self.gamma = Param("binary_prob",binary_prob)
self.gamma = Param("binary_prob",binary_prob,Logistic(0.,1.))
self.link_parameter(self.gamma)

@Cache_this(limit=5)
def gamma_probabilities(self):
prob = np.zeros_like(param_to_array(self.gamma))
prob[self.gamma>-710] = 1./(1.+np.exp(-self.gamma[self.gamma>-710]))
prob1 = -np.zeros_like(param_to_array(self.gamma))
prob1[self.gamma<710] = 1./(1.+np.exp(self.gamma[self.gamma<710]))
return prob, prob1

@Cache_this(limit=5)
def gamma_log_prob(self):
loggamma = param_to_array(self.gamma).copy()
loggamma[loggamma>-40] = -np.log1p(np.exp(-loggamma[loggamma>-40]))
loggamma1 = -param_to_array(self.gamma).copy()
loggamma1[loggamma1>-40] = -np.log1p(np.exp(-loggamma1[loggamma1>-40]))
return loggamma,loggamma1

def set_gradients(self, grad):
self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad
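For reference, my reading of the updated KL_divergence above (not text from the commit): with prior inclusion probability $\pi$, prior slab variance $\sigma_0^2$ (self.variance), and a posterior with slab moments $\mu$, $S$ and inclusion probability $\gamma$, the quantity computed is

$$\mathrm{KL}(q\,\|\,p) = \sum_{n,q}\Big[\gamma\ln\tfrac{\gamma}{\pi} + (1-\gamma)\ln\tfrac{1-\gamma}{1-\pi} + \tfrac{\gamma}{2}\Big(\tfrac{\mu^2 + S}{\sigma_0^2} - \ln S + \ln\sigma_0^2 - 1\Big)\Big],$$

i.e. a Bernoulli KL on the inclusion probabilities plus a Gaussian KL on the slab, weighted by $\gamma$.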
@@ -19,7 +19,7 @@ class SparseGP(GP):
This model allows (approximate) inference using variational DTC or FITC
(Gaussian likelihoods) as well as non-conjugate sparse methods based on
these.

This is not for missing data, as the implementation for missing data involves
some inefficient optimization routine decisions.
See missing data SparseGP implementation in py:class:'~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch'.

@@ -39,7 +39,7 @@ class SparseGP(GP):

"""

def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, inference_method=None,
name='sparse gp', Y_metadata=None, normalizer=False):
#pick a sensible inference method
if inference_method is None:

@@ -53,7 +53,7 @@ class SparseGP(GP):
self.Z = Param('inducing inputs', Z)
self.num_inducing = Z.shape[0]

GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
GP.__init__(self, X, Y, kernel, likelihood, mean_function, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)

logger.info("Adding Z as parameter")
self.link_parameter(self.Z, index=0)

@@ -61,7 +61,7 @@ class SparseGP(GP):

def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior)

def set_Z(self, Z, trigger_update=True):
if trigger_update: self.update_model(False)
self.unlink_parameter(self.Z)

@@ -110,8 +110,8 @@ class SparseGP(GP):

def _raw_predict(self, Xnew, full_cov=False, kern=None):
"""
Make a prediction for the latent function values.

Make a prediction for the latent function values.

For certain inputs we give back a full_cov of shape NxN;
if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is off,
we take only the diagonal elements across N.

@@ -136,6 +136,9 @@ class SparseGP(GP):
else:
Kxx = kern.Kdiag(Xnew)
var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
#add in the mean function
if self.mean_function is not None:
mu += self.mean_function.f(Xnew)
else:
psi0_star = self.kern.psi0(self.Z, Xnew)
psi1_star = self.kern.psi1(self.Z, Xnew)

@@ -165,4 +168,5 @@ class SparseGP(GP):
var[i] = var_
else:
var[i] = np.diag(var_)+p0-t2

return mu, var
@@ -9,7 +9,7 @@ from ..inference.latent_function_inference import SVGP as svgp_inf

class SVGP(SparseGP):
def __init__(self, X, Y, Z, kernel, likelihood, name='SVGP', Y_metadata=None, batchsize=None):
def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, name='SVGP', Y_metadata=None, batchsize=None):
"""
Stochastic Variational GP.

@@ -38,7 +38,7 @@ class SVGP(SparseGP):
#create the SVI inference method
inf_method = svgp_inf()

SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, inference_method=inf_method,
SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, mean_function=mean_function, inference_method=inf_method,
name=name, Y_metadata=Y_metadata, normalizer=False)

self.m = Param('q_u_mean', np.zeros((self.num_inducing, Y.shape[1])))

@@ -48,7 +48,7 @@ class SVGP(SparseGP):
self.link_parameter(self.m)

def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata, KL_scale=1.0, batch_scale=float(self.X_all.shape[0])/float(self.X.shape[0]))
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.mean_function, self.Y_metadata, KL_scale=1.0, batch_scale=float(self.X_all.shape[0])/float(self.X.shape[0]))

#update the kernel gradients
self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z)

@@ -65,6 +65,13 @@ class SVGP(SparseGP):
self.m.gradient = self.grad_dict['dL_dm']
self.chol.gradient = self.grad_dict['dL_dchol']

if self.mean_function is not None:
self.mean_function.update_gradients(self.grad_dict['dL_dmfX'], self.X)
g = self.mean_function.gradient[:].copy()
self.mean_function.update_gradients(self.grad_dict['dL_dmfZ'], self.Z)
self.mean_function.gradient[:] += g
self.Z.gradient[:] += self.mean_function.gradients_X(self.grad_dict['dL_dmfZ'], self.Z)

def set_data(self, X, Y):
"""
Set the data without calling parameters_changed to avoid wasted computation
@@ -505,3 +505,48 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):

print m
return m

def simple_mean_function(max_iters=100, optimize=True, plot=True):
"""
The simplest possible mean function. No parameters, just a simple Sinusoid.
"""
#create simple mean function
mf = GPy.core.Mapping(1,1)
mf.f = np.sin
mf.update_gradients = lambda a,b: None

X = np.linspace(0,10,50).reshape(-1,1)
Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape)

k = GPy.kern.RBF(1)
lik = GPy.likelihoods.Gaussian()
m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf)
if optimize:
m.optimize(max_iters=max_iters)
if plot:
m.plot(plot_limits=(-10,15))
return m

def parametric_mean_function(max_iters=100, optimize=True, plot=True):
"""
A linear mean function with parameters that we'll learn alongside the kernel
"""
#create simple mean function
mf = GPy.core.Mapping(1,1)
mf.f = np.sin

X = np.linspace(0,10,50).reshape(-1,1)
Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape) + 3*X

mf = GPy.mappings.Linear(1,1)

k = GPy.kern.RBF(1)
lik = GPy.likelihoods.Gaussian()
m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf)
if optimize:
m.optimize(max_iters=max_iters)
if plot:
m.plot()
return m
@@ -50,19 +50,19 @@ class InferenceMethodList(LatentFunctionInference, list):
def on_optimization_end(self):
for inf in self:
inf.on_optimization_end()

def __getstate__(self):
state = []
for inf in self:
state.append(inf)
return state

def __setstate__(self, state):
for inf in state:
self.append(inf)

from exact_gaussian_inference import ExactGaussianInference
from laplace import Laplace
from laplace import Laplace, LaplaceBlock
from GPy.inference.latent_function_inference.var_dtc import VarDTC
from expectation_propagation import EP
from expectation_propagation_dtc import EPDTC

@@ -78,9 +78,9 @@ from svgp import SVGP
# class EMLikeLatentFunctionInference(LatentFunctionInference):
# def update_approximation(self):
# """
# This function gets called when the
# This function gets called when the
# """
#
#
# def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
# """
# Do inference on the latent functions given a covariance function `kern`,

@@ -88,7 +88,7 @@ from svgp import SVGP
# Additional metadata for the outputs `Y` can be given in `Y_metadata`.
# """
# raise NotImplementedError, "Abstract base class for full inference"
#
#
# class VariationalLatentFunctionInference(LatentFunctionInference):
# def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
# """
@@ -20,7 +20,8 @@ class DTC(LatentFunctionInference):
def __init__(self):
self.const_jitter = 1e-6

def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
assert mean_function is None, "inference with a mean function not implemented"
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."

num_inducing, _ = Z.shape

@@ -88,7 +89,8 @@ class vDTC(object):
def __init__(self):
self.const_jitter = 1e-6

def inference(self, kern, X, X_variance, Z, likelihood, Y, Y_metadata):
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
assert mean_function is None, "inference with a mean function not implemented"
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."

num_inducing, _ = Z.shape
@@ -36,11 +36,18 @@ class ExactGaussianInference(LatentFunctionInference):
#print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!"
return Y

def inference(self, kern, X, likelihood, Y, Y_metadata=None):
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None):
"""
Returns a Posterior class containing essential quantities of the posterior
"""
YYT_factor = self.get_YYTfactor(Y)

if mean_function is None:
m = 0
else:
m = mean_function.f(X)

YYT_factor = self.get_YYTfactor(Y-m)

K = kern.K(X)

@@ -56,4 +63,4 @@ class ExactGaussianInference(LatentFunctionInference):

dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK),Y_metadata)

return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}
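For reference (my reading of the change above, not text from the commit): with a mean function m and Gaussian noise variance $\sigma^2$, exact inference simply centres the targets,

$$\log p(Y\mid X) = -\tfrac12 (Y-m)^\top K_y^{-1}(Y-m) - \tfrac12\log|K_y| - \tfrac{N}{2}\log 2\pi, \qquad K_y = K + \sigma^2 I,$$
$$\alpha = K_y^{-1}(Y-m), \qquad \frac{\partial \log p(Y\mid X)}{\partial m} = \alpha,$$

which is why the gradient dictionary now also carries 'dL_dm': alpha.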
@@ -33,15 +33,19 @@ class EP(LatentFunctionInference):
# TODO: update approximation in the end as well? Maybe even with a switch?
pass

def inference(self, kern, X, likelihood, Y, Y_metadata=None, Z=None):
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, Z=None):
assert mean_function is None, "inference with a mean function not implemented"
num_data, output_dim = Y.shape
assert output_dim ==1, "ep in 1D only (for now!)"

K = kern.K(X)

if self._ep_approximation is None:
#if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
else:
#if we've already run EP, just use the existing approximation stored in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation

Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde))

@@ -64,7 +64,8 @@ class EPDTC(LatentFunctionInference):
self.old_mutilde, self.old_vtilde = None, None
self._ep_approximation = None

def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
assert mean_function is None, "inference with a mean function not implemented"
num_data, output_dim = Y.shape
assert output_dim ==1, "ep in 1D only (for now!)"

@@ -18,7 +18,8 @@ class FITC(LatentFunctionInference):
"""
const_jitter = 1e-6

def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
assert mean_function is None, "inference with a mean function not implemented"

num_inducing, _ = Z.shape
num_data, output_dim = Y.shape
@@ -39,10 +39,11 @@ class Laplace(LatentFunctionInference):
self.first_run = True
self._previous_Ki_fhat = None

def inference(self, kern, X, likelihood, Y, Y_metadata=None):
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None):
"""
Returns a Posterior class containing essential quantities of the posterior
"""
assert mean_function is None, "inference with a mean function not implemented"

# Compute K
K = kern.K(X)

@@ -50,21 +51,25 @@ class Laplace(LatentFunctionInference):
#Find mode
if self.bad_fhat or self.first_run:
Ki_f_init = np.zeros_like(Y)
first_run = False
self.first_run = False
else:
Ki_f_init = self._previous_Ki_fhat

Ki_f_init = np.zeros_like(Y)# FIXME: take this out

f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)

self.f_hat = f_hat
self.Ki_fhat = Ki_fhat
self.K = K.copy()
#self.Ki_fhat = Ki_fhat
#self.K = K.copy()

#Compute hessian and other variables at mode
log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)

self._previous_Ki_fhat = Ki_fhat.copy()
return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}

def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None):
def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None, *args, **kwargs):
"""
Rasmussen's numerically stable mode finding
For nomenclature see Rasmussen & Williams 2006

@@ -89,7 +94,12 @@ class Laplace(LatentFunctionInference):

#define the objective function (to be maximised)
def obj(Ki_f, f):
return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
ll = -0.5*np.sum(np.dot(Ki_f.T, f)) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
if np.isnan(ll):
return -np.inf
else:
return ll

difference = np.inf
iteration = 0

@@ -104,7 +114,7 @@ class Laplace(LatentFunctionInference):
W_f = W*f

b = W_f + grad # R+W p46 line 6.
W12BiW12, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave)
W12BiW12, _, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave, *args, **kwargs)
W12BiW12Kb = np.dot(W12BiW12, np.dot(K, b))

#Work out the DIRECTION that we want to move in, but don't choose the stepsize yet

@@ -121,7 +131,9 @@ class Laplace(LatentFunctionInference):
step = optimize.brent(inner_obj, tol=1e-4, maxiter=12)
Ki_f_new = Ki_f + step*dKi_f
f_new = np.dot(K, Ki_f_new)

#print "new {} vs old {}".format(obj(Ki_f_new, f_new), obj(Ki_f, f))
if obj(Ki_f_new, f_new) < obj(Ki_f, f):
raise ValueError("Shouldn't happen, brent optimization failing")
difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
Ki_f = Ki_f_new
f = f_new

@@ -152,14 +164,10 @@ class Laplace(LatentFunctionInference):
if np.any(np.isnan(W)):
raise ValueError('One or more element(s) of W is NaN')

K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)

#compute vital matrices
C = np.dot(LiW12, K)
Ki_W_i = K - C.T.dot(C)
K_Wi_i, logdet_I_KW, I_KW_i, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave)

#compute the log marginal
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + np.sum(likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata)) - np.sum(np.log(np.diag(L)))
log_marginal = -0.5*np.sum(np.dot(Ki_f.T, f_hat)) + np.sum(likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata)) - 0.5*logdet_I_KW

# Compute matrices for derivatives
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat

@@ -196,23 +204,23 @@ class Laplace(LatentFunctionInference):
dL_dthetaL = np.zeros(num_params)
for thetaL_i in range(num_params):
#Explicit
dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i])
dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i,:, :])
# The + comes from the fact that dlik_hess_dthetaL == -dW_dthetaL
+ 0.5*np.sum(np.diag(Ki_W_i).flatten()*dlik_hess_dthetaL[:, thetaL_i].flatten())
+ 0.5*np.sum(np.diag(Ki_W_i)*np.squeeze(dlik_hess_dthetaL[thetaL_i, :, :]))
)

#Implicit
dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[:, thetaL_i])
#dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[:, thetaL_i])
dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[thetaL_i, :, :])
#dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[thetaL_i, :, :])
dL_dthetaL_imp = np.dot(dL_dfhat.T, dfhat_dthetaL)
dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
dL_dthetaL[thetaL_i] = np.sum(dL_dthetaL_exp + dL_dthetaL_imp)

else:
dL_dthetaL = np.zeros(likelihood.size)

return log_marginal, K_Wi_i, dL_dK, dL_dthetaL

def _compute_B_statistics(self, K, W, log_concave):
def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs):
"""
Rasmussen suggests the use of a numerically stable positive definite matrix B
which has positive diagonal elements and can be easily inverted

@@ -225,7 +233,7 @@ class Laplace(LatentFunctionInference):
"""
if not log_concave:
#print "Under 1e-10: {}".format(np.sum(W < 1e-6))
W[W<1e-6] = 1e-6
W = np.clip(W, 1e-6, 1e+30)
# NOTE: when setting a parameter inside parameters_changed it will always come to closed update circles!!!
#W.__setitem__(W < 1e-6, 1e-6, update=False) # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
# If the likelihood is non-log-concave, we want to say that there is a negative variance

@@ -247,5 +255,160 @@ class Laplace(LatentFunctionInference):
#K_Wi_i_2 , _= dpotri(L2)
#symmetrify(K_Wi_i_2)

return K_Wi_i, L, LiW12
#compute vital matrices
C = np.dot(LiW12, K)
Ki_W_i = K - C.T.dot(C)

I_KW_i = np.eye(K.shape[0]) - np.dot(K, K_Wi_i)
logdet_I_KW = 2*np.sum(np.log(np.diag(L)))

return K_Wi_i, logdet_I_KW, I_KW_i, Ki_W_i
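For reference (my reading of the refactored _compute_B_statistics above, not text from the commit): with $B = I + W^{1/2} K W^{1/2}$ and $L$ its Cholesky factor, the four returned quantities correspond to

$$(K+W^{-1})^{-1} = W^{1/2}B^{-1}W^{1/2}, \qquad \log|I+KW| = \log|B| = 2\sum_i \log L_{ii},$$
$$(I+KW)^{-1} = I - K(K+W^{-1})^{-1}, \qquad (K^{-1}+W)^{-1} = K - KW^{1/2}B^{-1}W^{1/2}K,$$

which is what mode_computations now consumes in place of the old (K_Wi_i, L, LiW12) triple.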
class LaplaceBlock(Laplace):
def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None, *args, **kwargs):
Ki_f = Ki_f_init.copy()
f = np.dot(K, Ki_f)

#define the objective function (to be maximised)
def obj(Ki_f, f):
ll = -0.5*np.dot(Ki_f.T, f) + np.sum(likelihood.logpdf_sum(f, Y, Y_metadata=Y_metadata))
if np.isnan(ll):
return -np.inf
else:
return ll

difference = np.inf
iteration = 0

I = np.eye(K.shape[0])
while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)

W[np.diag_indices_from(W)] = np.clip(np.diag(W), 1e-6, 1e+30)

W_f = np.dot(W, f)
grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)

b = W_f + grad # R+W p46 line 6.
K_Wi_i, _, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave, *args, **kwargs)

#Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
#a = (I - (K+Wi)i*K)*b
full_step_Ki_f = np.dot(I - np.dot(K_Wi_i, K), b)
dKi_f = full_step_Ki_f - Ki_f

#define an objective for the line search (minimize this one)
def inner_obj(step_size):
Ki_f_trial = Ki_f + step_size*dKi_f
f_trial = np.dot(K, Ki_f_trial)
return -obj(Ki_f_trial, f_trial)

#use scipy for the line search, then compute new values of f, Ki_f
step = optimize.brent(inner_obj, tol=1e-4, maxiter=12)

Ki_f_new = Ki_f + step*dKi_f
f_new = np.dot(K, Ki_f_new)

difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
Ki_f = Ki_f_new
f = f_new
iteration += 1

#Warn of bad fits
if difference > self._mode_finding_tolerance:
if not self.bad_fhat:
warnings.warn("Not perfect f_hat fit difference: {}".format(difference))
self._previous_Ki_fhat = np.zeros_like(Y)
self.bad_fhat = True
elif self.bad_fhat:
self.bad_fhat = False
warnings.warn("f_hat now fine again")
if iteration > self._mode_finding_max_iter:
warnings.warn("didn't find the best")

return f, Ki_f

def mode_computations(self, f_hat, Ki_f, K, Y, likelihood, kern, Y_metadata):
#At this point get the hessian matrix (or vector as W is diagonal)
W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)

W[np.diag_indices_from(W)] = np.clip(np.diag(W), 1e-6, 1e+30)

K_Wi_i, log_B_det, I_KW_i, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave)

#compute the log marginal
#FIXME: The determinant should be output_dim*0.5 I think, gradients may now no longer check
log_marginal = -0.5*np.dot(f_hat.T, Ki_f) + np.sum(likelihood.logpdf_sum(f_hat, Y, Y_metadata=Y_metadata)) - 0.5*log_B_det

#Compute vital matrices for derivatives
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat

#dL_dfhat = np.zeros((f_hat.shape[0]))
#for i in range(f_hat.shape[0]):
#dL_dfhat[i] = -0.5*np.trace(np.dot(Ki_W_i, dW_df[:,:,i]))

dL_dfhat = -0.5*np.einsum('ij,ijk->k', Ki_W_i, dW_df)

woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, Y_metadata=Y_metadata)

####################
#compute dL_dK#
####################
if kern.size > 0 and not kern.is_fixed:
#Explicit
explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i)

#Implicit
implicit_part = woodbury_vector.dot(dL_dfhat[None,:]).dot(I_KW_i)
#implicit_part = Ki_f.dot(dL_dfhat[None,:]).dot(I_KW_i)

dL_dK = explicit_part + implicit_part
else:
dL_dK = np.zeros_like(K)

####################
#compute dL_dthetaL#
####################
if likelihood.size > 0 and not likelihood.is_fixed:
raise NotImplementedError
else:
dL_dthetaL = np.zeros(likelihood.size)

#self.K_Wi_i = K_Wi_i
#self.Ki_W_i = Ki_W_i
#self.W = W
#self.K = K
#self.dL_dfhat = dL_dfhat
#self.explicit_part = explicit_part
#self.implicit_part = implicit_part
return log_marginal, K_Wi_i, dL_dK, dL_dthetaL

def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs):
"""
Rasmussen suggests the use of a numerically stable positive definite matrix B
which has positive diagonal elements and can be easily inverted

:param K: Prior Covariance matrix evaluated at locations X
:type K: NxN matrix
:param W: Negative hessian at a point (diagonal matrix)
:type W: Vector of diagonal values of hessian (1xN)
:returns: (K_Wi_i, L_B, not_provided)
"""
#w = GPy.util.diag.view(W)
#W[:] = np.where(w<1e-6, 1e-6, w)

#B = I + KW
B = np.eye(K.shape[0]) + np.dot(K, W)
#Bi, L, Li, logdetB = pdinv(B)
Bi = np.linalg.inv(B)

#K_Wi_i = np.eye(K.shape[0]) - mdot(W, Bi, K)
K_Wi_i = np.dot(W, Bi)

#self.K_Wi_i_brute = np.linalg.inv(K + np.linalg.inv(W))
#self.B = B
#self.Bi = Bi
Ki_W_i = np.dot(Bi, K)

sign, logdetB = np.linalg.slogdet(B)
return K_Wi_i, sign*logdetB, Bi, Ki_W_i
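My reading of the block-W variant above (not text from the commit): since $(K+W^{-1})^{-1} = W^{1/2}B^{-1}W^{1/2}$, the full Newton step written in the code as

$$a = \big(I - (K+W^{-1})^{-1}K\big)\,b, \qquad b = W\hat f + \nabla_f\log p(y\mid \hat f),$$

is algebraically the same update as lines 6-7 of Algorithm 3.1 in Rasmussen & Williams ($a = b - W^{1/2}B^{-1}W^{1/2}Kb$); the difference here is only that W may be a full (block) matrix rather than diagonal, hence the matrix products and the slogdet-based determinant.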
@@ -15,7 +15,7 @@ class Posterior(object):
the function at any new point x_* by integrating over this posterior.

"""
def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None, woodbury_inv=None):
def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None, woodbury_inv=None, prior_mean=0):
"""
woodbury_chol : a lower triangular matrix L that satisfies posterior_covariance = K - K L^{-T} L^{-1} K
woodbury_vector : a matrix (or vector, as Nx1 matrix) M which satisfies posterior_mean = K M

@@ -67,6 +67,7 @@ class Posterior(object):
#option 2:
self._mean = mean
self._covariance = cov
self._prior_mean = prior_mean

#compute this lazily
self._precision = None

@@ -175,7 +176,7 @@ class Posterior(object):
$$
"""
if self._woodbury_vector is None:
self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean)
self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean - self._prior_mean)
return self._woodbury_vector

@property
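For reference (my reading of the new prior_mean argument, not commit text): when the posterior is specified through mean/cov, the lazily computed woodbury vector becomes

$$\alpha = K^{-1}\big(\mu_{\mathrm{post}} - m_{\mathrm{prior}}\big),$$

so downstream predictions of the form $\mu_* = m(X_*) + K_{*}^\top\alpha$ subtract and then re-add the prior mean consistently with the changes to GP and SparseGP earlier in this diff.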
@@ -6,7 +6,8 @@ from posterior import Posterior

class SVGP(LatentFunctionInference):

def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, Y_metadata=None, KL_scale=1.0, batch_scale=1.0):
def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, KL_scale=1.0, batch_scale=1.0):

num_inducing = Z.shape[0]
num_data, num_outputs = Y.shape

@@ -22,6 +23,15 @@ class SVGP(LatentFunctionInference):
#S = S + np.eye(S.shape[0])*1e-5*np.max(np.max(S))
#Si, Lnew, _,_ = linalg.pdinv(S)

#compute mean function stuff
if mean_function is not None:
prior_mean_u = mean_function.f(Z)
prior_mean_f = mean_function.f(X)
else:
prior_mean_u = np.zeros((num_inducing, num_outputs))
prior_mean_f = np.zeros((num_data, num_outputs))

#compute kernel related stuff
Kmm = kern.K(Z)
Knm = kern.K(X, Z)

@@ -30,29 +40,43 @@ class SVGP(LatentFunctionInference):

#compute the marginal means and variances of q(f)
A = np.dot(Knm, Kmmi)
mu = np.dot(A, q_u_mean)
mu = prior_mean_f + np.dot(A, q_u_mean - prior_mean_u)
v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * np.einsum('ij,jkl->ikl', A, S),1)

#compute the KL term
Kmmim = np.dot(Kmmi, q_u_mean)
KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.einsum('ij,ijk->k', Kmmi, S) + 0.5*np.sum(q_u_mean*Kmmim,0)
KL = KLs.sum()
dKL_dm = Kmmim
#gradient of the KL term (assuming zero mean function)
dKL_dm = Kmmim.copy()
dKL_dS = 0.5*(Kmmi[:,:,None] - Si)
dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(-1)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)

if mean_function is not None:
#adjust KL term for mean function
Kmmi_mfZ = np.dot(Kmmi, prior_mean_u)
KL += -np.sum(q_u_mean*Kmmi_mfZ)
KL += 0.5*np.sum(Kmmi_mfZ*prior_mean_u)

#adjust gradient for mean function
dKL_dm -= Kmmi_mfZ
dKL_dKmm += Kmmim.dot(Kmmi_mfZ.T)
dKL_dKmm -= 0.5*Kmmi_mfZ.dot(Kmmi_mfZ.T)

#compute gradients for mean_function
dKL_dmfZ = Kmmi_mfZ - Kmmim

#quadrature for the likelihood
F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v, Y_metadata=Y_metadata)

#rescale the F term if working on a batch
F, dF_dmu, dF_dv = F*batch_scale, dF_dmu*batch_scale, dF_dv*batch_scale
if dF_dthetaL is not None:
dF_dthetaL = dF_dthetaL.sum(1).sum(1)*batch_scale

#derivatives of expected likelihood
#derivatives of expected likelihood, assuming zero mean function
Adv = A.T[:,:,None]*dF_dv[None,:,:] # As if dF_Dv is diagonal
Admu = A.T.dot(dF_dmu)
#AdvA = np.einsum('ijk,jl->ilk', Adv, A)
#AdvA = np.dot(A.T, Adv).swapaxes(0,1)
AdvA = np.dstack([np.dot(A.T, Adv[:,:,i].T) for i in range(num_outputs)])
tmp = np.einsum('ijk,jlk->il', AdvA, S).dot(Kmmi)
dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(-1) - tmp - tmp.T

@@ -62,6 +86,14 @@ class SVGP(LatentFunctionInference):
dF_dm = Admu
dF_dS = AdvA

#adjust gradient to account for mean function
if mean_function is not None:
dF_dmfX = dF_dmu.copy()
dF_dmfZ = -Admu
dF_dKmn -= np.dot(Kmmi_mfZ, dF_dmu.T)
dF_dKmm += Admu.dot(Kmmi_mfZ.T)

#sum (gradients of) expected likelihood and KL part
log_marginal = F.sum() - KL
dL_dm, dL_dS, dL_dKmm, dL_dKmn = dF_dm - dKL_dm, dF_dS- dKL_dS, dF_dKmm- dKL_dKmm, dF_dKmn

@@ -69,4 +101,8 @@ class SVGP(LatentFunctionInference):
dL_dchol = np.dstack([2.*np.dot(dL_dS[:,:,i], L[:,:,i]) for i in range(num_outputs)])
dL_dchol = choleskies.triang_to_flat(dL_dchol)

return Posterior(mean=q_u_mean, cov=S, K=Kmm), log_marginal, {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv, 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL}
grad_dict = {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv.sum(1), 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL}
if mean_function is not None:
grad_dict['dL_dmfZ'] = dF_dmfZ - dKL_dmfZ
grad_dict['dL_dmfX'] = dF_dmfX
return Posterior(mean=q_u_mean, cov=S, K=Kmm, prior_mean=prior_mean_u), log_marginal, grad_dict
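For reference, my reading of the mean-function terms added above (not commit text): with $A = K_{nm}K_{mm}^{-1}$ the marginal mean of q(f) becomes

$$\mu_f = m(X) + A\,\big(m_u - m(Z)\big),$$

and the KL between $q(u)=\mathcal N(m_u, S)$ and the prior $p(u)=\mathcal N\big(m(Z), K_{mm}\big)$ picks up the extra pieces $-m_u^\top K_{mm}^{-1}m(Z) + \tfrac12 m(Z)^\top K_{mm}^{-1}m(Z)$, which is exactly what the two `KL +=` lines add; the dKL_dmfZ and dF_dmfZ/dF_dmfX entries are the corresponding derivatives fed back to the SVGP model's parameters_changed.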
@@ -169,11 +169,13 @@ class VarDTC_minibatch(LatentFunctionInference):

Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm, maxtries=100)
if not np.isfinite(Kmm).all():
print Kmm
Lm = jitchol(Kmm)

LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
LL = jitchol(Lambda, maxtries=100)
LL = jitchol(Lambda)
logdet_L = 2.*np.sum(np.log(np.diag(LL)))
b = dtrtrs(LL,dtrtrs(Lm,psi1Y_full.T)[0])[0]
bbt = np.square(b).sum()
@@ -16,5 +16,6 @@ from _src.poly import Poly
from _src.eq_ode2 import EQ_ODE2

from _src.trunclinear import TruncLinear,TruncLinear_inf
from _src.splitKern import SplitKern,DiffGenomeKern
from _src.splitKern import SplitKern,DEtime
from _src.splitKern import DEtime as DiffGenomeKern
@@ -6,6 +6,20 @@ from kern import CombinationKernel
from ...util.caching import Cache_this
import itertools


def numpy_invalid_op_as_exception(func):
"""
A decorator that allows catching numpy invalid operations
as exceptions (the default behaviour is raising warnings).
"""
def func_wrapper(*args, **kwargs):
np.seterr(invalid='raise')
result = func(*args, **kwargs)
np.seterr(invalid='warn')
return result
return func_wrapper


class Prod(CombinationKernel):
"""
Computes the product of 2 kernels

@@ -46,18 +60,20 @@ class Prod(CombinationKernel):
self.parts[0].update_gradients_full(dL_dK*self.parts[1].K(X,X2), X, X2)
self.parts[1].update_gradients_full(dL_dK*self.parts[0].K(X,X2), X, X2)
else:
k = self.K(X,X2)*dL_dK
for p in self.parts:
p.update_gradients_full(k/p.K(X,X2),X,X2)
for combination in itertools.combinations(self.parts, len(self.parts) - 1):
prod = reduce(np.multiply, [p.K(X, X2) for p in combination])
to_update = list(set(self.parts) - set(combination))[0]
to_update.update_gradients_full(dL_dK * prod, X, X2)

def update_gradients_diag(self, dL_dKdiag, X):
if len(self.parts)==2:
self.parts[0].update_gradients_diag(dL_dKdiag*self.parts[1].Kdiag(X), X)
self.parts[1].update_gradients_diag(dL_dKdiag*self.parts[0].Kdiag(X), X)
else:
k = self.Kdiag(X)*dL_dKdiag
for p in self.parts:
p.update_gradients_diag(k/p.Kdiag(X),X)
for combination in itertools.combinations(self.parts, len(self.parts) - 1):
prod = reduce(np.multiply, [p.Kdiag(X) for p in combination])
to_update = list(set(self.parts) - set(combination))[0]
to_update.update_gradients_diag(dL_dKdiag * prod, X)

def gradients_X(self, dL_dK, X, X2=None):
target = np.zeros(X.shape)

@@ -65,9 +81,10 @@ class Prod(CombinationKernel):
target += self.parts[0].gradients_X(dL_dK*self.parts[1].K(X, X2), X, X2)
target += self.parts[1].gradients_X(dL_dK*self.parts[0].K(X, X2), X, X2)
else:
k = self.K(X,X2)*dL_dK
for p in self.parts:
target += p.gradients_X(k/p.K(X,X2),X,X2)
for combination in itertools.combinations(self.parts, len(self.parts) - 1):
prod = reduce(np.multiply, [p.K(X, X2) for p in combination])
to_update = list(set(self.parts) - set(combination))[0]
target += to_update.gradients_X(dL_dK * prod, X, X2)
return target

def gradients_X_diag(self, dL_dKdiag, X):

@@ -80,3 +97,5 @@ class Prod(CombinationKernel):
for p in self.parts:
target += p.gradients_X_diag(k/p.Kdiag(X),X)
return target
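For reference (my reading of the change above, not commit text): instead of dividing the full product by each part's covariance — which presumably breaks when a part's K contains zeros — the gradient of a product kernel $k = \prod_j k_j$ now uses the product rule directly,

$$\frac{\partial}{\partial\theta_i}\prod_j k_j = \Big(\prod_{j\neq i} k_j\Big)\frac{\partial k_i}{\partial\theta_i},$$

so for each part the code multiplies dL_dK by the product of all the other parts' covariances (the itertools.combinations over len(parts)-1 kernels) and passes that to the part's own update_gradients_full.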
@@ -37,11 +37,11 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variati

# Compute for psi0 and psi1
mu2S = np.square(mu)+S
dL_dvar += np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu)
dL_dgamma += np.einsum('n,q,nq->nq',dL_dpsi0,variance,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,variance,Z,mu)
dL_dmu += np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*variance,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,variance,Z)
dL_dS += np.einsum('n,nq,q->nq',dL_dpsi0,gamma,variance)
dL_dZ += np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, variance,mu)
dL_dvar += (dL_dpsi0[:,None]*gamma*mu2S).sum(axis=0) + (dL_dpsi1.T.dot(gamma*mu)*Z).sum(axis=0)
dL_dgamma += dL_dpsi0[:,None]*variance*mu2S+ dL_dpsi1.dot(Z)*mu*variance
dL_dmu += dL_dpsi0[:,None]*2.*variance*gamma*mu + dL_dpsi1.dot(Z)*gamma*variance
dL_dS += dL_dpsi0[:,None]*variance*gamma
dL_dZ += dL_dpsi1.T.dot(gamma*mu)*variance

return dL_dvar, dL_dZ, dL_dmu, dL_dS, dL_dgamma

@@ -64,29 +64,23 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S, gamma):
gamma2 = np.square(gamma)
variance2 = np.square(variance)
mu2S = mu2+S # NxQ
gvm = np.einsum('nq,nq,q->nq',gamma,mu,variance)
common_sum = np.einsum('nq,mq->nm',gvm,Z)
# common_sum = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu) # NxM
Z_expect = np.einsum('mo,mq,oq->q',dL_dpsi2,Z,Z)
gvm = gamma*mu*variance
common_sum = gvm.dot(Z.T)
Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0)
Z_expect_var2 = Z_expect*variance2
dL_dpsi2T = dL_dpsi2+dL_dpsi2.T
tmp = np.einsum('mo,oq->mq',dL_dpsi2T,Z)
common_expect = np.einsum('mq,nm->nq',tmp,common_sum)
# common_expect = np.einsum('mo,mq,no->nq',dL_dpsi2+dL_dpsi2.T,Z,common_sum)
Z2_expect = np.einsum('om,nm->no',dL_dpsi2T,common_sum)
Z1_expect = np.einsum('om,mq->oq',dL_dpsi2T,Z)
common_expect = common_sum.dot(dL_dpsi2T).dot(Z)
Z2_expect = common_sum.dot(dL_dpsi2T)
Z1_expect = dL_dpsi2T.dot(Z)

dL_dvar = np.einsum('nq,q,q->q',2.*(gamma*mu2S-gamma2*mu2),variance,Z_expect)+\
np.einsum('nq,nq,nq->q',common_expect,gamma,mu)
dL_dvar = variance*Z_expect*2.*(gamma*mu2S-gamma2*mu2).sum(axis=0)+(common_expect*gamma*mu).sum(axis=0)

dL_dgamma = np.einsum('q,q,nq->nq',Z_expect,variance2,(mu2S-2.*gamma*mu2))+\
np.einsum('nq,q,nq->nq',common_expect,variance,mu)
dL_dgamma = Z_expect_var2*(mu2S-2.*gamma*mu2)+common_expect*mu*variance

dL_dmu = Z_expect_var2*mu*2.*(gamma-gamma2) + common_expect*gamma*variance

dL_dS = gamma*Z_expect_var2

dL_dmu = np.einsum('q,q,nq,nq->nq',Z_expect,variance2,mu,2.*(gamma-gamma2))+\
np.einsum('nq,nq,q->nq',common_expect,gamma,variance)

dL_dS = np.einsum('q,nq,q->nq',Z_expect,gamma,variance2)

# dL_dZ = 2.*(np.einsum('om,nq,q,mq,nq->oq',dL_dpsi2,gamma,variance2,Z,(mu2S-gamma*mu2))+np.einsum('om,nq,q,nq,nm->oq',dL_dpsi2,gamma,variance,mu,common_sum))
dL_dZ = Z1_expect*np.einsum('nq,q,nq->q',gamma,variance2,(mu2S-gamma*mu2))+np.einsum('nq,q,nq,nm->mq',gamma,variance,mu,Z2_expect)
dL_dZ = (gamma*(mu2S-gamma*mu2)).sum(axis=0)*variance2*Z1_expect+ Z2_expect.T.dot(gamma*mu)*variance

return dL_dvar, dL_dgamma, dL_dmu, dL_dS, dL_dZ
@@ -22,12 +22,14 @@ try:
# _psi1 NxM
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob

N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
l2 = np.square(lengthscale)
log_denom1 = np.log(S/l2+1)
log_denom2 = np.log(2*S/l2+1)
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
log_gamma = np.log(gamma)
log_gamma1 = np.log(1.-gamma)
variance = float(variance)
psi0 = np.empty(N)
psi0[:] = variance

@@ -37,6 +39,7 @@ try:
from ....util.misc import param_to_array
S = param_to_array(S)
mu = param_to_array(mu)
gamma = param_to_array(gamma)
Z = param_to_array(Z)

support_code = """

@@ -79,7 +82,7 @@ try:
}
}
"""
weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)
weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)

psi2 = psi2n.sum(axis=0)
return psi0,psi1,psi2,psi2n

@@ -94,12 +97,13 @@ try:

mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
l2 = np.square(lengthscale)
log_denom1 = np.log(S/l2+1)
log_denom2 = np.log(2*S/l2+1)
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
gamma, gamma1 = variational_posterior.gamma_probabilities()
log_gamma = np.log(gamma)
log_gamma1 = np.log(1.-gamma)
variance = float(variance)

dvar = np.zeros(1)

@@ -113,6 +117,7 @@ try:
from ....util.misc import param_to_array
S = param_to_array(S)
mu = param_to_array(mu)
gamma = param_to_array(gamma)
Z = param_to_array(Z)

support_code = """

@@ -130,7 +135,6 @@ try:
double Zm1q = Z(m1,q);
double Zm2q = Z(m2,q);
double gnq = gamma(n,q);
double g1nq = gamma1(n,q);
double mu_nq = mu(n,q);

if(m2==0) {

@@ -156,7 +160,7 @@ try:

dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum);
dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.;
dgamma(n,q) += lpsi1*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
dgamma(n,q) += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
dl(q) += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zm1q*Zm1q/(lq*lq)*d_exp2)/(2.*exp_sum);
dZ(m1,q) += lpsi1*(-Zmu/denom*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
}

@@ -184,7 +188,7 @@ try:

dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum;
dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
dgamma(n,q) += lpsi2*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
dgamma(n,q) += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
}

@@ -192,7 +196,7 @@ try:
}
}
"""
weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','gamma1','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)
weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)

dl *= 2.*lengthscale
if not ARD:
@@ -7,7 +7,7 @@ from kern import Kern,CombinationKernel
from .independent_outputs import index_to_slices
import itertools

class DiffGenomeKern(Kern):
class DEtime(Kern):

def __init__(self, kernel, idx_p, Xp, index_dim=-1, name='DiffGenomeKern'):
self.idx_p = idx_p
@@ -60,7 +60,10 @@ class White(Static):
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)

def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = np.trace(dL_dK)
if X2 is None:
self.variance.gradient = np.trace(dL_dK)
else:
self.variance.gradient = 0.

def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = dL_dKdiag.sum()
@@ -296,6 +296,8 @@ class Exponential(Stationary):
return -0.5*self.K_of_r(r)


class OU(Stationary):
"""
OU kernel:
@@ -77,7 +77,7 @@ class Bernoulli(Likelihood):

return Z_hat, mu_hat, sigma2_hat

def variational_expectations(self, Y, m, v, gh_points=None):
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
if isinstance(self.gp_link, link_functions.Probit):

if gh_points is None:
@@ -34,7 +34,9 @@ class Gaussian(Likelihood):
if gp_link is None:
gp_link = link_functions.Identity()

assert isinstance(gp_link, link_functions.Identity), "the likelihood only implemented for the identity link"
if not isinstance(gp_link, link_functions.Identity):
print "Warning, Exact inference is not implemented for non-identity link functions,\
if you are not already, ensure Laplace inference_method is used"

super(Gaussian, self).__init__(gp_link, name=name)

@@ -263,16 +265,19 @@ class Gaussian(Likelihood):
return d2logpdf_dlink2_dvar

def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
return dlogpdf_dvar
dlogpdf_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
dlogpdf_dtheta[0,:,:] = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
return dlogpdf_dtheta

def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
return dlogpdf_dlink_dvar
dlogpdf_dlink_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
dlogpdf_dlink_dtheta[0, :, :]= self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
return dlogpdf_dlink_dtheta

def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
return d2logpdf_dlink2_dvar
d2logpdf_dlink2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
d2logpdf_dlink2_dtheta[0, :, :] = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
return d2logpdf_dlink2_dtheta

def _mean(self, gp):
"""

@@ -305,18 +310,17 @@ class Gaussian(Likelihood):
Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp])
return Ysim.reshape(orig_shape)

def log_predictive_density(self, y_test, mu_star, var_star):
def log_predictive_density(self, y_test, mu_star, var_star, Y_metadata=None):
"""
assumes independence
"""
v = var_star + self.variance
return -0.5*np.log(2*np.pi) -0.5*np.log(v) - 0.5*np.square(y_test - mu_star)/v

def variational_expectations(self, Y, m, v, gh_points=None):
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
lik_var = float(self.variance)
F = -0.5*np.log(2*np.pi) -0.5*np.log(lik_var) - 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/lik_var
dF_dmu = (Y - m)/lik_var
dF_dv = np.ones_like(v)*(-0.5/lik_var)
dF_dlik_var = np.sum(-0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2))
dF_dtheta = [dF_dlik_var]
return F, dF_dmu, dF_dv, dF_dtheta
dF_dtheta = -0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2)
return F, dF_dmu, dF_dv, dF_dtheta.reshape(1, Y.shape[0], Y.shape[1])
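For reference, the closed form implemented in variational_expectations above (standard result for a Gaussian likelihood with variance $\sigma^2$; my transcription, not commit text):

$$F = \mathbb E_{q(f)=\mathcal N(m,v)}\big[\log\mathcal N(y\mid f,\sigma^2)\big] = -\tfrac12\log 2\pi\sigma^2 - \frac{(y-m)^2 + v}{2\sigma^2},$$
$$\frac{\partial F}{\partial m} = \frac{y-m}{\sigma^2},\qquad \frac{\partial F}{\partial v} = -\frac{1}{2\sigma^2},\qquad \frac{\partial F}{\partial \sigma^2} = -\frac{1}{2\sigma^2} + \frac{(y-m)^2+v}{2\sigma^4}.$$

The reshape to (1, N, D) at the end just puts the single-parameter derivative into the Ntheta-leading layout adopted elsewhere in this changeset.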
@ -5,7 +5,7 @@ import numpy as np
|
|||
from scipy import stats,special
|
||||
import scipy as sp
|
||||
import link_functions
|
||||
from ..util.misc import chain_1, chain_2, chain_3
|
||||
from ..util.misc import chain_1, chain_2, chain_3, blockify_dhess_dtheta, blockify_third, blockify_hessian, safe_exp
|
||||
from scipy.integrate import quad
|
||||
import warnings
|
||||
from ..core.parameterization import Parameterized
|
||||
|
|
@ -39,6 +39,7 @@ class Likelihood(Parameterized):
|
|||
assert isinstance(gp_link,link_functions.GPTransformation), "gp_link is not a valid GPTransformation."
|
||||
self.gp_link = gp_link
|
||||
self.log_concave = False
|
||||
self.not_block_really = False
|
||||
|
||||
def _gradients(self,partial):
|
||||
return np.zeros(0)
@ -177,7 +178,12 @@ class Likelihood(Parameterized):
|
|||
if np.any(np.isnan(dF_dm)) or np.any(np.isinf(dF_dm)):
|
||||
stop
|
||||
|
||||
dF_dtheta = None # Not yet implemented
|
||||
if self.size:
|
||||
dF_dtheta = self.dlogpdf_dtheta(X, Y[:,None]) # Ntheta x (orig size) x N_{quad_points}
|
||||
dF_dtheta = np.dot(dF_dtheta, gh_w)
|
||||
dF_dtheta = dF_dtheta.reshape(self.size, shape[0], shape[1])
|
||||
else:
|
||||
dF_dtheta = None # Not yet implemented
|
||||
return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape), dF_dtheta
|
||||
|
||||
def predictive_mean(self, mu, variance, Y_metadata=None):
|
||||
|
|
@ -189,20 +195,27 @@ class Likelihood(Parameterized):
|
|||
|
||||
"""
|
||||
#conditional_mean: the expected value of y given some f, under this likelihood
|
||||
fmin = -np.inf
|
||||
fmax = np.inf
|
||||
def int_mean(f,m,v):
|
||||
p = np.exp(-(0.5/v)*np.square(f - m))
|
||||
exponent = -(0.5/v)*np.square(f - m)
|
||||
#If exponent is under -30 then exp(exponent) will be very small, so don't exp it!
|
||||
#If p is zero then conditional_mean will overflow
|
||||
assert v.all() > 0
|
||||
p = safe_exp(exponent)
|
||||
|
||||
#If p is zero then conditional_variance will overflow
|
||||
if p < 1e-10:
|
||||
return 0.
|
||||
else:
|
||||
return self.conditional_mean(f)*p
|
||||
scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
|
||||
scaled_mean = [quad(int_mean, fmin, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
|
||||
mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
|
||||
|
||||
return mean
|
||||
|
||||
def _conditional_mean(self, f):
|
||||
"""Quadrature calculation of the conditional mean: E(Y_star|f)"""
|
||||
"""Quadrature calculation of the conditional mean: E(Y_star|f_star)"""
|
||||
raise NotImplementedError, "implement this function to make predictions"
|
||||
|
||||
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
|
||||
|
|
@ -210,7 +223,7 @@ class Likelihood(Parameterized):
|
|||
Approximation to the predictive variance: V(Y_star)
|
||||
|
||||
The following variance decomposition is used:
|
||||
V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
|
||||
V(Y_star) = E( V(Y_star|f_star)**2 ) + V( E(Y_star|f_star) )**2
|
||||
|
||||
:param mu: mean of posterior
|
||||
:param sigma: standard deviation of posterior
|
||||
|
|
@ -220,15 +233,22 @@ class Likelihood(Parameterized):
|
|||
#sigma2 = sigma**2
|
||||
normalizer = np.sqrt(2*np.pi*variance)
|
||||
|
||||
fmin_v = -np.inf
|
||||
fmin_m = -np.inf
|
||||
fmin = -np.inf
|
||||
fmax = np.inf
|
||||
|
||||
from ..util.misc import safe_exp
|
||||
# E( V(Y_star|f_star) )
|
||||
def int_var(f,m,v):
|
||||
p = np.exp(-(0.5/v)*np.square(f - m))
|
||||
exponent = -(0.5/v)*np.square(f - m)
|
||||
p = safe_exp(exponent)
|
||||
#If p is zero then conditional_variance will overflow
|
||||
if p < 1e-10:
|
||||
return 0.
|
||||
else:
|
||||
return self.conditional_variance(f)*p
|
||||
scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
|
||||
scaled_exp_variance = [quad(int_var, fmin_v, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
|
||||
exp_var = np.array(scaled_exp_variance)[:,None] / normalizer
|
||||
|
||||
#V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
|
||||
|
|
@ -240,14 +260,15 @@ class Likelihood(Parameterized):
|
|||
|
||||
#E( E(Y_star|f_star)**2 )
|
||||
def int_pred_mean_sq(f,m,v,predictive_mean_sq):
|
||||
p = np.exp(-(0.5/v)*np.square(f - m))
|
||||
exponent = -(0.5/v)*np.square(f - m)
|
||||
p = np.exp(exponent)
|
||||
#If p is zero then conditional_mean**2 will overflow
|
||||
if p < 1e-10:
|
||||
return 0.
|
||||
else:
|
||||
return self.conditional_mean(f)**2*p
|
||||
|
||||
scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
|
||||
scaled_exp_exp2 = [quad(int_pred_mean_sq, fmin_m, fmax,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
|
||||
exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer
|
||||
|
||||
var_exp = exp_exp2 - predictive_mean_sq
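A hedged illustration of the decomposition above (editorial, not part of the diff): for a Gaussian likelihood with noise variance s2 and identity link, E[V(Y*|f*)] = s2 and V[E(Y*|f*)] = v, so quadrature of the same form as the code should recover v + s2. The values below are made up.

import numpy as np
from scipy.integrate import quad
m, v, s2 = 0.4, 0.3, 0.1
normalizer = np.sqrt(2*np.pi*v)
exp_var = quad(lambda f: s2*np.exp(-0.5*(f - m)**2/v), -np.inf, np.inf)[0]/normalizer
exp_mean_sq = quad(lambda f: f**2*np.exp(-0.5*(f - m)**2/v), -np.inf, np.inf)[0]/normalizer
pred_var = exp_var + (exp_mean_sq - m**2)   # approximately v + s2, up to quadrature error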
|
||||
|
|
@ -295,8 +316,18 @@ class Likelihood(Parameterized):
|
|||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
if isinstance(self.gp_link, link_functions.Identity):
|
||||
return self.pdf_link(f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
|
||||
def logpdf_sum(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
Convenience function that can be overridden for likelihoods where this could
be computed more efficiently
|
||||
"""
|
||||
return np.sum(self.logpdf(f, y, Y_metadata=Y_metadata))
|
||||
|
||||
def logpdf(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -313,8 +344,11 @@ class Likelihood(Parameterized):
|
|||
:returns: log likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
if isinstance(self.gp_link, link_functions.Identity):
|
||||
return self.logpdf_link(f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
|
||||
def dlogpdf_df(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -332,11 +366,15 @@ class Likelihood(Parameterized):
|
|||
:returns: derivative of log likelihood evaluated for this point
|
||||
:rtype: 1xN array
|
||||
"""
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
return chain_1(dlogpdf_dlink, dlink_df)
|
||||
if isinstance(self.gp_link, link_functions.Identity):
|
||||
return self.dlogpdf_dlink(f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
return chain_1(dlogpdf_dlink, dlink_df)
|
||||
|
||||
@blockify_hessian
|
||||
def d2logpdf_df2(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
Evaluates the link function link(f) then computes the second derivative of log likelihood using it
|
||||
|
|
@ -353,13 +391,18 @@ class Likelihood(Parameterized):
|
|||
:returns: second derivative of log likelihood evaluated for this point (diagonal only)
|
||||
:rtype: 1xN array
|
||||
"""
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
|
||||
if isinstance(self.gp_link, link_functions.Identity):
|
||||
d2logpdf_df2 = self.d2logpdf_dlink2(f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
d2logpdf_df2 = chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
|
||||
return d2logpdf_df2
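For reference (an editorial note, and assuming chain_1 and chain_2 implement the standard chain rule), the two branches above compute:

#   d/df   log p(y|g(f)) = dlogpdf_dlink * dlink_df
#   d2/df2 log p(y|g(f)) = d2logpdf_dlink2 * dlink_df**2 + dlogpdf_dlink * d2link_df2
# where g is the inverse link (gp_link.transf); with the identity link dlink_df = 1
# and d2link_df2 = 0, which is why the first branch can skip the chaining.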
|
||||
|
||||
@blockify_third
|
||||
def d3logpdf_df3(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
Evaluates the link function link(f) then computes the third derivative of log likelihood using it
|
||||
|
|
@ -376,64 +419,96 @@ class Likelihood(Parameterized):
|
|||
:returns: third derivative of log likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
d3link_df3 = self.gp_link.d3transf_df3(f)
|
||||
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
|
||||
if isinstance(self.gp_link, link_functions.Identity):
|
||||
d3logpdf_df3 = self.d3logpdf_dlink3(f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
d3link_df3 = self.gp_link.d3transf_df3(f)
|
||||
d3logpdf_df3 = chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
|
||||
return d3logpdf_df3
|
||||
|
||||
|
||||
def dlogpdf_dtheta(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
TODO: Doc strings
|
||||
"""
|
||||
if self.size > 0:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
if self.not_block_really:
|
||||
raise NotImplementedError("Need to make a decorator for this!")
|
||||
if isinstance(self.gp_link, link_functions.Identity):
|
||||
return self.dlogpdf_link_dtheta(f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
# There are no parameters so return an empty array for derivatives
|
||||
return np.zeros([1, 0])
|
||||
return np.zeros((0, f.shape[0], f.shape[1]))
|
||||
|
||||
def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
TODO: Doc strings
|
||||
"""
|
||||
if self.size > 0:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
return chain_1(dlogpdf_dlink_dtheta, dlink_df)
|
||||
if self.not_block_really:
|
||||
raise NotImplementedError("Need to make a decorator for this!")
|
||||
if isinstance(self.gp_link, link_functions.Identity):
|
||||
return self.dlogpdf_dlink_dtheta(f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
|
||||
dlogpdf_df_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
|
||||
#Chain each parameter of the likelihood separately
|
||||
for p in range(self.size):
|
||||
dlogpdf_df_dtheta[p, :, :] = chain_1(dlogpdf_dlink_dtheta[p,:,:], dlink_df)
|
||||
return dlogpdf_df_dtheta
|
||||
#return chain_1(dlogpdf_dlink_dtheta, dlink_df)
|
||||
else:
|
||||
# There are no parameters so return an empty array for derivatives
|
||||
return np.zeros([f.shape[0], 0])
|
||||
return np.zeros((0, f.shape[0], f.shape[1]))
|
||||
|
||||
def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
TODO: Doc strings
|
||||
"""
|
||||
if self.size > 0:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
|
||||
if self.not_block_really:
|
||||
raise NotImplementedError("Need to make a decorator for this!")
|
||||
if isinstance(self.gp_link, link_functions.Identity):
|
||||
return self.d2logpdf_dlink2_dtheta(f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
|
||||
d2logpdf_df2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
|
||||
#Chain each parameter of the likelihood separately
|
||||
for p in range(self.size):
|
||||
d2logpdf_df2_dtheta[p, :, :] = chain_2(d2logpdf_dlink2_dtheta[p,:,:], dlink_df, dlogpdf_dlink_dtheta[p,:,:], d2link_df2)
|
||||
return d2logpdf_df2_dtheta
|
||||
#return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
|
||||
else:
|
||||
# There are no parameters so return an empty array for derivatives
|
||||
return np.zeros([f.shape[0], 0])
|
||||
return np.zeros((0, f.shape[0], f.shape[1]))
|
||||
|
||||
def _laplace_gradients(self, f, y, Y_metadata=None):
|
||||
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata).sum(axis=0)
|
||||
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata)
|
||||
d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata)
|
||||
|
||||
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
|
||||
# ensure we have gradients for every parameter we want to optimize
|
||||
assert len(dlogpdf_dtheta) == self.size #1 x num_param array
|
||||
assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix
|
||||
assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix
|
||||
assert dlogpdf_dtheta.shape[0] == self.size #f, d x num_param array
|
||||
assert dlogpdf_df_dtheta.shape[0] == self.size #f x d x num_param matrix or just f x num_param
|
||||
assert d2logpdf_df2_dtheta.shape[0] == self.size #f x num_param matrix or f x d x num_param matrix, f x f x num_param or f x f x d x num_param
|
||||
|
||||
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
|
||||
|
||||
|
|
@ -454,19 +529,98 @@ class Likelihood(Parameterized):
|
|||
|
||||
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
|
||||
#compute the quantiles by sampling!!!
|
||||
N_samp = 1000
|
||||
N_samp = 500
|
||||
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
|
||||
#ss_f = s.flatten()
|
||||
#ss_y = self.samples(ss_f, Y_metadata)
|
||||
#ss_y = self.samples(s, Y_metadata, samples=100)
|
||||
ss_y = self.samples(s, Y_metadata)
|
||||
#ss_y = ss_y.reshape(mu.shape[0], N_samp)
|
||||
|
||||
return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]
|
||||
|
||||
def samples(self, gp, Y_metadata=None):
|
||||
def samples(self, gp, Y_metadata=None, samples=1):
|
||||
"""
|
||||
Returns a set of samples of observations based on a given value of the latent variable.
|
||||
|
||||
:param gp: latent variable
|
||||
:param samples: number of samples to take for each f location
|
||||
"""
|
||||
raise NotImplementedError
|
||||
raise NotImplementedError("""May be possible to use MCMC with user-tuning, see
|
||||
MCMC_pdf_samples in likelihood.py and write samples function
|
||||
using this, beware this is a simple implementation
|
||||
of Metropolis and will not work well for all likelihoods""")
|
||||
|
||||
def MCMC_pdf_samples(self, fNew, num_samples=1000, starting_loc=None, stepsize=0.1, burn_in=1000, Y_metadata=None):
|
||||
"""
|
||||
Simple implementation of Metropolis sampling algorithm
|
||||
|
||||
Will run a parallel chain for each input dimension (treats each f independently)
|
||||
Thus assumes f*_1 independent of f*_2 etc.
|
||||
|
||||
:param num_samples: Number of samples to take
|
||||
:param fNew: f at which to sample around
|
||||
:param starting_loc: Starting locations of the independent chains (usually will be conditional_mean of likelihood), often link_f
|
||||
:param stepsize: Stepsize for the normal proposal distribution (will need modifying)
|
||||
:param burn_in: number of samples to use for burn-in (will need modifying)
|
||||
:param Y_metadata: Y_metadata for pdf
|
||||
"""
|
||||
print "Warning, using MCMC for sampling y*, needs to be tuned!"
|
||||
if starting_loc is None:
|
||||
starting_loc = fNew
|
||||
from functools import partial
|
||||
logpdf = partial(self.logpdf, f=fNew, Y_metadata=Y_metadata)
|
||||
pdf = lambda y_star: np.exp(logpdf(y=y_star[:, None]))
|
||||
#The link function of f should be a good starting point
|
||||
#(i.e. the point before you corrupt it with the likelihood)
|
||||
par_chains = starting_loc.shape[0]
|
||||
chain_values = np.zeros((par_chains, num_samples))
|
||||
chain_values[:, 0][:,None] = starting_loc
|
||||
#Use same stepsize for all par_chains
|
||||
stepsize = np.ones(par_chains)*stepsize
|
||||
accepted = np.zeros((par_chains, num_samples+burn_in))
|
||||
accept_ratio = np.zeros(num_samples+burn_in)
|
||||
#Whilst burning in, only need to keep the previous lot
|
||||
burnin_cache = np.zeros(par_chains)
|
||||
burnin_cache[:] = starting_loc.flatten()
|
||||
burning_in = True
|
||||
for i in xrange(burn_in+num_samples):
|
||||
next_ind = i-burn_in
|
||||
if burning_in:
|
||||
old_y = burnin_cache
|
||||
else:
|
||||
old_y = chain_values[:,next_ind-1]
|
||||
|
||||
old_lik = pdf(old_y)
|
||||
#Propose new y from Gaussian proposal
|
||||
new_y = np.random.normal(loc=old_y, scale=stepsize)
|
||||
new_lik = pdf(new_y)
|
||||
#Accept using Metropolis (not hastings) acceptance
|
||||
#Always accepts if new_lik > old_lik
|
||||
accept_probability = np.minimum(1, new_lik/old_lik)
|
||||
u = np.random.uniform(0,1,par_chains)
|
||||
#print "Accept prob: ", accept_probability
|
||||
accepts = u < accept_probability
|
||||
if burning_in:
|
||||
burnin_cache[accepts] = new_y[accepts]
|
||||
burnin_cache[~accepts] = old_y[~accepts]
|
||||
if i == burn_in:
|
||||
burning_in = False
|
||||
chain_values[:,0] = burnin_cache
|
||||
else:
|
||||
#If it was accepted then new_y becomes the latest sample
|
||||
chain_values[accepts, next_ind] = new_y[accepts]
|
||||
#Otherwise use old y as the sample
|
||||
chain_values[~accepts, next_ind] = old_y[~accepts]
|
||||
|
||||
accepted[~accepts, i] = 0
|
||||
accepted[accepts, i] = 1
|
||||
accept_ratio[i] = np.sum(accepted[:,i])/float(par_chains)
|
||||
|
||||
#Show progress
|
||||
if i % int((burn_in+num_samples)*0.1) == 0:
|
||||
print "{}% of samples taken ({})".format((i/int((burn_in+num_samples)*0.1)*10), i)
|
||||
print "Last run accept ratio: ", accept_ratio[i]
|
||||
|
||||
print "Average accept ratio: ", np.mean(accept_ratio)
|
||||
return chain_values
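A hypothetical usage sketch (not part of this commit): drawing y* samples around predicted latent values with a Student-t likelihood. The values and step size are illustrative only and, as the warning above says, will need tuning.

import numpy as np
import GPy
lik = GPy.likelihoods.StudentT(deg_free=5.0, sigma2=0.1)
f_star = np.random.randn(10, 1)            # stand-in for predicted latent values
y_chains = lik.MCMC_pdf_samples(f_star, num_samples=2000, burn_in=500, stepsize=0.2)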
@ -35,8 +35,8 @@ class StudentT(Likelihood):
|
|||
|
||||
self.log_concave = False
|
||||
|
||||
def parameters_changed(self):
|
||||
self.variance = (self.v / float(self.v - 2)) * self.sigma2
|
||||
#def parameters_changed(self):
|
||||
#self.variance = (self.v / float(self.v - 2)) * self.sigma2
|
||||
|
||||
def update_gradients(self, grads):
|
||||
"""
|
||||
|
|
@ -180,7 +180,8 @@ class StudentT(Likelihood):
|
|||
:rtype: float
|
||||
"""
|
||||
e = y - inv_link_f
|
||||
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
|
||||
e2 = np.square(e)
|
||||
dlogpdf_dvar = self.v*(e2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e2))
|
||||
return dlogpdf_dvar
|
||||
|
||||
def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None):
|
||||
|
|
@ -226,17 +227,18 @@ class StudentT(Likelihood):
|
|||
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
|
||||
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet
|
||||
return np.hstack((dlogpdf_dvar, dlogpdf_dv))
|
||||
return np.array((dlogpdf_dvar, dlogpdf_dv))
|
||||
|
||||
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
|
||||
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet
|
||||
return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
|
||||
return np.array((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
|
||||
|
||||
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
|
||||
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
|
||||
d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
|
||||
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
|
||||
|
||||
return np.array((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
|
||||
|
||||
def predictive_mean(self, mu, sigma, Y_metadata=None):
|
||||
# The comment here confuses mean and median.
|
||||
|
|
|
|||
|
|
@ -4,4 +4,5 @@
|
|||
from kernel import Kernel
|
||||
from linear import Linear
|
||||
from mlp import MLP
|
||||
#from rbf import RBF
|
||||
from additive import Additive
|
||||
from compound import Compound
|
||||
|
|
|
|||
|
|
@ -2,8 +2,7 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from ..core.mapping import Mapping
|
||||
import GPy
|
||||
from ..core import Mapping
|
||||
|
||||
class Additive(Mapping):
|
||||
"""
|
||||
|
|
@ -17,45 +16,23 @@ class Additive(Mapping):
|
|||
:type mapping1: GPy.mappings.Mapping
|
||||
:param mapping2: second mapping to add together.
|
||||
:type mapping2: GPy.mappings.Mapping
|
||||
:param tensor: whether or not to use the tensor product of input spaces
|
||||
:type tensor: bool
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, mapping1, mapping2, tensor=False):
|
||||
if tensor:
|
||||
input_dim = mapping1.input_dim + mapping2.input_dim
|
||||
else:
|
||||
input_dim = mapping1.input_dim
|
||||
assert(mapping1.input_dim==mapping2.input_dim)
|
||||
def __init__(self, mapping1, mapping2):
|
||||
assert(mapping1.input_dim==mapping2.input_dim)
|
||||
assert(mapping1.output_dim==mapping2.output_dim)
|
||||
output_dim = mapping1.output_dim
|
||||
input_dim, output_dim = mapping1.input_dim, mapping1.output_dim
|
||||
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
|
||||
self.mapping1 = mapping1
|
||||
self.mapping2 = mapping2
|
||||
self.num_params = self.mapping1.num_params + self.mapping2.num_params
|
||||
self.name = self.mapping1.name + '+' + self.mapping2.name
|
||||
def _get_param_names(self):
|
||||
return self.mapping1._get_param_names + self.mapping2._get_param_names
|
||||
|
||||
def _get_params(self):
|
||||
return np.hstack((self.mapping1._get_params(), self.mapping2._get_params()))
|
||||
|
||||
def _set_params(self, x):
|
||||
self.mapping1._set_params(x[:self.mapping1.num_params])
|
||||
self.mapping2._set_params(x[self.mapping1.num_params:])
|
||||
|
||||
def randomize(self):
|
||||
self.mapping1._randomize()
|
||||
self.mapping2._randomize()
|
||||
|
||||
def f(self, X):
|
||||
return self.mapping1.f(X) + self.mapping2.f(X)
|
||||
|
||||
def df_dtheta(self, dL_df, X):
|
||||
self._df_dA = (dL_df[:, :, None]*self.kern.K(X, self.X)[:, None, :]).sum(0).T
|
||||
self._df_dbias = (dL_df.sum(0))
|
||||
return np.hstack((self._df_dA.flatten(), self._df_dbias))
|
||||
def update_gradients(self, dL_dF, X):
|
||||
self.mapping1.update_gradients(dL_dF, X)
|
||||
self.mapping2.update_gradients(dL_dF, X)
|
||||
|
||||
def df_dX(self, dL_df, X):
|
||||
return self.kern.dK_dX((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
|
||||
def gradients_X(self, dL_dF, X):
|
||||
return self.mapping1.gradients_X(dL_dF, X) + self.mapping2.gradients_X(dL_dF, X)
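A hypothetical usage example (not part of the commit): summing two mappings that share input and output dimensions, as the assertions in __init__ above require.

import numpy as np
import GPy
add_map = GPy.mappings.Additive(GPy.mappings.Linear(2, 1),
                                GPy.mappings.MLP(input_dim=2, output_dim=1, hidden_dim=3))
F = add_map.f(np.random.randn(10, 2))      # shape (10, 1)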
GPy/mappings/compound.py (new file, 39 lines)
@ -0,0 +1,39 @@
|
|||
# Copyright (c) 2015, James Hensman and Alan Saul
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from ..core import Mapping
|
||||
|
||||
class Compound(Mapping):
|
||||
"""
|
||||
Mapping based on passing one mapping through another
|
||||
|
||||
.. math::
|
||||
|
||||
f(\mathbf{x}) = f_2(f_1(\mathbf{x}))
|
||||
|
||||
:param mapping1: first mapping
|
||||
:type mapping1: GPy.mappings.Mapping
|
||||
:param mapping2: second mapping
|
||||
:type mapping2: GPy.mappings.Mapping
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, mapping1, mapping2):
|
||||
assert(mapping1.output_dim==mapping2.input_dim)
|
||||
input_dim, output_dim = mapping1.input_dim, mapping2.output_dim
|
||||
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
|
||||
self.mapping1 = mapping1
|
||||
self.mapping2 = mapping2
|
||||
self.link_parameters(self.mapping1, self.mapping2)
|
||||
|
||||
def f(self, X):
|
||||
return self.mapping2.f(self.mapping1.f(X))
|
||||
|
||||
def update_gradients(self, dL_dF, X):
|
||||
hidden = self.mapping1.f(X)
|
||||
self.mapping2.update_gradients(dL_dF, hidden)
|
||||
self.mapping1.update_gradients(self.mapping2.gradients_X(dL_dF, hidden), X)
|
||||
|
||||
def gradients_X(self, dL_dF, X):
|
||||
hidden = self.mapping1.f(X)
|
||||
return self.mapping1.gradients_X(self.mapping2.gradients_X(dL_dF, hidden), X)
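A hypothetical usage example (not part of the commit): composing a Linear map into an MLP map, respecting the assertion that mapping1.output_dim equals mapping2.input_dim.

import numpy as np
import GPy
comp = GPy.mappings.Compound(GPy.mappings.Linear(3, 2),
                             GPy.mappings.MLP(input_dim=2, output_dim=1, hidden_dim=5))
F = comp.f(np.random.randn(10, 3))         # shape (10, 1)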
GPy/mappings/identity.py (new file, 26 lines)
@ -0,0 +1,26 @@
|
|||
# Copyright (c) 2015, James Hensman
|
||||
|
||||
from ..core.mapping import Mapping
|
||||
from ..core import Param
|
||||
|
||||
class Identity(Mapping):
|
||||
"""
|
||||
A mapping that does nothing!
|
||||
"""
|
||||
def __init__(self, input_dim, output_dim, name='identity'):
|
||||
Mapping.__init__(self, input_dim, output_dim, name)
|
||||
|
||||
def f(self, X):
|
||||
return X
|
||||
|
||||
def update_gradients(self, dL_dF, X):
|
||||
pass
|
||||
|
||||
def gradients_X(self, dL_dF, X):
|
||||
return dL_dF
@ -1,9 +1,10 @@
|
|||
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
||||
# Copyright (c) 2015, James Hensman
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from ..core.mapping import Mapping
|
||||
import GPy
|
||||
from ..core import Param
|
||||
|
||||
class Kernel(Mapping):
|
||||
"""
|
||||
|
|
@ -11,50 +12,41 @@ class Kernel(Mapping):
|
|||
|
||||
.. math::
|
||||
|
||||
f(\mathbf{x}*) = \mathbf{A}\mathbf{k}(\mathbf{X}, \mathbf{x}^*) + \mathbf{b}
|
||||
f(\mathbf{x}) = \sum_i \alpha_i k(\mathbf{z}_i, \mathbf{x})
|
||||
|
||||
:param X: input observations containing :math:`\mathbf{X}`
|
||||
:type X: ndarray
|
||||
or for multple outputs
|
||||
|
||||
.. math::
|
||||
|
||||
f_i(\mathbf{x}) = \sum_j \alpha_{i,j} k(\mathbf{z}_i, \mathbf{x})
|
||||
|
||||
|
||||
:param input_dim: dimension of input.
|
||||
:type input_dim: int
|
||||
:param output_dim: dimension of output.
|
||||
:type output_dim: int
|
||||
:param Z: input observations containing :math:`\mathbf{Z}`
|
||||
:type Z: ndarray
|
||||
:param kernel: a GPy kernel, defaults to GPy.kern.RBF
|
||||
:type kernel: GPy.kern.kern
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, X, output_dim=1, kernel=None):
|
||||
Mapping.__init__(self, input_dim=X.shape[1], output_dim=output_dim)
|
||||
if kernel is None:
|
||||
kernel = GPy.kern.RBF(self.input_dim)
|
||||
def __init__(self, input_dim, output_dim, Z, kernel, name='kernmap'):
|
||||
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
|
||||
self.kern = kernel
|
||||
self.X = X
|
||||
self.num_data = X.shape[0]
|
||||
self.num_params = self.output_dim*(self.num_data + 1)
|
||||
self.A = np.array((self.num_data, self.output_dim))
|
||||
self.bias = np.array(self.output_dim)
|
||||
self.randomize()
|
||||
self.name = 'kernel'
|
||||
def _get_param_names(self):
|
||||
return sum([['A_%i_%i' % (n, d) for d in range(self.output_dim)] for n in range(self.num_data)], []) + ['bias_%i' % d for d in range(self.output_dim)]
|
||||
|
||||
def _get_params(self):
|
||||
return np.hstack((self.A.flatten(), self.bias))
|
||||
|
||||
def _set_params(self, x):
|
||||
self.A = x[:self.num_data * self.output_dim].reshape(self.num_data, self.output_dim).copy()
|
||||
self.bias = x[self.num_data*self.output_dim:].copy()
|
||||
|
||||
def randomize(self):
|
||||
self.A = np.random.randn(self.num_data, self.output_dim)/np.sqrt(self.num_data+1)
|
||||
self.bias = np.random.randn(self.output_dim)/np.sqrt(self.num_data+1)
|
||||
self.Z = Z
|
||||
self.num_bases, Zdim = Z.shape
|
||||
assert Zdim == self.input_dim
|
||||
self.A = Param('A', np.random.randn(self.num_bases, self.output_dim))
|
||||
self.link_parameter(self.A)
|
||||
|
||||
def f(self, X):
|
||||
return np.dot(self.kern.K(X, self.X),self.A) + self.bias
|
||||
return np.dot(self.kern.K(X, self.Z), self.A)
|
||||
|
||||
def df_dtheta(self, dL_df, X):
|
||||
self._df_dA = (dL_df[:, :, None]*self.kern.K(X, self.X)[:, None, :]).sum(0).T
|
||||
self._df_dbias = (dL_df.sum(0))
|
||||
return np.hstack((self._df_dA.flatten(), self._df_dbias))
|
||||
def update_gradients(self, dL_dF, X):
|
||||
self.kern.update_gradients_full(np.dot(dL_dF, self.A.T), X, self.Z)
|
||||
self.A.gradient = np.dot( self.kern.K(self.Z, X), dL_dF)
|
||||
|
||||
def df_dX(self, dL_df, X):
|
||||
return self.kern.gradients_X((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
|
||||
def gradients_X(self, dL_dF, X):
|
||||
return self.kern.gradients_X(np.dot(dL_dF, self.A.T), X, self.Z)
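A hypothetical usage example (not part of the commit): a kernel mapping f(x) = sum_i alpha_i k(z_i, x) with five basis points Z and an RBF kernel.

import numpy as np
import GPy
Z = np.random.randn(5, 2)
kmap = GPy.mappings.Kernel(input_dim=2, output_dim=1, Z=Z, kernel=GPy.kern.RBF(2))
F = kmap.f(np.random.randn(20, 2))         # K(X, Z).dot(A), shape (20, 1)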
|
||||
|
|
|
|||
|
|
@ -1,43 +1,39 @@
|
|||
# Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
|
||||
# Copyright (c) 2015, James Hensman
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from ..core.mapping import Bijective_mapping
|
||||
from ..core.mapping import Mapping
|
||||
from ..core.parameterization import Param
|
||||
|
||||
class Linear(Bijective_mapping):
|
||||
class Linear(Mapping):
|
||||
"""
|
||||
Mapping based on a linear model.
|
||||
A Linear mapping.
|
||||
|
||||
.. math::
|
||||
|
||||
f(\mathbf{x}*) = \mathbf{W}\mathbf{x}^* + \mathbf{b}
|
||||
f(\mathbf{x}) = \mathbf{A}\mathbf{x}
|
||||
|
||||
:param X: input observations
|
||||
:type X: ndarray
|
||||
|
||||
:param input_dim: dimension of input.
|
||||
:type input_dim: int
|
||||
:param output_dim: dimension of output.
|
||||
:type output_dim: int
|
||||
:param kernel: a GPy kernel, defaults to GPy.kern.RBF
|
||||
:type kernel: GPy.kern.kern
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim=1, output_dim=1, name='linear'):
|
||||
Bijective_mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
|
||||
self.W = Param('W',np.array((self.input_dim, self.output_dim)))
|
||||
self.bias = Param('bias',np.array(self.output_dim))
|
||||
self.link_parameters(self.W, self.bias)
|
||||
def __init__(self, input_dim, output_dim, name='linmap'):
|
||||
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
|
||||
self.A = Param('A', np.random.randn(self.input_dim, self.output_dim))
|
||||
self.link_parameter(self.A)
|
||||
|
||||
def f(self, X):
|
||||
return np.dot(X,self.W) + self.bias
|
||||
return np.dot(X, self.A)
|
||||
|
||||
def g(self, f):
|
||||
V = np.linalg.solve(np.dot(self.W.T, self.W), W.T)
|
||||
return np.dot(f-self.bias, V)
|
||||
def update_gradients(self, dL_dF, X):
|
||||
self.A.gradient = np.dot( X.T, dL_dF)
|
||||
|
||||
def df_dtheta(self, dL_df, X):
|
||||
df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T
|
||||
df_dbias = (dL_df.sum(0))
|
||||
return np.hstack((df_dW.flatten(), df_dbias))
|
||||
|
||||
def dL_dX(self, partial, X):
|
||||
"""The gradient of L with respect to the inputs to the mapping, where L is a function that is dependent on the output of the mapping, f."""
|
||||
return (partial[:, None, :]*self.W[None, :, :]).sum(2)
|
||||
def gradients_X(self, dL_dF, X):
|
||||
return np.dot(dL_dF, self.A.T)
|
||||
|
|
|
|||
|
|
@ -3,128 +3,53 @@
|
|||
|
||||
import numpy as np
|
||||
from ..core.mapping import Mapping
|
||||
from ..core import Param
|
||||
|
||||
class MLP(Mapping):
|
||||
"""
|
||||
Mapping based on a multi-layer perceptron neural network model.
|
||||
|
||||
.. math::
|
||||
|
||||
f(\\mathbf{x}*) = \\mathbf{W}^0\\boldsymbol{\\phi}(\\mathbf{W}^1\\mathbf{x}+\\mathbf{b}^1)^* + \\mathbf{b}^0
|
||||
|
||||
where
|
||||
|
||||
.. math::
|
||||
|
||||
\\phi(\\cdot) = \\text{tanh}(\\cdot)
|
||||
|
||||
:param X: input observations
|
||||
:type X: ndarray
|
||||
:param output_dim: dimension of output.
|
||||
:type output_dim: int
|
||||
:param hidden_dim: dimension of hidden layer. If it is an int, there is one hidden layer of the given dimension. If it is a list of ints there are as manny hidden layers as the length of the list, each with the given number of hidden nodes in it.
|
||||
:type hidden_dim: int or list of ints.
|
||||
|
||||
Mapping based on a multi-layer perceptron neural network model, with a single hidden layer
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim=1, output_dim=1, hidden_dim=3):
|
||||
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
|
||||
self.name = 'mlp'
|
||||
if isinstance(hidden_dim, int):
|
||||
hidden_dim = [hidden_dim]
|
||||
def __init__(self, input_dim=1, output_dim=1, hidden_dim=3, name='mlpmap'):
|
||||
super(MLP, self).__init__(input_dim=input_dim, output_dim=output_dim, name=name)
|
||||
self.hidden_dim = hidden_dim
|
||||
self.activation = [None]*len(self.hidden_dim)
|
||||
self.W = []
|
||||
self._dL_dW = []
|
||||
self.bias = []
|
||||
self._dL_dbias = []
|
||||
self.W.append(np.zeros((self.input_dim, self.hidden_dim[0])))
|
||||
self._dL_dW.append(np.zeros((self.input_dim, self.hidden_dim[0])))
|
||||
self.bias.append(np.zeros(self.hidden_dim[0]))
|
||||
self._dL_dbias.append(np.zeros(self.hidden_dim[0]))
|
||||
self.num_params = self.hidden_dim[0]*(self.input_dim+1)
|
||||
for h1, h0 in zip(hidden_dim[1:], hidden_dim[0:-1]):
|
||||
self.W.append(np.zeros((h0, h1)))
|
||||
self._dL_dW.append(np.zeros((h0, h1)))
|
||||
self.bias.append(np.zeros(h1))
|
||||
self._dL_dbias.append(np.zeros(h1))
|
||||
self.num_params += h1*(h0+1)
|
||||
self.W.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
|
||||
self._dL_dW.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
|
||||
self.bias.append(np.zeros(self.output_dim))
|
||||
self._dL_dbias.append(np.zeros(self.output_dim))
|
||||
self.num_params += self.output_dim*(self.hidden_dim[-1]+1)
|
||||
self.randomize()
|
||||
self.W1 = Param('W1', np.random.randn(self.input_dim, self.hidden_dim))
|
||||
self.b1 = Param('b1', np.random.randn(self.hidden_dim))
|
||||
self.W2 = Param('W2', np.random.randn(self.hidden_dim, self.output_dim))
|
||||
self.b2 = Param('b2', np.random.randn(self.output_dim))
|
||||
self.link_parameters(self.W1, self.b1, self.W2, self.b2)
|
||||
|
||||
def _get_param_names(self):
|
||||
return sum([['W%i_%i_%i' % (i, n, d) for n in range(self.W[i].shape[0]) for d in range(self.W[i].shape[1])] + ['bias%i_%i' % (i, d) for d in range(self.W[i].shape[1])] for i in range(len(self.W))], [])
|
||||
|
||||
def _get_params(self):
|
||||
param = np.array([])
|
||||
for W, bias in zip(self.W, self.bias):
|
||||
param = np.hstack((param, W.flatten(), bias))
|
||||
return param
|
||||
|
||||
def _set_params(self, x):
|
||||
start = 0
|
||||
for W, bias in zip(self.W, self.bias):
|
||||
end = W.shape[0]*W.shape[1]+start
|
||||
W[:] = x[start:end].reshape(W.shape[0], W.shape[1]).copy()
|
||||
start = end
|
||||
end = W.shape[1]+end
|
||||
bias[:] = x[start:end].copy()
|
||||
start = end
|
||||
|
||||
def randomize(self):
|
||||
for W, bias in zip(self.W, self.bias):
|
||||
W[:] = np.random.randn(W.shape[0], W.shape[1])/np.sqrt(W.shape[0]+1)
|
||||
bias[:] = np.random.randn(W.shape[1])/np.sqrt(W.shape[0]+1)
|
||||
|
||||
def f(self, X):
|
||||
self._f_computations(X)
|
||||
return np.dot(np.tanh(self.activation[-1]), self.W[-1]) + self.bias[-1]
|
||||
layer1 = np.dot(X, self.W1) + self.b1
|
||||
activations = np.tanh(layer1)
|
||||
return np.dot(activations, self.W2) + self.b2
|
||||
|
||||
def _f_computations(self, X):
|
||||
W = self.W[0]
|
||||
bias = self.bias[0]
|
||||
self.activation[0] = np.dot(X,W) + bias
|
||||
for W, bias, index in zip(self.W[1:-1], self.bias[1:-1], range(1, len(self.activation))):
|
||||
self.activation[index] = np.dot(np.tanh(self.activation[index-1]), W)+bias
|
||||
def update_gradients(self, dL_dF, X):
|
||||
layer1 = np.dot(X,self.W1) + self.b1
|
||||
activations = np.tanh(layer1)
|
||||
|
||||
#Evaluate second-layer gradients.
|
||||
self.W2.gradient = np.dot(activations.T, dL_dF)
|
||||
self.b2.gradient = np.sum(dL_dF, 0)
|
||||
|
||||
# Backpropagation to hidden layer.
|
||||
dL_dact = np.dot(dL_dF, self.W2.T)
|
||||
dL_dlayer1 = dL_dact / np.square(np.cosh(layer1))
|
||||
|
||||
# Finally, evaluate the first-layer gradients.
|
||||
self.W1.gradient = np.dot(X.T,dL_dlayer1)
|
||||
self.b1.gradient = np.sum(dL_dlayer1, 0)
|
||||
|
||||
def gradients_X(self, dL_dF, X):
|
||||
layer1 = np.dot(X,self.W1) + self.b1
|
||||
activations = np.tanh(layer1)
|
||||
|
||||
# Backpropagation to hidden layer.
|
||||
dL_dact = np.dot(dL_dF, self.W2.T)
|
||||
dL_dlayer1 = dL_dact / np.square(np.cosh(layer1))
|
||||
|
||||
return np.dot(dL_dlayer1, self.W1.T)
|
||||
|
||||
def df_dtheta(self, dL_df, X):
|
||||
self._df_computations(dL_df, X)
|
||||
g = np.array([])
|
||||
for gW, gbias in zip(self._dL_dW, self._dL_dbias):
|
||||
g = np.hstack((g, gW.flatten(), gbias))
|
||||
return g
|
||||
|
||||
def _df_computations(self, dL_df, X):
|
||||
self._f_computations(X)
|
||||
a0 = self.activation[-1]
|
||||
W = self.W[-1]
|
||||
self._dL_dW[-1] = (dL_df[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
|
||||
dL_dta=(dL_df[:, None, :]*W[None, :, :]).sum(2)
|
||||
self._dL_dbias[-1] = (dL_df.sum(0))
|
||||
for dL_dW, dL_dbias, W, bias, a0, a1 in zip(self._dL_dW[-2:0:-1],
|
||||
self._dL_dbias[-2:0:-1],
|
||||
self.W[-2:0:-1],
|
||||
self.bias[-2:0:-1],
|
||||
self.activation[-2::-1],
|
||||
self.activation[-1:0:-1]):
|
||||
ta = np.tanh(a1)
|
||||
dL_da = dL_dta*(1-ta*ta)
|
||||
dL_dW[:] = (dL_da[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
|
||||
dL_dbias[:] = (dL_da.sum(0))
|
||||
dL_dta = (dL_da[:, None, :]*W[None, :, :]).sum(2)
|
||||
ta = np.tanh(self.activation[0])
|
||||
dL_da = dL_dta*(1-ta*ta)
|
||||
W = self.W[0]
|
||||
self._dL_dW[0] = (dL_da[:, :, None]*X[:, None, :]).sum(0).T
|
||||
self._dL_dbias[0] = (dL_da.sum(0))
|
||||
self._dL_dX = (dL_da[:, None, :]*W[None, :, :]).sum(2)
|
||||
|
||||
|
||||
def df_dX(self, dL_df, X):
|
||||
self._df_computations(dL_df, X)
|
||||
return self._dL_dX
|
GPy/mappings/piecewise_linear.py (new file, 94 lines)
@ -0,0 +1,94 @@
|
|||
from GPy.core.mapping import Mapping
|
||||
from GPy.core import Param
|
||||
import numpy as np
|
||||
|
||||
class PiecewiseLinear(Mapping):
|
||||
"""
|
||||
A piecewise-linear mapping.
|
||||
|
||||
The parameters of this mapping are the positions and values of the function where it is broken (self.breaks, self.values).
|
||||
|
||||
Outside the range of the breaks, the function is assumed to have gradient 1
|
||||
"""
|
||||
def __init__(self, input_dim, output_dim, values, breaks, name='piecewise_linear'):
|
||||
|
||||
assert input_dim==1
|
||||
assert output_dim==1
|
||||
|
||||
Mapping.__init__(self, input_dim, output_dim, name)
|
||||
|
||||
values, breaks = np.array(values).flatten(), np.array(breaks).flatten()
|
||||
assert values.size == breaks.size
|
||||
self.values = Param('values', values)
|
||||
self.breaks = Param('breaks', breaks)
|
||||
self.link_parameter(self.values)
|
||||
self.link_parameter(self.breaks)
|
||||
|
||||
def parameters_changed(self):
|
||||
self.order = np.argsort(self.breaks)*1
|
||||
self.reverse_order = np.zeros_like(self.order)
|
||||
self.reverse_order[self.order] = np.arange(self.order.size)
|
||||
|
||||
self.sorted_breaks = self.breaks[self.order]
|
||||
self.sorted_values = self.values[self.order]
|
||||
|
||||
self.grads = np.diff(self.sorted_values)/np.diff(self.sorted_breaks)
|
||||
|
||||
def f(self, X):
|
||||
x = X.flatten()
|
||||
y = x.copy()
|
||||
|
||||
#first adjust the points below the first value
|
||||
y[x<self.sorted_breaks[0]] = x[x<self.sorted_breaks[0]] + self.sorted_values[0] - self.sorted_breaks[0]
|
||||
|
||||
#now all the points past the last break
|
||||
y[x>self.sorted_breaks[-1]] = x[x>self.sorted_breaks[-1]] + self.sorted_values[-1] - self.sorted_breaks[-1]
|
||||
|
||||
#loop through the pairs of points
|
||||
for low, up, g, v in zip(self. sorted_breaks[:-1], self.sorted_breaks[1:], self.grads, self.sorted_values[:-1]):
|
||||
i = np.logical_and(x>low, x<up)
|
||||
y[i] = v + (x[i]-low)*g
|
||||
|
||||
return y.reshape(-1,1)
|
||||
|
||||
def update_gradients(self, dL_dF, X):
|
||||
x = X.flatten()
|
||||
dL_dF = dL_dF.flatten()
|
||||
|
||||
dL_db = np.zeros(self.sorted_breaks.size)
|
||||
dL_dv = np.zeros(self.sorted_values.size)
|
||||
|
||||
#loop across each interval, computing the gradient for each of the 4 parameters that define it
|
||||
for i, (low, up, g, v) in enumerate(zip(self. sorted_breaks[:-1], self.sorted_breaks[1:], self.grads, self.sorted_values[:-1])):
|
||||
index = np.logical_and(x>low, x<up)
|
||||
xx = x[index]
|
||||
grad = dL_dF[index]
|
||||
span = up-low
|
||||
dL_dv[i] += np.sum(grad*( (low - xx)/span + 1))
|
||||
dL_dv[i+1] += np.sum(grad*(xx-low)/span)
|
||||
dL_db[i] += np.sum(grad*g*(xx-up)/span)
|
||||
dL_db[i+1] += np.sum(grad*g*(low-xx)/span)
|
||||
|
||||
#now the end parts
|
||||
dL_db[0] -= np.sum(dL_dF[x<self.sorted_breaks[0]])
|
||||
dL_db[-1] -= np.sum(dL_dF[x>self.sorted_breaks[-1]])
|
||||
dL_dv[0] += np.sum(dL_dF[x<self.sorted_breaks[0]])
|
||||
dL_dv[-1] += np.sum(dL_dF[x>self.sorted_breaks[-1]])
|
||||
|
||||
#now put the gradients back in the correct order!
|
||||
self.breaks.gradient = dL_db[self.reverse_order]
|
||||
self.values.gradient = dL_dv[self.reverse_order]
|
||||
|
||||
def gradients_X(self, dL_dF, X):
|
||||
x = X.flatten()
|
||||
|
||||
#outside the range of the breakpoints, the function is just offset by a constant, so the partial derivative is 1.
|
||||
dL_dX = dL_dF.copy().flatten()
|
||||
|
||||
#inside the breakpoints, the partial derivative is self.grads
|
||||
for low, up, g, v in zip(self. sorted_breaks[:-1], self.sorted_breaks[1:], self.grads, self.sorted_values[:-1]):
|
||||
i = np.logical_and(x>low, x<up)
|
||||
dL_dX[i] = dL_dF[i]*g
|
||||
|
||||
return dL_dX.reshape(-1,1)
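A hypothetical usage example (not part of the commit): a piecewise-linear map with three breaks; outside the outermost breaks the slope is 1, as the class docstring describes.

import numpy as np
from GPy.mappings.piecewise_linear import PiecewiseLinear
pw = PiecewiseLinear(1, 1, values=[0., 2., 2.5], breaks=[-1., 0., 1.])
F = pw.f(np.random.randn(9, 1))            # shape (9, 1)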
|
||||
|
||||
|
|
@ -43,10 +43,11 @@ class SparseGPMiniBatch(SparseGP):
|
|||
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
|
||||
name='sparse gp', Y_metadata=None, normalizer=False,
|
||||
missing_data=False, stochastic=False, batchsize=1):
|
||||
#pick a sensible inference method
|
||||
|
||||
# pick a sensible inference method
|
||||
if inference_method is None:
|
||||
if isinstance(likelihood, likelihoods.Gaussian):
|
||||
inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1])
|
||||
inference_method = var_dtc.VarDTC(limit=1 if not missing_data else Y.shape[1])
|
||||
else:
|
||||
#inference_method = ??
|
||||
raise NotImplementedError, "what to do what to do?"
|
||||
|
|
|
|||
|
|
@ -39,7 +39,10 @@ class SSGPLVM(SparseGP_MPI):
|
|||
X_variance = np.random.uniform(0,.1,X.shape)
|
||||
|
||||
if Gamma is None:
|
||||
gamma = np.random.randn(X.shape[0], input_dim)
|
||||
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
|
||||
gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
|
||||
gamma[gamma>1.-1e-9] = 1.-1e-9
|
||||
gamma[gamma<1e-9] = 1e-9
|
||||
else:
|
||||
gamma = Gamma.copy()
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
import numpy as np
|
||||
from ..util.warping_functions import *
|
||||
from ..core import GP
|
||||
|
|
@ -10,14 +9,16 @@ from GPy.util.warping_functions import TanhWarpingFunction_d
|
|||
from GPy import kern
|
||||
|
||||
class WarpedGP(GP):
|
||||
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalize_X=False, normalize_Y=False):
|
||||
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3):
|
||||
|
||||
if kernel is None:
|
||||
kernel = kern.rbf(X.shape[1])
|
||||
kernel = kern.RBF(X.shape[1])
|
||||
|
||||
if warping_function == None:
|
||||
self.warping_function = TanhWarpingFunction_d(warping_terms)
|
||||
self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1,) * 1)
|
||||
else:
|
||||
self.warping_function = warping_function
|
||||
|
||||
self.scale_data = False
|
||||
if self.scale_data:
|
||||
|
|
@ -25,10 +26,10 @@ class WarpedGP(GP):
|
|||
self.has_uncertain_inputs = False
|
||||
self.Y_untransformed = Y.copy()
|
||||
self.predict_in_warped_space = False
|
||||
likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y)
|
||||
likelihood = likelihoods.Gaussian()
|
||||
|
||||
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
||||
self._set_params(self._get_params())
|
||||
GP.__init__(self, X, self.transform_data(), likelihood=likelihood, kernel=kernel)
|
||||
self.link_parameter(self.warping_function)
|
||||
|
||||
def _scale_data(self, Y):
|
||||
self._Ymax = Y.max()
|
||||
|
|
@ -38,62 +39,55 @@ class WarpedGP(GP):
|
|||
def _unscale_data(self, Y):
|
||||
return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin
|
||||
|
||||
def _set_params(self, x):
|
||||
self.warping_params = x[:self.warping_function.num_parameters]
|
||||
Y = self.transform_data()
|
||||
self.likelihood.set_data(Y)
|
||||
GP._set_params(self, x[self.warping_function.num_parameters:].copy())
|
||||
def parameters_changed(self):
|
||||
self.Y[:] = self.transform_data()
|
||||
super(WarpedGP, self).parameters_changed()
|
||||
|
||||
def _get_params(self):
|
||||
return np.hstack((self.warping_params.flatten().copy(), GP._get_params(self).copy()))
|
||||
Kiy = self.posterior.woodbury_vector.flatten()
|
||||
|
||||
def _get_param_names(self):
|
||||
warping_names = self.warping_function._get_param_names()
|
||||
param_names = GP._get_param_names(self)
|
||||
return warping_names + param_names
|
||||
|
||||
def transform_data(self):
|
||||
Y = self.warping_function.f(self.Y_untransformed.copy(), self.warping_params).copy()
|
||||
return Y
|
||||
|
||||
def log_likelihood(self):
|
||||
ll = GP.log_likelihood(self)
|
||||
jacobian = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
|
||||
return ll + np.log(jacobian).sum()
|
||||
|
||||
def _log_likelihood_gradients(self):
|
||||
ll_grads = GP._log_likelihood_gradients(self)
|
||||
alpha = np.dot(self.Ki, self.likelihood.Y.flatten())
|
||||
warping_grads = self.warping_function_gradients(alpha)
|
||||
|
||||
warping_grads = np.append(warping_grads[:, :-1].flatten(), warping_grads[0, -1])
|
||||
return np.hstack((warping_grads.flatten(), ll_grads.flatten()))
|
||||
|
||||
def warping_function_gradients(self, Kiy):
|
||||
grad_y = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
|
||||
grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed, self.warping_params,
|
||||
grad_y = self.warping_function.fgrad_y(self.Y_untransformed)
|
||||
grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed,
|
||||
return_covar_chain=True)
|
||||
djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0)
|
||||
dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0)
|
||||
|
||||
return -dquad_dpsi + djac_dpsi
|
||||
warping_grads = -dquad_dpsi + djac_dpsi
|
||||
|
||||
self.warping_function.psi.gradient[:] = warping_grads[:, :-1]
|
||||
self.warping_function.d.gradient[:] = warping_grads[0, -1]
|
||||
|
||||
|
||||
def transform_data(self):
|
||||
Y = self.warping_function.f(self.Y_untransformed.copy()).copy()
|
||||
return Y
|
||||
|
||||
def log_likelihood(self):
|
||||
ll = GP.log_likelihood(self)
|
||||
jacobian = self.warping_function.fgrad_y(self.Y_untransformed)
|
||||
return ll + np.log(jacobian).sum()
|
||||
|
||||
def plot_warping(self):
|
||||
self.warping_function.plot(self.warping_params, self.Y_untransformed.min(), self.Y_untransformed.max())
|
||||
self.warping_function.plot(self.Y_untransformed.min(), self.Y_untransformed.max())
|
||||
|
||||
def predict(self, Xnew, which_parts='all', full_cov=False, pred_init=None):
|
||||
def predict(self, Xnew, which_parts='all', pred_init=None):
|
||||
# normalize X values
|
||||
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
|
||||
mu, var = GP._raw_predict(self, Xnew, full_cov=full_cov, which_parts=which_parts)
|
||||
# Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
|
||||
mu, var = GP._raw_predict(self, Xnew)
|
||||
|
||||
# now push through likelihood
|
||||
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
|
||||
mean, var = self.likelihood.predictive_values(mu, var)
|
||||
|
||||
if self.predict_in_warped_space:
|
||||
mean = self.warping_function.f_inv(mean, self.warping_params, y=pred_init)
|
||||
var = self.warping_function.f_inv(var, self.warping_params)
|
||||
mean = self.warping_function.f_inv(mean, y=pred_init)
|
||||
var = self.warping_function.f_inv(var)
|
||||
|
||||
if self.scale_data:
|
||||
mean = self._unscale_data(mean)
|
||||
|
||||
return mean, var, _025pm, _975pm
|
||||
|
||||
return mean, var
|
||||
|
||||
if __name__ == '__main__':
|
||||
X = np.random.randn(100, 1)
|
||||
Y = np.sin(X) + np.random.randn(100, 1)*0.05
|
||||
|
||||
m = WarpedGP(X, Y)
|
||||
|
|
|
|||
|
|
@ -6,7 +6,11 @@ try:
|
|||
from matplotlib.patches import Polygon
|
||||
from matplotlib.collections import PatchCollection
|
||||
#from matplotlib import cm
|
||||
pb.ion()
|
||||
try:
|
||||
__IPYTHON__
|
||||
pb.ion()
|
||||
except NameError:
|
||||
pass
|
||||
except:
|
||||
pass
|
||||
import re
|
||||
|
|
|
|||
|
|
@ -11,39 +11,38 @@ import GPy
|
|||
|
||||
|
||||
class InferenceXTestCase(unittest.TestCase):
|
||||
|
||||
|
||||
def genData(self):
|
||||
D1,D2,N = 12,12,50
|
||||
np.random.seed(1234)
|
||||
|
||||
|
||||
x = np.linspace(0, 4 * np.pi, N)[:, None]
|
||||
s1 = np.vectorize(lambda x: np.sin(x))
|
||||
s2 = np.vectorize(lambda x: np.cos(x)**2)
|
||||
s3 = np.vectorize(lambda x:-np.exp(-np.cos(2 * x)))
|
||||
sS = np.vectorize(lambda x: np.cos(x))
|
||||
|
||||
|
||||
s1 = s1(x)
|
||||
s2 = s2(x)
|
||||
s3 = s3(x)
|
||||
sS = sS(x)
|
||||
|
||||
|
||||
s1 -= s1.mean(); s1 /= s1.std(0)
|
||||
s2 -= s2.mean(); s2 /= s2.std(0)
|
||||
s3 -= s3.mean(); s3 /= s3.std(0)
|
||||
sS -= sS.mean(); sS /= sS.std(0)
|
||||
|
||||
|
||||
S1 = np.hstack([s1, sS])
|
||||
S2 = np.hstack([s3, sS])
|
||||
|
||||
|
||||
P1 = np.random.randn(S1.shape[1], D1)
|
||||
P2 = np.random.randn(S2.shape[1], D2)
|
||||
|
||||
|
||||
Y1 = S1.dot(P1)
|
||||
Y2 = S2.dot(P2)
|
||||
|
||||
|
||||
Y1 += .01 * np.random.randn(*Y1.shape)
|
||||
Y2 += .01 * np.random.randn(*Y2.shape)
|
||||
|
||||
|
||||
Y1 -= Y1.mean(0)
|
||||
Y2 -= Y2.mean(0)
|
||||
Y1 /= Y1.std(0)
|
||||
|
|
@ -52,33 +51,34 @@ class InferenceXTestCase(unittest.TestCase):
|
|||
slist = [s1, s2, s3, sS]
|
||||
slist_names = ["s1", "s2", "s3", "sS"]
|
||||
Ylist = [Y1, Y2]
|
||||
|
||||
|
||||
return Ylist
|
||||
|
||||
|
||||
def test_inferenceX_BGPLVM(self):
|
||||
Ys = self.genData()
|
||||
m = GPy.models.BayesianGPLVM(Ys[0],5,kernel=GPy.kern.Linear(5,ARD=True))
|
||||
|
||||
|
||||
x,mi = m.infer_newX(m.Y, optimize=False)
|
||||
self.assertTrue(mi.checkgrad())
|
||||
|
||||
m.optimize(max_iters=10000)
|
||||
x,mi = m.infer_newX(m.Y)
|
||||
|
||||
self.assertTrue(np.allclose(m.X.mean, mi.X.mean))
|
||||
self.assertTrue(np.allclose(m.X.variance, mi.X.variance))
|
||||
m.optimize(max_iters=10000)
|
||||
x, mi = m.infer_newX(m.Y)
|
||||
|
||||
print m.X.mean - mi.X.mean
|
||||
self.assertTrue(np.allclose(m.X.mean, mi.X.mean, rtol=1e-4, atol=1e-4))
|
||||
self.assertTrue(np.allclose(m.X.variance, mi.X.variance, rtol=1e-4, atol=1e-4))
|
||||
|
||||
def test_inferenceX_GPLVM(self):
|
||||
Ys = self.genData()
|
||||
m = GPy.models.GPLVM(Ys[0],3,kernel=GPy.kern.RBF(3,ARD=True))
|
||||
|
||||
|
||||
x,mi = m.infer_newX(m.Y, optimize=False)
|
||||
self.assertTrue(mi.checkgrad())
|
||||
|
||||
|
||||
# m.optimize(max_iters=10000)
|
||||
# x,mi = m.infer_newX(m.Y)
|
||||
# self.assertTrue(np.allclose(m.X, x))
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -256,13 +256,23 @@ class KernelGradientTestsContinuous(unittest.TestCase):
|
|||
k.randomize()
|
||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
|
||||
def test_Prod1(self):
|
||||
k = GPy.kern.RBF(self.D) * GPy.kern.Linear(self.D)
|
||||
k.randomize()
|
||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
|
||||
def test_Prod2(self):
|
||||
k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D))
|
||||
k = GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D)
|
||||
k.randomize()
|
||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
|
||||
def test_Prod3(self):
|
||||
k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D))
|
||||
k = GPy.kern.RBF(self.D) * GPy.kern.Linear(self.D) * GPy.kern.Bias(self.D)
|
||||
k.randomize()
|
||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
|
||||
def test_Prod4(self):
|
||||
k = GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D) * GPy.kern.Matern32(2, active_dims=[0,1])
|
||||
k.randomize()
|
||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
|
||||
|
|
@ -401,11 +411,27 @@ class Coregionalize_weave_test(unittest.TestCase):
|
|||
GPy.util.config.config.set('weave', 'working', 'False')
|
||||
|
||||
|
||||
class KernelTestsProductWithZeroValues(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.X = np.array([[0,1],[1,0]])
|
||||
self.k = GPy.kern.Linear(2) * GPy.kern.Bias(2)
|
||||
|
||||
def test_zero_valued_kernel_full(self):
|
||||
self.k.update_gradients_full(1, self.X)
|
||||
self.assertFalse(np.isnan(self.k['linear.variances'].gradient),
|
||||
"Gradient resulted in NaN")
|
||||
|
||||
def test_zero_valued_kernel_gradients_X(self):
|
||||
target = self.k.gradients_X(1, self.X)
|
||||
self.assertFalse(np.any(np.isnan(target)),
|
||||
"Gradient resulted in NaN")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print "Running unit tests, please be (very) patient..."
|
||||
unittest.main()
|
||||
|
||||
# np.random.seed(0)
|
||||
# N0 = 3
|
||||
# N1 = 9
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ from GPy.likelihoods import link_functions
|
|||
from GPy.core.parameterization import Param
|
||||
from functools import partial
|
||||
#np.random.seed(300)
|
||||
#np.random.seed(7)
|
||||
#np.random.seed(4)
|
||||
|
||||
#np.seterr(divide='raise')
|
||||
def dparam_partial(inst_func, *args):
|
||||
|
|
@ -52,8 +52,17 @@ def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None,
|
|||
zipped_params = zip(params, params_names)
|
||||
for param_ind, (param_val, param_name) in enumerate(zipped_params):
|
||||
#Check one parameter at a time, make sure it is 2d (as some gradients only return arrays) then strip out the parameter
|
||||
fnum = np.atleast_2d(partial_f(param_val, param_name))[:, param_ind].shape[0]
|
||||
dfnum = np.atleast_2d(partial_df(param_val, param_name))[:, param_ind].shape[0]
|
||||
f_ = partial_f(param_val, param_name)
|
||||
df_ = partial_df(param_val, param_name)
|
||||
#Reshape it such that we have a 3d matrix, i.e. we want it (?, N, D) regardless of whether ? is num_params or not
|
||||
f_ = f_.reshape(-1, f_.shape[0], f_.shape[1])
|
||||
df_ = df_.reshape(-1, f_.shape[0], f_.shape[1])
|
||||
|
||||
#Get the number of f and number of dimensions
|
||||
fnum = f_.shape[-2]
|
||||
fdim = f_.shape[-1]
|
||||
dfnum = df_.shape[-2]
|
||||
|
||||
for fixed_val in range(dfnum):
|
||||
#dlik and dlik_dvar gives back 1 value for each
|
||||
f_ind = min(fnum, fixed_val+1) - 1
|
||||
|
|
@ -61,9 +70,13 @@ def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None,
|
|||
#Make grad checker with this param moving, note that set_params is NOT being called
|
||||
#The parameter is being set directly with __setattr__
|
||||
#Check only the parameter and function value we wish to check at a time
|
||||
grad = GradientChecker(lambda p_val: np.atleast_2d(partial_f(p_val, param_name))[f_ind, param_ind],
|
||||
lambda p_val: np.atleast_2d(partial_df(p_val, param_name))[fixed_val, param_ind],
|
||||
param_val, [param_name])
|
||||
#func = lambda p_val, fnum, fdim, param_ind, f_ind, param_ind: partial_f(p_val, param_name).reshape(-1, fnum, fdim)[param_ind, f_ind, :]
|
||||
#dfunc_dparam = lambda d_val, fnum, fdim, param_ind, fixed_val: partial_df(d_val, param_name).reshape(-1, fnum, fdim)[param_ind, fixed_val, :]
|
||||
|
||||
#First we reshape the output such that it is (num_params, N, D), then we pull out the relevant parameter f-index and checkgrad just this index at a time
|
||||
func = lambda p_val: partial_f(p_val, param_name).reshape(-1, fnum, fdim)[param_ind, f_ind, :]
|
||||
dfunc_dparam = lambda d_val: partial_df(d_val, param_name).reshape(-1, fnum, fdim)[param_ind, fixed_val, :]
|
||||
grad = GradientChecker(func, dfunc_dparam, param_val, [param_name])
|
||||
|
||||
if constraints is not None:
|
||||
for constrain_param, constraint in constraints:
|
||||
|
|
@ -104,37 +117,9 @@ class TestNoiseModels(object):
|
|||
|
||||
self.var = 0.2
|
||||
|
||||
self.var = np.random.rand(1)
|
||||
|
||||
#Make a bigger step as lower bound can be quite curved
|
||||
self.step = 1e-4
|
||||
|
||||
def tearDown(self):
|
||||
self.Y = None
|
||||
self.f = None
|
||||
self.X = None
|
||||
|
||||
def test_scale2_models(self):
|
||||
self.setUp()
|
||||
|
||||
####################################################
|
||||
# Constraint wrappers so we can just list them off #
|
||||
####################################################
|
||||
def constrain_fixed(regex, model):
|
||||
model[regex].constrain_fixed()
|
||||
|
||||
def constrain_negative(regex, model):
|
||||
model[regex].constrain_negative()
|
||||
|
||||
def constrain_positive(regex, model):
|
||||
model[regex].constrain_positive()
|
||||
|
||||
def constrain_bounded(regex, model, lower, upper):
|
||||
"""
|
||||
Used like: partial(constrain_bounded, lower=0, upper=1)
|
||||
"""
|
||||
model[regex].constrain_bounded(lower, upper)
|
||||
|
||||
"""
|
||||
Dictionary where we nest models we would like to check
|
||||
Name: {
|
||||
|
|
@ -149,136 +134,170 @@ class TestNoiseModels(object):
|
|||
"link_f_constraints": [constraint_wrappers, listed_here]
|
||||
}
|
||||
"""
|
||||
noise_models = {"Student_t_default": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [self.var],
|
||||
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
#"constraints": [("t_scale2", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_1_var": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [1.0],
|
||||
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_small_deg_free": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [self.var],
|
||||
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_small_var": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [0.001],
|
||||
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_large_var": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [10.0],
|
||||
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_approx_gauss": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [self.var],
|
||||
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_log": {
|
||||
"model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [self.var],
|
||||
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Gaussian_default": {
|
||||
"model": GPy.likelihoods.Gaussian(variance=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*variance"],
|
||||
"vals": [self.var],
|
||||
"constraints": [(".*variance", constrain_positive)]
|
||||
},
|
||||
"laplace": True,
|
||||
"ep": False # FIXME: Should be True when we have it working again
|
||||
},
|
||||
#"Gaussian_log": {
|
||||
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log(), variance=self.var, D=self.D, N=self.N),
|
||||
#"grad_params": {
|
||||
#"names": ["noise_model_variance"],
|
||||
#"vals": [self.var],
|
||||
#"constraints": [constrain_positive]
|
||||
#},
|
||||
#"laplace": True
|
||||
#},
|
||||
#"Gaussian_probit": {
|
||||
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N),
|
||||
#"grad_params": {
|
||||
#"names": ["noise_model_variance"],
|
||||
#"vals": [self.var],
|
||||
#"constraints": [constrain_positive]
|
||||
#},
|
||||
#"laplace": True
|
||||
#},
|
||||
#"Gaussian_log_ex": {
|
||||
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log_ex_1(), variance=self.var, D=self.D, N=self.N),
|
||||
#"grad_params": {
|
||||
#"names": ["noise_model_variance"],
|
||||
#"vals": [self.var],
|
||||
#"constraints": [constrain_positive]
|
||||
#},
|
||||
#"laplace": True
|
||||
#},
|
||||
"Bernoulli_default": {
|
||||
"model": GPy.likelihoods.Bernoulli(),
|
||||
"link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)],
|
||||
"laplace": True,
|
||||
"Y": self.binary_Y,
|
||||
"ep": False # FIXME: Should be True when we have it working again
|
||||
},
|
||||
"Exponential_default": {
|
||||
"model": GPy.likelihoods.Exponential(),
|
||||
"link_f_constraints": [constrain_positive],
|
||||
"Y": self.positive_Y,
|
||||
"laplace": True,
|
||||
},
|
||||
"Poisson_default": {
|
||||
"model": GPy.likelihoods.Poisson(),
|
||||
"link_f_constraints": [constrain_positive],
|
||||
"Y": self.integer_Y,
|
||||
"laplace": True,
|
||||
"ep": False #Should work though...
|
||||
}#,
|
||||
#GAMMA needs some work!"Gamma_default": {
|
||||
#"model": GPy.likelihoods.Gamma(),
|
||||
#"link_f_constraints": [constrain_positive],
|
||||
#"Y": self.positive_Y,
|
||||
#"laplace": True
|
||||
#}
|
||||
}
|
||||
self.noise_models = {"Student_t_default": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [self.var],
|
||||
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_1_var": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [1.0],
|
||||
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_small_deg_free": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [self.var],
|
||||
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_small_var": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [0.001],
|
||||
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_large_var": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [10.0],
|
||||
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_approx_gauss": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [self.var],
|
||||
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
#"Student_t_log": {
|
||||
#"model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var),
|
||||
#"grad_params": {
|
||||
#"names": [".*t_noise"],
|
||||
#"vals": [self.var],
|
||||
#"constraints": [(".*t_noise", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
|
||||
#},
|
||||
#"laplace": True
|
||||
#},
|
||||
"Gaussian_default": {
|
||||
"model": GPy.likelihoods.Gaussian(variance=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*variance"],
|
||||
"vals": [self.var],
|
||||
"constraints": [(".*variance", self.constrain_positive)]
|
||||
},
|
||||
"laplace": True,
|
||||
"ep": False # FIXME: Should be True when we have it working again
|
||||
},
|
||||
"Gaussian_log": {
|
||||
"model": GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*variance"],
|
||||
"vals": [self.var],
|
||||
"constraints": [(".*variance", self.constrain_positive)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
#"Gaussian_probit": {
|
||||
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N),
|
||||
#"grad_params": {
|
||||
#"names": ["noise_model_variance"],
|
||||
#"vals": [self.var],
|
||||
#"constraints": [constrain_positive]
|
||||
#},
|
||||
#"laplace": True
|
||||
#},
|
||||
#"Gaussian_log_ex": {
|
||||
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log_ex_1(), variance=self.var, D=self.D, N=self.N),
|
||||
#"grad_params": {
|
||||
#"names": ["noise_model_variance"],
|
||||
#"vals": [self.var],
|
||||
#"constraints": [constrain_positive]
|
||||
#},
|
||||
#"laplace": True
|
||||
#},
|
||||
"Bernoulli_default": {
|
||||
"model": GPy.likelihoods.Bernoulli(),
|
||||
"link_f_constraints": [partial(self.constrain_bounded, lower=0, upper=1)],
|
||||
"laplace": True,
|
||||
"Y": self.binary_Y,
|
||||
"ep": False # FIXME: Should be True when we have it working again
|
||||
},
|
||||
"Exponential_default": {
|
||||
"model": GPy.likelihoods.Exponential(),
|
||||
"link_f_constraints": [self.constrain_positive],
|
||||
"Y": self.positive_Y,
|
||||
"laplace": True,
|
||||
},
|
||||
"Poisson_default": {
|
||||
"model": GPy.likelihoods.Poisson(),
|
||||
"link_f_constraints": [self.constrain_positive],
|
||||
"Y": self.integer_Y,
|
||||
"laplace": True,
|
||||
"ep": False #Should work though...
|
||||
},
|
||||
#,
|
||||
#GAMMA needs some work!"Gamma_default": {
|
||||
#"model": GPy.likelihoods.Gamma(),
|
||||
#"link_f_constraints": [constrain_positive],
|
||||
#"Y": self.positive_Y,
|
||||
#"laplace": True
|
||||
#}
|
||||
}
|
||||
|
||||
for name, attributes in noise_models.iteritems():
|
||||
|
||||
####################################################
|
||||
# Constraint wrappers so we can just list them off #
|
||||
####################################################
|
||||
def constrain_fixed(self, regex, model):
|
||||
model[regex].constrain_fixed()
|
||||
|
||||
def constrain_negative(self, regex, model):
|
||||
model[regex].constrain_negative()
|
||||
|
||||
def constrain_positive(self, regex, model):
|
||||
model[regex].constrain_positive()
|
||||
|
||||
def constrain_fixed_below(self, regex, model, up_to):
|
||||
model[regex][0:up_to].constrain_fixed()
|
||||
|
||||
def constrain_fixed_above(self, regex, model, above):
|
||||
model[regex][above:].constrain_fixed()
|
||||
|
||||
def constrain_bounded(self, regex, model, lower, upper):
|
||||
"""
|
||||
Used like: partial(constrain_bounded, lower=0, upper=1)
|
||||
"""
|
||||
model[regex].constrain_bounded(lower, upper)
|
||||
|
||||
|
||||
def tearDown(self):
|
||||
self.Y = None
|
||||
self.f = None
|
||||
self.X = None
|
||||
|
||||
def test_scale2_models(self):
|
||||
self.setUp()
|
||||
|
||||
for name, attributes in self.noise_models.iteritems():
|
||||
model = attributes["model"]
|
||||
if "grad_params" in attributes:
|
||||
params = attributes["grad_params"]
|
||||
|
|
@ -290,7 +309,7 @@ class TestNoiseModels(object):
|
|||
param_vals = []
|
||||
param_names = []
|
||||
constrain_positive = []
|
||||
param_constraints = [] # ??? TODO: Saul to Fix.
|
||||
param_constraints = []
|
||||
if "link_f_constraints" in attributes:
|
||||
link_f_constraints = attributes["link_f_constraints"]
|
||||
else:
|
||||
|
|
@ -303,6 +322,10 @@ class TestNoiseModels(object):
|
|||
f = attributes["f"].copy()
|
||||
else:
|
||||
f = self.f.copy()
|
||||
if "Y_metadata" in attributes:
|
||||
Y_metadata = attributes["Y_metadata"].copy()
|
||||
else:
|
||||
Y_metadata = None
|
||||
if "laplace" in attributes:
|
||||
laplace = attributes["laplace"]
|
||||
else:
|
||||
|
|
@ -317,30 +340,30 @@ class TestNoiseModels(object):
|
|||
|
||||
#Required by all
|
||||
#Normal derivatives
|
||||
yield self.t_logpdf, model, Y, f
|
||||
yield self.t_dlogpdf_df, model, Y, f
|
||||
yield self.t_d2logpdf_df2, model, Y, f
|
||||
yield self.t_logpdf, model, Y, f, Y_metadata
|
||||
yield self.t_dlogpdf_df, model, Y, f, Y_metadata
|
||||
yield self.t_d2logpdf_df2, model, Y, f, Y_metadata
|
||||
#Link derivatives
|
||||
yield self.t_dlogpdf_dlink, model, Y, f, link_f_constraints
|
||||
yield self.t_d2logpdf_dlink2, model, Y, f, link_f_constraints
|
||||
yield self.t_dlogpdf_dlink, model, Y, f, Y_metadata, link_f_constraints
|
||||
yield self.t_d2logpdf_dlink2, model, Y, f, Y_metadata, link_f_constraints
|
||||
if laplace:
|
||||
#Laplace only derivatives
|
||||
yield self.t_d3logpdf_df3, model, Y, f
|
||||
yield self.t_d3logpdf_dlink3, model, Y, f, link_f_constraints
|
||||
yield self.t_d3logpdf_df3, model, Y, f, Y_metadata
|
||||
yield self.t_d3logpdf_dlink3, model, Y, f, Y_metadata, link_f_constraints
|
||||
#Params
|
||||
yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_names, param_constraints
|
||||
yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_names, param_constraints
|
||||
yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_names, param_constraints
|
||||
yield self.t_dlogpdf_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
|
||||
yield self.t_dlogpdf_df_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
|
||||
yield self.t_d2logpdf2_df2_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
|
||||
#Link params
|
||||
yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_names, param_constraints
|
||||
yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_names, param_constraints
|
||||
yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_names, param_constraints
|
||||
yield self.t_dlogpdf_link_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
|
||||
yield self.t_dlogpdf_dlink_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
|
||||
yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
|
||||
|
||||
#laplace likelihood gradcheck
|
||||
yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
|
||||
yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, Y_metadata, self.step, param_vals, param_names, param_constraints
|
||||
if ep:
|
||||
#ep likelihood gradcheck
|
||||
yield self.t_ep_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
|
||||
yield self.t_ep_fit_rbf_white, model, self.X, Y, f, Y_metadata, self.step, param_vals, param_names, param_constraints
|
||||
|
||||
|
||||
self.tearDown()
|
||||
|
|
@ -349,41 +372,41 @@ class TestNoiseModels(object):
|
|||
# dpdf_df's #
|
||||
#############
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_logpdf(self, model, Y, f):
|
||||
def t_logpdf(self, model, Y, f, Y_metadata):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
print model
|
||||
#print model._get_params()
|
||||
np.testing.assert_almost_equal(
|
||||
model.pdf(f.copy(), Y.copy()).prod(),
|
||||
np.exp(model.logpdf(f.copy(), Y.copy()).sum())
|
||||
model.pdf(f.copy(), Y.copy(), Y_metadata=Y_metadata).prod(),
|
||||
np.exp(model.logpdf(f.copy(), Y.copy(), Y_metadata=Y_metadata).sum())
|
||||
)
|
||||
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_dlogpdf_df(self, model, Y, f):
|
||||
def t_dlogpdf_df(self, model, Y, f, Y_metadata):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
self.description = "\n{}".format(inspect.stack()[0][3])
|
||||
logpdf = functools.partial(np.sum(model.logpdf), y=Y)
|
||||
dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y)
|
||||
logpdf = functools.partial(np.sum(model.logpdf), y=Y, Y_metadata=Y_metadata)
|
||||
dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y, Y_metadata=Y_metadata)
|
||||
grad = GradientChecker(logpdf, dlogpdf_df, f.copy(), 'g')
|
||||
grad.randomize()
|
||||
print model
|
||||
assert grad.checkgrad(verbose=1)
|
||||
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_d2logpdf_df2(self, model, Y, f):
|
||||
def t_d2logpdf_df2(self, model, Y, f, Y_metadata):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y)
|
||||
d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y)
|
||||
dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y, Y_metadata=Y_metadata)
|
||||
d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y, Y_metadata=Y_metadata)
|
||||
grad = GradientChecker(dlogpdf_df, d2logpdf_df2, f.copy(), 'g')
|
||||
grad.randomize()
|
||||
print model
|
||||
assert grad.checkgrad(verbose=1)
|
||||
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_d3logpdf_df3(self, model, Y, f):
|
||||
def t_d3logpdf_df3(self, model, Y, f, Y_metadata):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y)
|
||||
d3logpdf_df3 = functools.partial(model.d3logpdf_df3, y=Y)
|
||||
d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y, Y_metadata=Y_metadata)
|
||||
d3logpdf_df3 = functools.partial(model.d3logpdf_df3, y=Y, Y_metadata=Y_metadata)
|
||||
grad = GradientChecker(d2logpdf_df2, d3logpdf_df3, f.copy(), 'g')
|
||||
grad.randomize()
|
||||
print model
|
||||
|
|
@ -393,32 +416,32 @@ class TestNoiseModels(object):
|
|||
# df_dparams #
|
||||
##############
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_dlogpdf_dparams(self, model, Y, f, params, params_names, param_constraints):
|
||||
def t_dlogpdf_dparams(self, model, Y, f, Y_metadata, params, params_names, param_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
print model
|
||||
assert (
|
||||
dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta,
|
||||
params, params_names, args=(f, Y), constraints=param_constraints,
|
||||
params, params_names, args=(f, Y, Y_metadata), constraints=param_constraints,
|
||||
randomize=False, verbose=True)
|
||||
)
|
||||
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_dlogpdf_df_dparams(self, model, Y, f, params, params_names, param_constraints):
|
||||
def t_dlogpdf_df_dparams(self, model, Y, f, Y_metadata, params, params_names, param_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
print model
|
||||
assert (
|
||||
dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta,
|
||||
params, params_names, args=(f, Y), constraints=param_constraints,
|
||||
params, params_names, args=(f, Y, Y_metadata), constraints=param_constraints,
|
||||
randomize=False, verbose=True)
|
||||
)
|
||||
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_d2logpdf2_df2_dparams(self, model, Y, f, params, params_names, param_constraints):
|
||||
def t_d2logpdf2_df2_dparams(self, model, Y, f, Y_metadata, params, params_names, param_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
print model
|
||||
assert (
|
||||
dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta,
|
||||
params, params_names, args=(f, Y), constraints=param_constraints,
|
||||
params, params_names, args=(f, Y, Y_metadata), constraints=param_constraints,
|
||||
randomize=False, verbose=True)
|
||||
)
|
||||
|
||||
|
|
@ -426,10 +449,10 @@ class TestNoiseModels(object):
|
|||
# dpdf_dlink's #
|
||||
################
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_dlogpdf_dlink(self, model, Y, f, link_f_constraints):
|
||||
def t_dlogpdf_dlink(self, model, Y, f, Y_metadata, link_f_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
logpdf = functools.partial(model.logpdf_link, y=Y)
|
||||
dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y)
|
||||
logpdf = functools.partial(model.logpdf_link, y=Y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y, Y_metadata=Y_metadata)
|
||||
grad = GradientChecker(logpdf, dlogpdf_dlink, f.copy(), 'g')
|
||||
|
||||
#Apply constraints to link_f values
|
||||
|
|
@ -442,10 +465,10 @@ class TestNoiseModels(object):
|
|||
assert grad.checkgrad(verbose=1)
|
||||
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_d2logpdf_dlink2(self, model, Y, f, link_f_constraints):
|
||||
def t_d2logpdf_dlink2(self, model, Y, f, Y_metadata, link_f_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y)
|
||||
d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y)
|
||||
dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y, Y_metadata=Y_metadata)
|
||||
d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y, Y_metadata=Y_metadata)
|
||||
grad = GradientChecker(dlogpdf_dlink, d2logpdf_dlink2, f.copy(), 'g')
|
||||
|
||||
#Apply constraints to link_f values
|
||||
|
|
@ -458,10 +481,10 @@ class TestNoiseModels(object):
|
|||
assert grad.checkgrad(verbose=1)
|
||||
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_d3logpdf_dlink3(self, model, Y, f, link_f_constraints):
|
||||
def t_d3logpdf_dlink3(self, model, Y, f, Y_metadata, link_f_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y)
|
||||
d3logpdf_dlink3 = functools.partial(model.d3logpdf_dlink3, y=Y)
|
||||
d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y, Y_metadata=Y_metadata)
|
||||
d3logpdf_dlink3 = functools.partial(model.d3logpdf_dlink3, y=Y, Y_metadata=Y_metadata)
|
||||
grad = GradientChecker(d2logpdf_dlink2, d3logpdf_dlink3, f.copy(), 'g')
|
||||
|
||||
#Apply constraints to link_f values
|
||||
|
|
@ -477,32 +500,32 @@ class TestNoiseModels(object):
|
|||
# dlink_dparams #
|
||||
#################
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_dlogpdf_link_dparams(self, model, Y, f, params, param_names, param_constraints):
|
||||
def t_dlogpdf_link_dparams(self, model, Y, f, Y_metadata, params, param_names, param_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
print model
|
||||
assert (
|
||||
dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta,
|
||||
params, param_names, args=(f, Y), constraints=param_constraints,
|
||||
params, param_names, args=(f, Y, Y_metadata), constraints=param_constraints,
|
||||
randomize=False, verbose=True)
|
||||
)
|
||||
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_names, param_constraints):
|
||||
def t_dlogpdf_dlink_dparams(self, model, Y, f, Y_metadata, params, param_names, param_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
print model
|
||||
assert (
|
||||
dparam_checkgrad(model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta,
|
||||
params, param_names, args=(f, Y), constraints=param_constraints,
|
||||
params, param_names, args=(f, Y, Y_metadata), constraints=param_constraints,
|
||||
randomize=False, verbose=True)
|
||||
)
|
||||
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_names, param_constraints):
|
||||
def t_d2logpdf2_dlink2_dparams(self, model, Y, f, Y_metadata, params, param_names, param_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
print model
|
||||
assert (
|
||||
dparam_checkgrad(model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta,
|
||||
params, param_names, args=(f, Y), constraints=param_constraints,
|
||||
params, param_names, args=(f, Y, Y_metadata), constraints=param_constraints,
|
||||
randomize=False, verbose=True)
|
||||
)
|
||||
|
||||
|
|
@ -510,14 +533,15 @@ class TestNoiseModels(object):
|
|||
# laplace test #
|
||||
################
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_laplace_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints):
|
||||
def t_laplace_fit_rbf_white(self, model, X, Y, f, Y_metadata, step, param_vals, param_names, constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
#Normalize
|
||||
Y = Y/Y.max()
|
||||
white_var = 1e-6
|
||||
white_var = 1e-5
|
||||
kernel = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
|
||||
laplace_likelihood = GPy.inference.latent_function_inference.Laplace()
|
||||
m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, inference_method=laplace_likelihood)
|
||||
|
||||
m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, Y_metadata=Y_metadata, inference_method=laplace_likelihood)
|
||||
m['.*white'].constrain_fixed(white_var)
|
||||
|
||||
#Set constraints
|
||||
|
|
@ -526,6 +550,7 @@ class TestNoiseModels(object):
|
|||
|
||||
print m
|
||||
m.randomize()
|
||||
m.randomize()
|
||||
|
||||
#Set params
|
||||
for param_num in range(len(param_names)):
|
||||
|
|
@ -545,14 +570,15 @@ class TestNoiseModels(object):
|
|||
# EP test #
|
||||
###########
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_ep_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints):
|
||||
def t_ep_fit_rbf_white(self, model, X, Y, f, Y_metadata, step, param_vals, param_names, constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
#Normalize
|
||||
Y = Y/Y.max()
|
||||
white_var = 1e-6
|
||||
kernel = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
|
||||
ep_inf = GPy.inference.latent_function_inference.EP()
|
||||
m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, inference_method=ep_inf)
|
||||
|
||||
m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, Y_metadata=Y_metadata, inference_method=ep_inf)
|
||||
m['.*white'].constrain_fixed(white_var)
|
||||
|
||||
for param_num in range(len(param_names)):
|
||||
|
|
@ -571,8 +597,8 @@ class LaplaceTests(unittest.TestCase):
|
|||
"""
|
||||
|
||||
def setUp(self):
|
||||
self.N = 5
|
||||
self.D = 3
|
||||
self.N = 15
|
||||
self.D = 1
|
||||
self.X = np.random.rand(self.N, self.D)*10
|
||||
|
||||
self.real_std = 0.1
|
||||
|
|
@ -636,20 +662,20 @@ class LaplaceTests(unittest.TestCase):
|
|||
exact_inf = GPy.inference.latent_function_inference.ExactGaussianInference()
|
||||
m1 = GPy.core.GP(X, Y.copy(), kernel=kernel1, likelihood=gauss_distr1, inference_method=exact_inf)
|
||||
m1['.*white'].constrain_fixed(1e-6)
|
||||
m1['.*rbf.variance'] = initial_var_guess
|
||||
m1['.*rbf.variance'].constrain_bounded(1e-4, 10)
|
||||
m1['.*Gaussian_noise.variance'].constrain_bounded(1e-4, 10)
|
||||
m1.randomize()
|
||||
|
||||
gauss_distr2 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
|
||||
laplace_inf = GPy.inference.latent_function_inference.Laplace()
|
||||
m2 = GPy.core.GP(X, Y.copy(), kernel=kernel2, likelihood=gauss_distr2, inference_method=laplace_inf)
|
||||
m2['.*white'].constrain_fixed(1e-6)
|
||||
m2['.*rbf.variance'].constrain_bounded(1e-4, 10)
|
||||
m2['.*Gaussian_noise.variance'].constrain_bounded(1e-4, 10)
|
||||
m2.randomize()
|
||||
|
||||
if debug:
|
||||
print m1
|
||||
print m2
|
||||
|
||||
optimizer = 'scg'
|
||||
print "Gaussian"
|
||||
m1.optimize(optimizer, messages=debug, ipython_notebook=False)
|
||||
|
|
@ -687,8 +713,6 @@ class LaplaceTests(unittest.TestCase):
|
|||
pb.scatter(X, m1.likelihood.Y, c='g')
|
||||
pb.scatter(X, m2.likelihood.Y, c='r', marker='x')
|
||||
|
||||
|
||||
|
||||
#Check Y's are the same
|
||||
np.testing.assert_almost_equal(m1.Y, m2.Y, decimal=5)
|
||||
#Check marginals are the same
|
||||
|
|
|
|||
72
GPy/testing/mapping_tests.py
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import GPy
|
||||
|
||||
class MappingGradChecker(GPy.core.Model):
|
||||
"""
|
||||
    This class has everything we need to check the gradient of a mapping. It
    implements a simple likelihood which is a weighted sum of the outputs of the
    mapping. The gradients are checked against the parameters of the mapping
    and the input.
|
||||
"""
|
||||
def __init__(self, mapping, X, name='map_grad_check'):
|
||||
super(MappingGradChecker, self).__init__(name)
|
||||
self.mapping = mapping
|
||||
self.link_parameter(self.mapping)
|
||||
self.X = GPy.core.Param('X',X)
|
||||
self.link_parameter(self.X)
|
||||
self.dL_dY = np.random.randn(self.X.shape[0], self.mapping.output_dim)
|
||||
def log_likelihood(self):
|
||||
return np.sum(self.mapping.f(self.X) * self.dL_dY)
|
||||
def parameters_changed(self):
|
||||
self.X.gradient = self.mapping.gradients_X(self.dL_dY, self.X)
|
||||
self.mapping.update_gradients(self.dL_dY, self.X)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class MappingTests(unittest.TestCase):
|
||||
|
||||
def test_kernelmapping(self):
|
||||
X = np.random.randn(100,3)
|
||||
Z = np.random.randn(10,3)
|
||||
mapping = GPy.mappings.Kernel(3, 2, Z, GPy.kern.RBF(3))
|
||||
self.assertTrue(MappingGradChecker(mapping, X).checkgrad())
|
||||
|
||||
def test_linearmapping(self):
|
||||
mapping = GPy.mappings.Linear(3, 2)
|
||||
X = np.random.randn(100,3)
|
||||
self.assertTrue(MappingGradChecker(mapping, X).checkgrad())
|
||||
|
||||
def test_mlpmapping(self):
|
||||
mapping = GPy.mappings.MLP(input_dim=3, hidden_dim=5, output_dim=2)
|
||||
X = np.random.randn(100,3)
|
||||
self.assertTrue(MappingGradChecker(mapping, X).checkgrad())
|
||||
|
||||
def test_addmapping(self):
|
||||
m1 = GPy.mappings.MLP(input_dim=3, hidden_dim=5, output_dim=2)
|
||||
m2 = GPy.mappings.Linear(input_dim=3, output_dim=2)
|
||||
mapping = GPy.mappings.Additive(m1, m2)
|
||||
X = np.random.randn(100,3)
|
||||
self.assertTrue(MappingGradChecker(mapping, X).checkgrad())
|
||||
|
||||
def test_compoundmapping(self):
|
||||
m1 = GPy.mappings.MLP(input_dim=3, hidden_dim=5, output_dim=2)
|
||||
Z = np.random.randn(10,2)
|
||||
m2 = GPy.mappings.Kernel(2, 4, Z, GPy.kern.RBF(2))
|
||||
mapping = GPy.mappings.Compound(m1, m2)
|
||||
X = np.random.randn(100,3)
|
||||
self.assertTrue(MappingGradChecker(mapping, X).checkgrad())
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print "Running unit tests, please be (very) patient..."
|
||||
unittest.main()
|
||||
56
GPy/testing/meanfunc_tests.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
# Copyright (c) 2015, James Hensman
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import GPy
|
||||
|
||||
class MFtests(unittest.TestCase):
|
||||
def test_simple_mean_function(self):
|
||||
"""
|
||||
The simplest possible mean function. No parameters, just a simple Sinusoid.
|
||||
"""
|
||||
#create simple mean function
|
||||
mf = GPy.core.Mapping(1,1)
|
||||
mf.f = np.sin
|
||||
mf.update_gradients = lambda a,b: None
|
||||
|
||||
X = np.linspace(0,10,50).reshape(-1,1)
|
||||
Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape)
|
||||
|
||||
k =GPy.kern.RBF(1)
|
||||
lik = GPy.likelihoods.Gaussian()
|
||||
m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf)
|
||||
self.assertTrue(m.checkgrad())
|
||||
|
||||
def test_parametric_mean_function(self):
|
||||
"""
|
||||
A linear mean function with parameters that we'll learn alongside the kernel
|
||||
"""
|
||||
|
||||
X = np.linspace(0,10,50).reshape(-1,1)
|
||||
Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape) + 3*X
|
||||
|
||||
mf = GPy.mappings.Linear(1,1)
|
||||
|
||||
k =GPy.kern.RBF(1)
|
||||
lik = GPy.likelihoods.Gaussian()
|
||||
m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf)
|
||||
self.assertTrue(m.checkgrad())
|
||||
|
||||
def test_svgp_mean_function(self):
|
||||
|
||||
# an instance of the SVGP with a mean function
|
||||
X = np.linspace(0,10,500).reshape(-1,1)
|
||||
Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape)
|
||||
Y = np.where(Y>0, 1, 0) # make a classification problem
|
||||
|
||||
mf = GPy.mappings.Linear(1,1)
|
||||
Z = np.linspace(0,10,50).reshape(-1,1)
|
||||
lik = GPy.likelihoods.Bernoulli()
|
||||
k =GPy.kern.RBF(1) + GPy.kern.White(1, 1e-4)
|
||||
m = GPy.core.SVGP(X, Y,Z=Z, kernel=k, likelihood=lik, mean_function=mf)
|
||||
self.assertTrue(m.checkgrad())
|
||||
|
||||
|
||||
|
||||
18
GPy/testing/misc_tests.py
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
import numpy as np
import scipy as sp
import GPy

class MiscTests(np.testing.TestCase):
    """
    Testing some utilities of misc
    """
    def setUp(self):
        self._lim_val = np.finfo(np.float64).max
        self._lim_val_exp = np.log(self._lim_val)

    def test_safe_exp_upper(self):
        assert np.exp(self._lim_val_exp + 1) == np.inf
        assert GPy.util.misc.safe_exp(self._lim_val_exp + 1) < np.inf

    def test_safe_exp_lower(self):
        assert GPy.util.misc.safe_exp(1e-10) < np.inf
|
||||
54
GPy/testing/svgp_tests.py
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
import numpy as np
|
||||
import scipy as sp
|
||||
import GPy
|
||||
|
||||
class SVGP_nonconvex(np.testing.TestCase):
|
||||
"""
|
||||
Inference in the SVGP with a student-T likelihood
|
||||
"""
|
||||
def setUp(self):
|
||||
X = np.linspace(0,10,100).reshape(-1,1)
|
||||
Z = np.linspace(0,10,10).reshape(-1,1)
|
||||
Y = np.sin(X) + np.random.randn(*X.shape)*0.1
|
||||
Y[50] += 3
|
||||
|
||||
lik = GPy.likelihoods.StudentT(deg_free=2)
|
||||
k = GPy.kern.RBF(1, lengthscale=5.) + GPy.kern.White(1, 1e-6)
|
||||
self.m = GPy.core.SVGP(X, Y, Z=Z, likelihood=lik, kernel=k)
|
||||
def test_grad(self):
|
||||
assert self.m.checkgrad(step=1e-4)
|
||||
|
||||
class SVGP_classification(np.testing.TestCase):
|
||||
"""
|
||||
Inference in the SVGP with a Bernoulli likelihood
|
||||
"""
|
||||
def setUp(self):
|
||||
X = np.linspace(0,10,100).reshape(-1,1)
|
||||
Z = np.linspace(0,10,10).reshape(-1,1)
|
||||
Y = np.where((np.sin(X) + np.random.randn(*X.shape)*0.1)>0, 1,0)
|
||||
|
||||
lik = GPy.likelihoods.Bernoulli()
|
||||
k = GPy.kern.RBF(1, lengthscale=5.) + GPy.kern.White(1, 1e-6)
|
||||
self.m = GPy.core.SVGP(X, Y, Z=Z, likelihood=lik, kernel=k)
|
||||
def test_grad(self):
|
||||
assert self.m.checkgrad(step=1e-4)
|
||||
|
||||
class SVGP_Poisson_with_meanfunction(np.testing.TestCase):
|
||||
"""
|
||||
Inference in the SVGP with a Poisson likelihood and a linear mean function
|
||||
"""
|
||||
def setUp(self):
|
||||
X = np.linspace(0,10,100).reshape(-1,1)
|
||||
Z = np.linspace(0,10,10).reshape(-1,1)
|
||||
latent_f = np.exp(0.1*X * 0.05*X**2)
|
||||
Y = np.array([np.random.poisson(f) for f in latent_f.flatten()]).reshape(-1,1)
|
||||
|
||||
mf = GPy.mappings.Linear(1,1)
|
||||
|
||||
lik = GPy.likelihoods.Poisson()
|
||||
k = GPy.kern.RBF(1, lengthscale=5.) + GPy.kern.White(1, 1e-6)
|
||||
self.m = GPy.core.SVGP(X, Y, Z=Z, likelihood=lik, kernel=k, mean_function=mf)
|
||||
def test_grad(self):
|
||||
assert self.m.checkgrad(step=1e-4)
|
||||
|
||||
|
||||
|
|
@ -17,6 +17,54 @@ def get_blocks(A, blocksizes):
|
|||
count_i += i
|
||||
return B
|
||||
|
||||
def get_block_shapes(B):
|
||||
assert B.dtype is np.dtype('object'), "Must be a block matrix"
|
||||
return [B[b,b].shape[0] for b in range(0, B.shape[0])]
|
||||
|
||||
def unblock(B):
|
||||
assert B.dtype is np.dtype('object'), "Must be a block matrix"
|
||||
block_shapes = get_block_shapes(B)
|
||||
num_elements = np.sum(block_shapes)
|
||||
A = np.empty(shape=(num_elements, num_elements))
|
||||
count_i = 0
|
||||
for Bi, i in enumerate(block_shapes):
|
||||
count_j = 0
|
||||
for Bj, j in enumerate(block_shapes):
|
||||
A[count_i:count_i + i, count_j:count_j + j] = B[Bi, Bj]
|
||||
count_j += j
|
||||
count_i += i
|
||||
return A
|
||||
|
||||
def block_dot(A, B):
|
||||
"""
|
||||
    Element-wise dot product on block matrices

    +------+------+   +------+------+   +-------+-------+
    |      |      |   |      |      |   |A11.B11|A12.B12|
    | A11  | A12  |   | B11  | B12  |   |       |       |
    +------+------+ o +------+------+ = +-------+-------+
    |      |      |   |      |      |   |A21.B21|A22.B22|
    | A21  | A22  |   | B21  | B22  |   |       |       |
    +------+------+   +------+------+   +-------+-------+

    .. Note::
        If either of the diagonal matrices (A or B) is stored as a vector then a more
        efficient dot product using numpy broadcasting will be used, i.e. A11*B11
|
||||
"""
|
||||
#Must have same number of blocks and be a block matrix
|
||||
assert A.dtype is np.dtype('object'), "Must be a block matrix"
|
||||
assert B.dtype is np.dtype('object'), "Must be a block matrix"
|
||||
Ashape = A.shape
|
||||
Bshape = B.shape
|
||||
assert Ashape == Bshape
|
||||
def f(A,B):
|
||||
if Ashape[0] == Ashape[1] or Bshape[0] == Bshape[1]:
|
||||
#FIXME: Careful if one is transpose of other, would make a matrix
|
||||
return A*B
|
||||
else:
|
||||
return np.dot(A,B)
|
||||
dot = np.vectorize(f, otypes = [np.object])
|
||||
return dot(A,B)
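As a minimal standalone sketch of the block-wise combination idea above (plain numpy only; the array names and block sizes are illustrative, not GPy API):

import numpy as np

# two 2x2 block matrices stored as object arrays of 3x3 dense blocks
A = np.empty((2, 2), dtype=object)
B = np.empty((2, 2), dtype=object)
for i in range(2):
    for j in range(2):
        A[i, j] = np.random.randn(3, 3)
        B[i, j] = np.random.randn(3, 3)

# combine corresponding blocks pairwise, as block_dot does in the dense case
pairwise = np.vectorize(lambda a, b: np.dot(a, b), otypes=[object])
C = pairwise(A, B)
assert np.allclose(C[0, 1], np.dot(A[0, 1], B[0, 1]))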
|
||||
|
||||
|
||||
if __name__=='__main__':
|
||||
|
|
@ -24,3 +72,5 @@ if __name__=='__main__':
|
|||
B = get_blocks(A,[2,3])
|
||||
B[0,0] += 7
|
||||
print B
|
||||
|
||||
assert np.all(unblock(B) == A)
|
||||
|
|
|
|||
|
|
@ -96,16 +96,21 @@ def jitchol(A, maxtries=5):
|
|||
num_tries = 1
|
||||
while num_tries <= maxtries and np.isfinite(jitter):
|
||||
try:
|
||||
print jitter
|
||||
L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
|
||||
logging.warning('Added {} rounds of jitter, jitter of {:.10e}\n'.format(num_tries, jitter))
|
||||
return L
|
||||
except:
|
||||
jitter *= 10
|
||||
finally:
|
||||
num_tries += 1
|
||||
raise linalg.LinAlgError, "not positive definite, even with jitter."
|
||||
import traceback
|
||||
logging.warning('\n'.join(['Added {} rounds of jitter, jitter of {:.10e}'.format(num_tries-1, jitter),
|
||||
' in '+traceback.format_list(traceback.extract_stack(limit=2)[-2:-1])[0][2:]]))
|
||||
raise linalg.LinAlgError, "not positive definite, even with jitter."
|
||||
try: raise
|
||||
except:
|
||||
logging.warning('\n'.join(['Added jitter of {:.10e}'.format(jitter),
|
||||
' in '+traceback.format_list(traceback.extract_stack(limit=2)[-2:-1])[0][2:]]))
|
||||
import ipdb;ipdb.set_trace()
|
||||
return L
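The retry logic above is hard to read in diff form; a minimal sketch of the same jitter-and-retry idea, assuming scipy.linalg and leaving out GPy's logging and debugging hooks:

import numpy as np
from scipy import linalg

def jitchol_sketch(A, maxtries=5):
    # add increasing diagonal jitter until the Cholesky factorisation succeeds
    jitter = np.diag(A).mean() * 1e-6
    for _ in range(maxtries):
        try:
            return linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
        except linalg.LinAlgError:
            jitter *= 10.
    raise linalg.LinAlgError("not positive definite, even with jitter.")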
|
||||
|
||||
# def dtrtri(L, lower=1):
|
||||
# """
|
||||
|
|
|
|||
|
|
@ -4,6 +4,16 @@
|
|||
import numpy as np
|
||||
from config import *
|
||||
|
||||
_lim_val = np.finfo(np.float64).max
|
||||
|
||||
_lim_val_exp = np.log(_lim_val)
|
||||
_lim_val_square = np.sqrt(_lim_val)
|
||||
_lim_val_cube = np.power(_lim_val, -3)
|
||||
|
||||
def safe_exp(f):
|
||||
clip_f = np.clip(f, -np.inf, _lim_val_exp)
|
||||
return np.exp(clip_f)
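A quick check of the clipping behaviour, assuming this module is importable as GPy.util.misc (as the misc_tests above do):

import numpy as np
import GPy

too_big = np.log(np.finfo(np.float64).max) + 1.
assert np.exp(too_big) == np.inf                      # plain exp overflows to inf
assert np.isfinite(GPy.util.misc.safe_exp(too_big))   # clipped version stays finite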
|
||||
|
||||
def chain_1(df_dg, dg_dx):
|
||||
"""
|
||||
Generic chaining function for first derivative
|
||||
|
|
@ -11,6 +21,11 @@ def chain_1(df_dg, dg_dx):
|
|||
.. math::
|
||||
\\frac{d(f . g)}{dx} = \\frac{df}{dg} \\frac{dg}{dx}
|
||||
"""
|
||||
if np.all(dg_dx==1.):
|
||||
return df_dg
|
||||
if len(df_dg) > 1 and df_dg.shape[-1] > 1:
|
||||
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
|
||||
raise NotImplementedError('Not implemented for matrices yet')
|
||||
return df_dg * dg_dx
|
||||
|
||||
def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
|
||||
|
|
@ -20,7 +35,13 @@ def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
|
|||
.. math::
|
||||
\\frac{d^{2}(f . g)}{dx^{2}} = \\frac{d^{2}f}{dg^{2}}(\\frac{dg}{dx})^{2} + \\frac{df}{dg}\\frac{d^{2}g}{dx^{2}}
|
||||
"""
|
||||
return d2f_dg2*(dg_dx**2) + df_dg*d2g_dx2
|
||||
if np.all(dg_dx==1.) and np.all(d2g_dx2 == 0):
|
||||
return d2f_dg2
|
||||
if len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1:
|
||||
raise NotImplementedError('Not implemented for matrices yet')
|
||||
#dg_dx_2 = np.clip(dg_dx, 1e-12, _lim_val_square)**2
|
||||
dg_dx_2 = dg_dx**2
|
||||
return d2f_dg2*(dg_dx_2) + df_dg*d2g_dx2
|
||||
|
||||
def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
|
||||
"""
|
||||
|
|
@ -29,11 +50,18 @@ def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
|
|||
.. math::
|
||||
\\frac{d^{3}(f . g)}{dx^{3}} = \\frac{d^{3}f}{dg^{3}}(\\frac{dg}{dx})^{3} + 3\\frac{d^{2}f}{dg^{2}}\\frac{dg}{dx}\\frac{d^{2}g}{dx^{2}} + \\frac{df}{dg}\\frac{d^{3}g}{dx^{3}}
|
||||
"""
|
||||
return d3f_dg3*(dg_dx**3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3
|
||||
if np.all(dg_dx==1.) and np.all(d2g_dx2==0) and np.all(d3g_dx3==0):
|
||||
return d3f_dg3
|
||||
if ( (len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1)
|
||||
or (len(d3f_dg3) > 1 and d3f_dg3.shape[-1] > 1)):
|
||||
raise NotImplementedError('Not implemented for matrices yet')
|
||||
#dg_dx_3 = np.clip(dg_dx, 1e-12, _lim_val_cube)**3
|
||||
dg_dx_3 = dg_dx**3
|
||||
return d3f_dg3*(dg_dx_3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3
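As a hedged numerical sanity check of the chain_2 expression above, take f(g) = exp(g) and g(x) = sin(x); the same pattern extends to chain_1 and chain_3:

import numpy as np

x = 0.3
g, dg_dx, d2g_dx2 = np.sin(x), np.cos(x), -np.sin(x)
df_dg = d2f_dg2 = np.exp(g)                       # every derivative of exp(g) is exp(g)
analytic = d2f_dg2 * dg_dx**2 + df_dg * d2g_dx2   # exactly what chain_2 returns

h = 1e-5                                          # central finite difference of exp(sin(x))
numeric = (np.exp(np.sin(x + h)) - 2 * np.exp(np.sin(x)) + np.exp(np.sin(x - h))) / h**2
assert abs(analytic - numeric) < 1e-4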
|
||||
|
||||
def opt_wrapper(m, **kwargs):
|
||||
"""
|
||||
This function just wraps the optimization procedure of a GPy
|
||||
This function just wraps the optimization procedure of a GPy
|
||||
object so that optimize() is pickleable (necessary for multiprocessing).
|
||||
"""
|
||||
m.optimize(**kwargs)
|
||||
|
|
@ -96,3 +124,47 @@ from :class:ndarray)"""
|
|||
if len(param) == 1:
|
||||
return param[0].view(np.ndarray)
|
||||
return [x.view(np.ndarray) for x in param]
|
||||
|
||||
def blockify_hessian(func):
|
||||
def wrapper_func(self, *args, **kwargs):
|
||||
# Invoke the wrapped function first
|
||||
retval = func(self, *args, **kwargs)
|
||||
# Now do something here with retval and/or action
|
||||
if self.not_block_really and (retval.shape[0] != retval.shape[1]):
|
||||
return np.diagflat(retval)
|
||||
else:
|
||||
return retval
|
||||
return wrapper_func
|
||||
|
||||
def blockify_third(func):
|
||||
def wrapper_func(self, *args, **kwargs):
|
||||
# Invoke the wrapped function first
|
||||
retval = func(self, *args, **kwargs)
|
||||
# Now do something here with retval and/or action
|
||||
if self.not_block_really and (len(retval.shape) < 3):
|
||||
num_data = retval.shape[0]
|
||||
d3_block_cache = np.zeros((num_data, num_data, num_data))
|
||||
diag_slice = range(num_data)
|
||||
d3_block_cache[diag_slice, diag_slice, diag_slice] = np.squeeze(retval)
|
||||
return d3_block_cache
|
||||
else:
|
||||
return retval
|
||||
return wrapper_func
|
||||
|
||||
def blockify_dhess_dtheta(func):
|
||||
def wrapper_func(self, *args, **kwargs):
|
||||
# Invoke the wrapped function first
|
||||
retval = func(self, *args, **kwargs)
|
||||
# Now do something here with retval and/or action
|
||||
if self.not_block_really and (len(retval.shape) < 3):
|
||||
num_data = retval.shape[0]
|
||||
num_params = retval.shape[-1]
|
||||
dhess_dtheta = np.zeros((num_data, num_data, num_params))
|
||||
diag_slice = range(num_data)
|
||||
for param_ind in range(num_params):
|
||||
dhess_dtheta[diag_slice, diag_slice, param_ind] = np.squeeze(retval[:,param_ind])
|
||||
return dhess_dtheta
|
||||
else:
|
||||
return retval
|
||||
return wrapper_func
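A small sketch of what the blockify helpers above produce when the likelihood is not truly block-structured: a length-N vector of per-data-point derivatives is expanded into a higher-order array that is non-zero only on its "diagonal" (plain numpy, illustrative values):

import numpy as np

N = 3
per_point = np.arange(1., N + 1).reshape(N, 1)    # e.g. a diagonal d3logpdf_df3
cube = np.zeros((N, N, N))
idx = np.arange(N)
cube[idx, idx, idx] = np.squeeze(per_point)

assert cube[1, 1, 1] == 2.
assert cube[0, 1, 2] == 0.                        # off-diagonal entries stay zero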
|
||||
|
||||
|
|
|
|||
|
|
@ -1,17 +1,18 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
import numpy as np
|
||||
from GPy.core.parameterization import Parameterized, Param
|
||||
from ..core.parameterization.transformations import Logexp
|
||||
|
||||
class WarpingFunction(object):
|
||||
class WarpingFunction(Parameterized):
|
||||
"""
|
||||
abstract function for warping
|
||||
z = f(y)
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
raise NotImplementedError
|
||||
def __init__(self, name):
|
||||
super(WarpingFunction, self).__init__(name=name)
|
||||
|
||||
def f(self,y,psi):
|
||||
"""function transformation
|
||||
|
|
@ -34,9 +35,10 @@ class WarpingFunction(object):
|
|||
def _get_param_names(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def plot(self, psi, xmin, xmax):
|
||||
def plot(self, xmin, xmax):
|
||||
psi = self.psi
|
||||
y = np.arange(xmin, xmax, 0.01)
|
||||
f_y = self.f(y, psi)
|
||||
f_y = self.f(y)
|
||||
from matplotlib import pyplot as plt
|
||||
plt.figure()
|
||||
plt.plot(y, f_y)
|
||||
|
|
@ -50,6 +52,7 @@ class TanhWarpingFunction(WarpingFunction):
|
|||
"""n_terms specifies the number of tanh terms to be used"""
|
||||
self.n_terms = n_terms
|
||||
self.num_parameters = 3 * self.n_terms
|
||||
super(TanhWarpingFunction, self).__init__(name='warp_tanh')
|
||||
|
||||
def f(self,y,psi):
|
||||
"""
|
||||
|
|
@ -163,8 +166,18 @@ class TanhWarpingFunction_d(WarpingFunction):
|
|||
"""n_terms specifies the number of tanh terms to be used"""
|
||||
self.n_terms = n_terms
|
||||
self.num_parameters = 3 * self.n_terms + 1
|
||||
self.psi = np.ones((self.n_terms, 3))
|
||||
|
||||
def f(self,y,psi):
|
||||
super(TanhWarpingFunction_d, self).__init__(name='warp_tanh')
|
||||
self.psi = Param('psi', self.psi)
|
||||
self.psi[:, :2].constrain_positive()
|
||||
|
||||
self.d = Param('%s' % ('d'), 1.0, Logexp())
|
||||
self.link_parameter(self.psi)
|
||||
self.link_parameter(self.d)
|
||||
|
||||
|
||||
def f(self,y):
|
||||
"""
|
||||
Transform y with f using parameter vector psi
|
||||
psi = [[a,b,c]]
|
||||
|
|
@ -175,9 +188,9 @@ class TanhWarpingFunction_d(WarpingFunction):
|
|||
#1. check that number of params is consistent
|
||||
# assert psi.shape[0] == self.n_terms, 'inconsistent parameter dimensions'
|
||||
# assert psi.shape[1] == 4, 'inconsistent parameter dimensions'
|
||||
mpsi = psi.copy()
|
||||
d = psi[-1]
|
||||
mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
|
||||
|
||||
d = self.d
|
||||
mpsi = self.psi
|
||||
|
||||
#3. transform data
|
||||
z = d*y.copy()
|
||||
|
|
@ -187,7 +200,7 @@ class TanhWarpingFunction_d(WarpingFunction):
|
|||
return z
|
||||
|
||||
|
||||
def f_inv(self, z, psi, max_iterations=1000, y=None):
|
||||
def f_inv(self, z, max_iterations=1000, y=None):
|
||||
"""
|
||||
calculate the numerical inverse of f
|
||||
|
||||
|
|
@ -198,12 +211,12 @@ class TanhWarpingFunction_d(WarpingFunction):
|
|||
z = z.copy()
|
||||
if y is None:
|
||||
y = np.ones_like(z)
|
||||
|
||||
|
||||
it = 0
|
||||
update = np.inf
|
||||
|
||||
while it == 0 or (np.abs(update).sum() > 1e-10 and it < max_iterations):
|
||||
update = (self.f(y, psi) - z)/self.fgrad_y(y, psi)
|
||||
update = (self.f(y) - z)/self.fgrad_y(y)
|
||||
y -= update
|
||||
it += 1
|
||||
if it == max_iterations:
|
||||
|
|
@ -212,7 +225,7 @@ class TanhWarpingFunction_d(WarpingFunction):
|
|||
return y
|
||||
|
||||
|
||||
def fgrad_y(self, y, psi, return_precalc = False):
|
||||
def fgrad_y(self, y,return_precalc = False):
|
||||
"""
|
||||
gradient of f w.r.t to y ([N x 1])
|
||||
|
||||
|
|
@ -221,9 +234,8 @@ class TanhWarpingFunction_d(WarpingFunction):
|
|||
"""
|
||||
|
||||
|
||||
mpsi = psi.copy()
|
||||
d = psi[-1]
|
||||
mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
|
||||
d = self.d
|
||||
mpsi = self.psi
|
||||
|
||||
# vectorized version
|
||||
|
||||
|
|
@ -240,7 +252,7 @@ class TanhWarpingFunction_d(WarpingFunction):
|
|||
return GRAD
|
||||
|
||||
|
||||
def fgrad_y_psi(self, y, psi, return_covar_chain = False):
|
||||
def fgrad_y_psi(self, y, return_covar_chain = False):
|
||||
"""
|
||||
gradient of f w.r.t to y and psi
|
||||
|
||||
|
|
@ -248,10 +260,10 @@ class TanhWarpingFunction_d(WarpingFunction):
|
|||
|
||||
"""
|
||||
|
||||
mpsi = psi.copy()
|
||||
mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
|
||||
|
||||
w, s, r, d = self.fgrad_y(y, psi, return_precalc = True)
|
||||
mpsi = self.psi
|
||||
|
||||
w, s, r, d = self.fgrad_y(y, return_precalc = True)
|
||||
|
||||
gradients = np.zeros((y.shape[0], y.shape[1], len(mpsi), 4))
|
||||
for i in range(len(mpsi)):
|
||||
|
|
|
|||