Merge branch 'devel' of https://github.com/SheffieldML/GPy into devel

This commit is contained in:
mzwiessele 2015-04-08 08:24:59 +02:00
commit 970df9b88e
52 changed files with 2183 additions and 772 deletions

View file

@ -5,6 +5,7 @@ import numpy as np
import sys
from .. import kern
from model import Model
from mapping import Mapping
from parameterization import ObsAr
from .. import likelihoods
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
@ -34,7 +35,7 @@ class GP(Model):
"""
def __init__(self, X, Y, kernel, likelihood, inference_method=None, name='gp', Y_metadata=None, normalizer=False):
def __init__(self, X, Y, kernel, likelihood, mean_function=None, inference_method=None, name='gp', Y_metadata=None, normalizer=False):
super(GP, self).__init__(name)
assert X.ndim == 2
@ -75,6 +76,15 @@ class GP(Model):
assert isinstance(likelihood, likelihoods.Likelihood)
self.likelihood = likelihood
#handle the mean function
self.mean_function = mean_function
if mean_function is not None:
assert isinstance(self.mean_function, Mapping)
assert mean_function.input_dim == self.input_dim
assert mean_function.output_dim == self.output_dim
self.link_parameter(mean_function)
#find a sensible inference method
logger.info("initializing inference method")
if inference_method is None:
@ -153,9 +163,11 @@ class GP(Model):
This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call
this method yourself, there may be unexpected consequences.
"""
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata)
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.mean_function, self.Y_metadata)
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X)
if self.mean_function is not None:
self.mean_function.update_gradients(self.grad_dict['dL_dm'], self.X)
def log_likelihood(self):
"""
@ -192,6 +204,10 @@ class GP(Model):
#force mu to be a column vector
if len(mu.shape)==1: mu = mu[:,None]
#add the mean function in
if not self.mean_function is None:
mu += self.mean_function.f(_Xnew)
return mu, var
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
@ -241,12 +257,14 @@ class GP(Model):
def predictive_gradients(self, Xnew):
"""
Compute the derivatives of the latent function with respect to X*
Compute the derivatives of the predicted latent function with respect to X*
Given a set of points at which to predict X* (size [N*,Q]), compute the
derivatives of the mean and variance. Resulting arrays are sized:
dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
Note that this is not the same as computing the mean and variance of the derivative of the function!
dv_dX* -- [N*, Q], (since all outputs have the same variance)
:param X: The points at which to get the predictive gradients
:type X: np.ndarray (Xnew x self.input_dim)

View file

@ -1,4 +1,5 @@
# Copyright (c) 2013,2014, GPy authors (see AUTHORS.txt).
# Copyright (c) 2015, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
@ -7,7 +8,7 @@ import numpy as np
class Mapping(Parameterized):
"""
Base model for shared behavior between models that can act like a mapping.
Base model for shared mapping behaviours
"""
def __init__(self, input_dim, output_dim, name='mapping'):
@ -18,49 +19,12 @@ class Mapping(Parameterized):
def f(self, X):
raise NotImplementedError
def df_dX(self, dL_df, X):
"""Evaluate derivatives of mapping outputs with respect to inputs.
:param dL_df: gradient of the objective with respect to the function.
:type dL_df: ndarray (num_data x output_dim)
:param X: the input locations where derivatives are to be evaluated.
:type X: ndarray (num_data x input_dim)
:returns: matrix containing gradients of the function with respect to the inputs.
"""
def gradients_X(self, dL_dF, X):
raise NotImplementedError
def df_dtheta(self, dL_df, X):
"""The gradient of the outputs of the mapping with respect to each of the parameters.
:param dL_df: gradient of the objective with respect to the function.
:type dL_df: ndarray (num_data x output_dim)
:param X: input locations where the function is evaluated.
:type X: ndarray (num_data x input_dim)
:returns: Matrix containing gradients with respect to parameters of each output for each input data.
:rtype: ndarray (num_params length)
"""
def update_gradients(self, dL_dF, X):
raise NotImplementedError
def plot(self, *args):
"""
Plots the mapping associated with the model.
- In one dimension, the function is plotted.
- In two dimensions, a contour-plot shows the function
- In higher dimensions, we've not implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions
using which_data and which_functions
This is a convenience function: arguments are passed to
GPy.plotting.matplot_dep.models_plots.plot_mapping
"""
if "matplotlib" in sys.modules:
from ..plotting.matplot_dep import models_plots
mapping_plots.plot_mapping(self,*args)
else:
raise NameError, "matplotlib package has not been imported."
class Bijective_mapping(Mapping):
"""

View file

@ -50,31 +50,29 @@ class SpikeAndSlabPrior(VariationalPrior):
def KL_divergence(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma,gamma1 = variational_posterior.gamma_probabilities()
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
gamma = variational_posterior.gamma.values
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S))
var_gamma = (gamma*(log_gamma-np.log(pi))).sum()+(gamma1*(log_gamma1-np.log(1-pi))).sum()
var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
return var_gamma+ (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.
def update_gradients_KL(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma,gamma1 = variational_posterior.gamma_probabilities()
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
gamma = variational_posterior.gamma.values
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
variational_posterior.binary_prob.gradient -= (np.log((1-pi)/pi)+log_gamma-log_gamma1+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.)*gamma*gamma1
variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
mu.gradient -= gamma*mu/self.variance
S.gradient -= (1./self.variance - 1./S) * gamma /2.
if self.learnPi:
@ -161,24 +159,8 @@ class SpikeAndSlabPosterior(VariationalPosterior):
binary_prob : the probability of the distribution on the slab part.
"""
super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
self.gamma = Param("binary_prob",binary_prob)
self.gamma = Param("binary_prob",binary_prob,Logistic(0.,1.))
self.link_parameter(self.gamma)
@Cache_this(limit=5)
def gamma_probabilities(self):
prob = np.zeros_like(param_to_array(self.gamma))
prob[self.gamma>-710] = 1./(1.+np.exp(-self.gamma[self.gamma>-710]))
prob1 = -np.zeros_like(param_to_array(self.gamma))
prob1[self.gamma<710] = 1./(1.+np.exp(self.gamma[self.gamma<710]))
return prob, prob1
@Cache_this(limit=5)
def gamma_log_prob(self):
loggamma = param_to_array(self.gamma).copy()
loggamma[loggamma>-40] = -np.log1p(np.exp(-loggamma[loggamma>-40]))
loggamma1 = -param_to_array(self.gamma).copy()
loggamma1[loggamma1>-40] = -np.log1p(np.exp(-loggamma1[loggamma1>-40]))
return loggamma,loggamma1
def set_gradients(self, grad):
self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad

View file

@ -19,7 +19,7 @@ class SparseGP(GP):
This model allows (approximate) inference using variational DTC or FITC
(Gaussian likelihoods) as well as non-conjugate sparse methods based on
these.
This is not for missing data, as the implementation for missing data involves
some inefficient optimization routine decisions.
See the missing data SparseGP implementation in :py:class:`~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch`.
@ -39,7 +39,7 @@ class SparseGP(GP):
"""
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, inference_method=None,
name='sparse gp', Y_metadata=None, normalizer=False):
#pick a sensible inference method
if inference_method is None:
@ -53,7 +53,7 @@ class SparseGP(GP):
self.Z = Param('inducing inputs', Z)
self.num_inducing = Z.shape[0]
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
GP.__init__(self, X, Y, kernel, likelihood, mean_function, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
logger.info("Adding Z as parameter")
self.link_parameter(self.Z, index=0)
@ -61,7 +61,7 @@ class SparseGP(GP):
def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior)
def set_Z(self, Z, trigger_update=True):
if trigger_update: self.update_model(False)
self.unlink_parameter(self.Z)
@ -110,8 +110,8 @@ class SparseGP(GP):
def _raw_predict(self, Xnew, full_cov=False, kern=None):
"""
Make a prediction for the latent function values.
Make a prediction for the latent function values.
For certain inputs we give back a full_cov of shape NxN,
if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is of,
we take only the diagonal elements across N.
@ -136,6 +136,9 @@ class SparseGP(GP):
else:
Kxx = kern.Kdiag(Xnew)
var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
#add in the mean function
if self.mean_function is not None:
mu += self.mean_function.f(Xnew)
else:
psi0_star = self.kern.psi0(self.Z, Xnew)
psi1_star = self.kern.psi1(self.Z, Xnew)
@ -165,4 +168,5 @@ class SparseGP(GP):
var[i] = var_
else:
var[i] = np.diag(var_)+p0-t2
return mu, var

View file

@ -9,7 +9,7 @@ from ..inference.latent_function_inference import SVGP as svgp_inf
class SVGP(SparseGP):
def __init__(self, X, Y, Z, kernel, likelihood, name='SVGP', Y_metadata=None, batchsize=None):
def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, name='SVGP', Y_metadata=None, batchsize=None):
"""
Stochastic Variational GP.
@ -38,7 +38,7 @@ class SVGP(SparseGP):
#create the SVI inference method
inf_method = svgp_inf()
SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, inference_method=inf_method,
SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, mean_function=mean_function, inference_method=inf_method,
name=name, Y_metadata=Y_metadata, normalizer=False)
self.m = Param('q_u_mean', np.zeros((self.num_inducing, Y.shape[1])))
@ -48,7 +48,7 @@ class SVGP(SparseGP):
self.link_parameter(self.m)
def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata, KL_scale=1.0, batch_scale=float(self.X_all.shape[0])/float(self.X.shape[0]))
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.mean_function, self.Y_metadata, KL_scale=1.0, batch_scale=float(self.X_all.shape[0])/float(self.X.shape[0]))
#update the kernel gradients
self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z)
@ -65,6 +65,13 @@ class SVGP(SparseGP):
self.m.gradient = self.grad_dict['dL_dm']
self.chol.gradient = self.grad_dict['dL_dchol']
if self.mean_function is not None:
self.mean_function.update_gradients(self.grad_dict['dL_dmfX'], self.X)
g = self.mean_function.gradient[:].copy()
self.mean_function.update_gradients(self.grad_dict['dL_dmfZ'], self.Z)
self.mean_function.gradient[:] += g
self.Z.gradient[:] += self.mean_function.gradients_X(self.grad_dict['dL_dmfZ'], self.Z)
def set_data(self, X, Y):
"""
Set the data without calling parameters_changed to avoid wasted computation

View file

@ -505,3 +505,48 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
print m
return m
def simple_mean_function(max_iters=100, optimize=True, plot=True):
    """
    Demonstrate GP regression with the simplest possible mean function:
    a fixed, parameter-free sinusoid wrapped in a Mapping object.
    """
    # Parameter-free mean function: f(X) = sin(X); nothing to update, so the
    # gradient hook is a no-op.
    mf = GPy.core.Mapping(1, 1)
    mf.f = np.sin
    mf.update_gradients = lambda a, b: None

    # Toy 1-D dataset: a sinusoid plus a faster harmonic and some noise.
    X = np.linspace(0, 10, 50).reshape(-1, 1)
    Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape)

    m = GPy.core.GP(X, Y,
                    kernel=GPy.kern.RBF(1),
                    likelihood=GPy.likelihoods.Gaussian(),
                    mean_function=mf)
    if optimize:
        m.optimize(max_iters=max_iters)
    if plot:
        m.plot(plot_limits=(-10, 15))
    return m
def parametric_mean_function(max_iters=100, optimize=True, plot=True):
    """
    A linear mean function with parameters that we'll learn alongside the kernel.

    The data contain a linear trend (3*X) on top of a noisy sinusoid; the
    linear mapping's weights are optimized together with the kernel and
    likelihood parameters.
    """
    # Toy data: sinusoid + harmonic + noise, superimposed on a linear trend.
    X = np.linspace(0, 10, 50).reshape(-1, 1)
    Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape) + 3*X

    # Linear mapping whose parameters are learned during optimization.
    # (The previous version first built an unused GPy.core.Mapping with
    # f = np.sin and immediately overwrote it — that dead code is removed.)
    mf = GPy.mappings.Linear(1, 1)

    k = GPy.kern.RBF(1)
    lik = GPy.likelihoods.Gaussian()
    m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf)
    if optimize:
        m.optimize(max_iters=max_iters)
    if plot:
        m.plot()
    return m

View file

@ -50,19 +50,19 @@ class InferenceMethodList(LatentFunctionInference, list):
def on_optimization_end(self):
for inf in self:
inf.on_optimization_end()
def __getstate__(self):
state = []
for inf in self:
state.append(inf)
return state
def __setstate__(self, state):
for inf in state:
self.append(inf)
from exact_gaussian_inference import ExactGaussianInference
from laplace import Laplace
from laplace import Laplace, LaplaceBlock
from GPy.inference.latent_function_inference.var_dtc import VarDTC
from expectation_propagation import EP
from expectation_propagation_dtc import EPDTC
@ -78,9 +78,9 @@ from svgp import SVGP
# class EMLikeLatentFunctionInference(LatentFunctionInference):
# def update_approximation(self):
# """
# This function gets called when the
# This function gets called when the
# """
#
#
# def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
# """
# Do inference on the latent functions given a covariance function `kern`,
@ -88,7 +88,7 @@ from svgp import SVGP
# Additional metadata for the outputs `Y` can be given in `Y_metadata`.
# """
# raise NotImplementedError, "Abstract base class for full inference"
#
#
# class VariationalLatentFunctionInference(LatentFunctionInference):
# def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
# """

View file

@ -20,7 +20,8 @@ class DTC(LatentFunctionInference):
def __init__(self):
self.const_jitter = 1e-6
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
assert mean_function is None, "inference with a mean function not implemented"
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
num_inducing, _ = Z.shape
@ -88,7 +89,8 @@ class vDTC(object):
def __init__(self):
self.const_jitter = 1e-6
def inference(self, kern, X, X_variance, Z, likelihood, Y, Y_metadata):
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
assert mean_function is None, "inference with a mean function not implemented"
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
num_inducing, _ = Z.shape

View file

@ -36,11 +36,18 @@ class ExactGaussianInference(LatentFunctionInference):
#print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!"
return Y
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None):
"""
Returns a Posterior class containing essential quantities of the posterior
"""
YYT_factor = self.get_YYTfactor(Y)
if mean_function is None:
m = 0
else:
m = mean_function.f(X)
YYT_factor = self.get_YYTfactor(Y-m)
K = kern.K(X)
@ -56,4 +63,4 @@ class ExactGaussianInference(LatentFunctionInference):
dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK),Y_metadata)
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}

View file

@ -33,15 +33,19 @@ class EP(LatentFunctionInference):
# TODO: update approximation in the end as well? Maybe even with a switch?
pass
def inference(self, kern, X, likelihood, Y, Y_metadata=None, Z=None):
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, Z=None):
assert mean_function is None, "inference with a mean function not implemented"
num_data, output_dim = Y.shape
assert output_dim ==1, "ep in 1D only (for now!)"
K = kern.K(X)
if self._ep_approximation is None:
#if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
else:
#if we've already run EP, just use the existing approximation stored in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation
Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde))

View file

@ -64,7 +64,8 @@ class EPDTC(LatentFunctionInference):
self.old_mutilde, self.old_vtilde = None, None
self._ep_approximation = None
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
assert mean_function is None, "inference with a mean function not implemented"
num_data, output_dim = Y.shape
assert output_dim ==1, "ep in 1D only (for now!)"

View file

@ -18,7 +18,8 @@ class FITC(LatentFunctionInference):
"""
const_jitter = 1e-6
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
assert mean_function is None, "inference with a mean function not implemented"
num_inducing, _ = Z.shape
num_data, output_dim = Y.shape

View file

@ -39,10 +39,11 @@ class Laplace(LatentFunctionInference):
self.first_run = True
self._previous_Ki_fhat = None
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None):
"""
Returns a Posterior class containing essential quantities of the posterior
"""
assert mean_function is None, "inference with a mean function not implemented"
# Compute K
K = kern.K(X)
@ -50,21 +51,25 @@ class Laplace(LatentFunctionInference):
#Find mode
if self.bad_fhat or self.first_run:
Ki_f_init = np.zeros_like(Y)
first_run = False
self.first_run = False
else:
Ki_f_init = self._previous_Ki_fhat
Ki_f_init = np.zeros_like(Y)# FIXME: take this out
f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
self.f_hat = f_hat
self.Ki_fhat = Ki_fhat
self.K = K.copy()
#self.Ki_fhat = Ki_fhat
#self.K = K.copy()
#Compute hessian and other variables at mode
log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
self._previous_Ki_fhat = Ki_fhat.copy()
return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None):
def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None, *args, **kwargs):
"""
Rasmussen's numerically stable mode finding
For nomenclature see Rasmussen & Williams 2006
@ -89,7 +94,12 @@ class Laplace(LatentFunctionInference):
#define the objective function (to be maximised)
def obj(Ki_f, f):
return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
ll = -0.5*np.sum(np.dot(Ki_f.T, f)) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
if np.isnan(ll):
return -np.inf
else:
return ll
difference = np.inf
iteration = 0
@ -104,7 +114,7 @@ class Laplace(LatentFunctionInference):
W_f = W*f
b = W_f + grad # R+W p46 line 6.
W12BiW12, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave)
W12BiW12, _, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave, *args, **kwargs)
W12BiW12Kb = np.dot(W12BiW12, np.dot(K, b))
#Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
@ -121,7 +131,9 @@ class Laplace(LatentFunctionInference):
step = optimize.brent(inner_obj, tol=1e-4, maxiter=12)
Ki_f_new = Ki_f + step*dKi_f
f_new = np.dot(K, Ki_f_new)
#print "new {} vs old {}".format(obj(Ki_f_new, f_new), obj(Ki_f, f))
if obj(Ki_f_new, f_new) < obj(Ki_f, f):
raise ValueError("Shouldn't happen, brent optimization failing")
difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
Ki_f = Ki_f_new
f = f_new
@ -152,14 +164,10 @@ class Laplace(LatentFunctionInference):
if np.any(np.isnan(W)):
raise ValueError('One or more element(s) of W is NaN')
K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)
#compute vital matrices
C = np.dot(LiW12, K)
Ki_W_i = K - C.T.dot(C)
K_Wi_i, logdet_I_KW, I_KW_i, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave)
#compute the log marginal
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + np.sum(likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata)) - np.sum(np.log(np.diag(L)))
log_marginal = -0.5*np.sum(np.dot(Ki_f.T, f_hat)) + np.sum(likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata)) - 0.5*logdet_I_KW
# Compute matrices for derivatives
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
@ -196,23 +204,23 @@ class Laplace(LatentFunctionInference):
dL_dthetaL = np.zeros(num_params)
for thetaL_i in range(num_params):
#Explicit
dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i])
dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i,:, :])
# The + comes from the fact that dlik_hess_dthetaL == -dW_dthetaL
+ 0.5*np.sum(np.diag(Ki_W_i).flatten()*dlik_hess_dthetaL[:, thetaL_i].flatten())
+ 0.5*np.sum(np.diag(Ki_W_i)*np.squeeze(dlik_hess_dthetaL[thetaL_i, :, :]))
)
#Implicit
dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[:, thetaL_i])
#dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[:, thetaL_i])
dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[thetaL_i, :, :])
#dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[thetaL_i, :, :])
dL_dthetaL_imp = np.dot(dL_dfhat.T, dfhat_dthetaL)
dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
dL_dthetaL[thetaL_i] = np.sum(dL_dthetaL_exp + dL_dthetaL_imp)
else:
dL_dthetaL = np.zeros(likelihood.size)
return log_marginal, K_Wi_i, dL_dK, dL_dthetaL
def _compute_B_statistics(self, K, W, log_concave):
def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs):
"""
Rasmussen suggests the use of a numerically stable positive definite matrix B
Which has a positive diagonal elements and can be easily inverted
@ -225,7 +233,7 @@ class Laplace(LatentFunctionInference):
"""
if not log_concave:
#print "Under 1e-10: {}".format(np.sum(W < 1e-6))
W[W<1e-6] = 1e-6
W = np.clip(W, 1e-6, 1e+30)
# NOTE: when setting a parameter inside parameters_changed it will allways come to closed update circles!!!
#W.__setitem__(W < 1e-6, 1e-6, update=False) # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
# If the likelihood is non-log-concave, we want to say that there is a negative variance
@ -247,5 +255,160 @@ class Laplace(LatentFunctionInference):
#K_Wi_i_2 , _= dpotri(L2)
#symmetrify(K_Wi_i_2)
return K_Wi_i, L, LiW12
#compute vital matrices
C = np.dot(LiW12, K)
Ki_W_i = K - C.T.dot(C)
I_KW_i = np.eye(K.shape[0]) - np.dot(K, K_Wi_i)
logdet_I_KW = 2*np.sum(np.log(np.diag(L)))
return K_Wi_i, logdet_I_KW, I_KW_i, Ki_W_i
class LaplaceBlock(Laplace):
    """
    Laplace approximation in which the negative Hessian of the likelihood,
    W = -d2 log p(y|f) / df2, is handled as a full NxN matrix rather than a
    vector of its diagonal (cf. the diagonal-W parent class `Laplace`).
    """

    def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None, *args, **kwargs):
        """
        Mode finding for the Laplace approximation with a dense W,
        parameterized in Ki_f = K^{-1} f for numerical stability
        (cf. Rasmussen & Williams 2006, algorithm 3.1).

        :param K: prior covariance matrix evaluated at the inputs X (NxN)
        :param Y: observed outputs
        :param likelihood: likelihood providing logpdf_sum and its derivatives
        :param Ki_f_init: initial value for K^{-1} f
        :returns: (f, Ki_f) at the (approximate) posterior mode
        """
        Ki_f = Ki_f_init.copy()
        f = np.dot(K, Ki_f)

        #define the objective function (to be maximised)
        def obj(Ki_f, f):
            ll = -0.5*np.dot(Ki_f.T, f) + np.sum(likelihood.logpdf_sum(f, Y, Y_metadata=Y_metadata))
            if np.isnan(ll):
                # guard the line search against NaNs from extreme trial values of f
                return -np.inf
            else:
                return ll

        difference = np.inf
        iteration = 0
        I = np.eye(K.shape[0])
        while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
            W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)
            # clip the diagonal of W to keep B = I + KW well conditioned
            W[np.diag_indices_from(W)] = np.clip(np.diag(W), 1e-6, 1e+30)
            W_f = np.dot(W, f)
            grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)
            b = W_f + grad # R+W p46 line 6.
            K_Wi_i, _, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave, *args, **kwargs)

            #Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
            #a = (I - (K+Wi)i*K)*b
            full_step_Ki_f = np.dot(I - np.dot(K_Wi_i, K), b)
            dKi_f = full_step_Ki_f - Ki_f

            #define an objective for the line search (minimize this one)
            def inner_obj(step_size):
                Ki_f_trial = Ki_f + step_size*dKi_f
                f_trial = np.dot(K, Ki_f_trial)
                return -obj(Ki_f_trial, f_trial)

            #use scipy for the line search, then compute new values of f, Ki_f
            step = optimize.brent(inner_obj, tol=1e-4, maxiter=12)
            Ki_f_new = Ki_f + step*dKi_f
            f_new = np.dot(K, Ki_f_new)
            difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
            Ki_f = Ki_f_new
            f = f_new
            iteration += 1

        #Warn of bad fits
        if difference > self._mode_finding_tolerance:
            if not self.bad_fhat:
                warnings.warn("Not perfect f_hat fit difference: {}".format(difference))
            self._previous_Ki_fhat = np.zeros_like(Y)
            self.bad_fhat = True
        elif self.bad_fhat:
            self.bad_fhat = False
            warnings.warn("f_hat now fine again")

        if iteration > self._mode_finding_max_iter:
            warnings.warn("didn't find the best")

        return f, Ki_f

    def mode_computations(self, f_hat, Ki_f, K, Y, likelihood, kern, Y_metadata):
        """
        Compute, at the mode f_hat, the log marginal likelihood and the
        gradients dL_dK and dL_dthetaL needed by the model.

        :returns: (log_marginal, K_Wi_i, dL_dK, dL_dthetaL)
        """
        #At this point get the hessian matrix (dense NxN here, unlike the diagonal-W parent)
        W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
        W[np.diag_indices_from(W)] = np.clip(np.diag(W), 1e-6, 1e+30)
        K_Wi_i, log_B_det, I_KW_i, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave)

        #compute the log marginal
        #FIXME: The determinant should be output_dim*0.5 I think, gradients may now no longer check
        log_marginal = -0.5*np.dot(f_hat.T, Ki_f) + np.sum(likelihood.logpdf_sum(f_hat, Y, Y_metadata=Y_metadata)) - 0.5*log_B_det

        #Compute vital matrices for derivatives
        dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
        #dL_dfhat = np.zeros((f_hat.shape[0]))
        #for i in range(f_hat.shape[0]):
            #dL_dfhat[i] = -0.5*np.trace(np.dot(Ki_W_i, dW_df[:,:,i]))
        # einsum form of the commented trace loop above
        dL_dfhat = -0.5*np.einsum('ij,ijk->k', Ki_W_i, dW_df)
        woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, Y_metadata=Y_metadata)

        ####################
        #compute dL_dK#
        ####################
        if kern.size > 0 and not kern.is_fixed:
            #Explicit
            explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i)
            #Implicit
            implicit_part = woodbury_vector.dot(dL_dfhat[None,:]).dot(I_KW_i)
            #implicit_part = Ki_f.dot(dL_dfhat[None,:]).dot(I_KW_i)
            dL_dK = explicit_part + implicit_part
        else:
            dL_dK = np.zeros_like(K)

        ####################
        #compute dL_dthetaL#
        ####################
        if likelihood.size > 0 and not likelihood.is_fixed:
            # likelihood-parameter gradients are not implemented for block W
            raise NotImplementedError
        else:
            dL_dthetaL = np.zeros(likelihood.size)

        #self.K_Wi_i = K_Wi_i
        #self.Ki_W_i = Ki_W_i
        #self.W = W
        #self.K = K
        #self.dL_dfhat = dL_dfhat
        #self.explicit_part = explicit_part
        #self.implicit_part = implicit_part

        return log_marginal, K_Wi_i, dL_dK, dL_dthetaL

    def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs):
        """
        Rasmussen suggests the use of a numerically stable positive definite matrix B
        which has positive diagonal elements and can be easily inverted.

        :param K: Prior Covariance matrix evaluated at locations X
        :type K: NxN matrix
        :param W: Negative hessian at a point (dense NxN matrix here, not a diagonal vector)
        :type W: NxN matrix
        :returns: (K_Wi_i, logdet(B), Bi, Ki_W_i)
        """
        #w = GPy.util.diag.view(W)
        #W[:] = np.where(w<1e-6, 1e-6, w)
        #B = I + KW
        B = np.eye(K.shape[0]) + np.dot(K, W)
        #Bi, L, Li, logdetB = pdinv(B)
        Bi = np.linalg.inv(B)
        #K_Wi_i = np.eye(K.shape[0]) - mdot(W, Bi, K)
        # (K + W^{-1})^{-1} = W (I + KW)^{-1}
        K_Wi_i = np.dot(W, Bi)
        #self.K_Wi_i_brute = np.linalg.inv(K + np.linalg.inv(W))
        #self.B = B
        #self.Bi = Bi
        Ki_W_i = np.dot(Bi, K)
        # slogdet avoids overflow in the determinant of B
        sign, logdetB = np.linalg.slogdet(B)
        return K_Wi_i, sign*logdetB, Bi, Ki_W_i

View file

@ -15,7 +15,7 @@ class Posterior(object):
the function at any new point x_* by integrating over this posterior.
"""
def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None, woodbury_inv=None):
def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None, woodbury_inv=None, prior_mean=0):
"""
woodbury_chol : a lower triangular matrix L that satisfies posterior_covariance = K - K L^{-T} L^{-1} K
woodbury_vector : a matrix (or vector, as Nx1 matrix) M which satisfies posterior_mean = K M
@ -67,6 +67,7 @@ class Posterior(object):
#option 2:
self._mean = mean
self._covariance = cov
self._prior_mean = prior_mean
#compute this lazily
self._precision = None
@ -175,7 +176,7 @@ class Posterior(object):
$$
"""
if self._woodbury_vector is None:
self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean)
self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean - self._prior_mean)
return self._woodbury_vector
@property

View file

@ -6,7 +6,8 @@ from posterior import Posterior
class SVGP(LatentFunctionInference):
def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, Y_metadata=None, KL_scale=1.0, batch_scale=1.0):
def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, KL_scale=1.0, batch_scale=1.0):
num_inducing = Z.shape[0]
num_data, num_outputs = Y.shape
@ -22,6 +23,15 @@ class SVGP(LatentFunctionInference):
#S = S + np.eye(S.shape[0])*1e-5*np.max(np.max(S))
#Si, Lnew, _,_ = linalg.pdinv(S)
#compute mean function stuff
if mean_function is not None:
prior_mean_u = mean_function.f(Z)
prior_mean_f = mean_function.f(X)
else:
prior_mean_u = np.zeros((num_inducing, num_outputs))
prior_mean_f = np.zeros((num_data, num_outputs))
#compute kernel related stuff
Kmm = kern.K(Z)
Knm = kern.K(X, Z)
@ -30,29 +40,43 @@ class SVGP(LatentFunctionInference):
#compute the marginal means and variances of q(f)
A = np.dot(Knm, Kmmi)
mu = np.dot(A, q_u_mean)
mu = prior_mean_f + np.dot(A, q_u_mean - prior_mean_u)
v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * np.einsum('ij,jkl->ikl', A, S),1)
#compute the KL term
Kmmim = np.dot(Kmmi, q_u_mean)
KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.einsum('ij,ijk->k', Kmmi, S) + 0.5*np.sum(q_u_mean*Kmmim,0)
KL = KLs.sum()
dKL_dm = Kmmim
#gradient of the KL term (assuming zero mean function)
dKL_dm = Kmmim.copy()
dKL_dS = 0.5*(Kmmi[:,:,None] - Si)
dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(-1)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)
if mean_function is not None:
#adjust KL term for mean function
Kmmi_mfZ = np.dot(Kmmi, prior_mean_u)
KL += -np.sum(q_u_mean*Kmmi_mfZ)
KL += 0.5*np.sum(Kmmi_mfZ*prior_mean_u)
#adjust gradient for mean function
dKL_dm -= Kmmi_mfZ
dKL_dKmm += Kmmim.dot(Kmmi_mfZ.T)
dKL_dKmm -= 0.5*Kmmi_mfZ.dot(Kmmi_mfZ.T)
#compute gradients for mean_function
dKL_dmfZ = Kmmi_mfZ - Kmmim
#quadrature for the likelihood
F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v, Y_metadata=Y_metadata)
#rescale the F term if working on a batch
F, dF_dmu, dF_dv = F*batch_scale, dF_dmu*batch_scale, dF_dv*batch_scale
if dF_dthetaL is not None:
dF_dthetaL = dF_dthetaL.sum(1).sum(1)*batch_scale
#derivatives of expected likelihood
#derivatives of expected likelihood, assuming zero mean function
Adv = A.T[:,:,None]*dF_dv[None,:,:] # As if dF_Dv is diagonal
Admu = A.T.dot(dF_dmu)
#AdvA = np.einsum('ijk,jl->ilk', Adv, A)
#AdvA = np.dot(A.T, Adv).swapaxes(0,1)
AdvA = np.dstack([np.dot(A.T, Adv[:,:,i].T) for i in range(num_outputs)])
tmp = np.einsum('ijk,jlk->il', AdvA, S).dot(Kmmi)
dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(-1) - tmp - tmp.T
@ -62,6 +86,14 @@ class SVGP(LatentFunctionInference):
dF_dm = Admu
dF_dS = AdvA
#adjust gradient to account for mean function
if mean_function is not None:
dF_dmfX = dF_dmu.copy()
dF_dmfZ = -Admu
dF_dKmn -= np.dot(Kmmi_mfZ, dF_dmu.T)
dF_dKmm += Admu.dot(Kmmi_mfZ.T)
#sum (gradients of) expected likelihood and KL part
log_marginal = F.sum() - KL
dL_dm, dL_dS, dL_dKmm, dL_dKmn = dF_dm - dKL_dm, dF_dS- dKL_dS, dF_dKmm- dKL_dKmm, dF_dKmn
@ -69,4 +101,8 @@ class SVGP(LatentFunctionInference):
dL_dchol = np.dstack([2.*np.dot(dL_dS[:,:,i], L[:,:,i]) for i in range(num_outputs)])
dL_dchol = choleskies.triang_to_flat(dL_dchol)
return Posterior(mean=q_u_mean, cov=S, K=Kmm), log_marginal, {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv, 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL}
grad_dict = {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv.sum(1), 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL}
if mean_function is not None:
grad_dict['dL_dmfZ'] = dF_dmfZ - dKL_dmfZ
grad_dict['dL_dmfX'] = dF_dmfX
return Posterior(mean=q_u_mean, cov=S, K=Kmm, prior_mean=prior_mean_u), log_marginal, grad_dict

View file

@ -169,11 +169,13 @@ class VarDTC_minibatch(LatentFunctionInference):
Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm, maxtries=100)
if not np.isfinite(Kmm).all():
print Kmm
Lm = jitchol(Kmm)
LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
LL = jitchol(Lambda, maxtries=100)
LL = jitchol(Lambda)
logdet_L = 2.*np.sum(np.log(np.diag(LL)))
b = dtrtrs(LL,dtrtrs(Lm,psi1Y_full.T)[0])[0]
bbt = np.square(b).sum()

View file

@ -16,5 +16,6 @@ from _src.poly import Poly
from _src.eq_ode2 import EQ_ODE2
from _src.trunclinear import TruncLinear,TruncLinear_inf
from _src.splitKern import SplitKern,DiffGenomeKern
from _src.splitKern import SplitKern,DEtime
from _src.splitKern import DEtime as DiffGenomeKern

View file

@ -6,6 +6,20 @@ from kern import CombinationKernel
from ...util.caching import Cache_this
import itertools
def numpy_invalid_op_as_exception(func):
    """
    A decorator that allows catching numpy invalid operations
    as exceptions (the default behaviour is raising warnings).

    Uses ``np.errstate`` so that the caller's floating-point error
    settings are restored even if *func* raises, instead of being
    unconditionally clobbered back to ``'warn'``.
    """
    def func_wrapper(*args, **kwargs):
        # errstate restores the previous 'invalid' handling on exit
        # (including on exception), whereas the seterr/seterr pair it
        # replaces leaked the 'raise' setting if func threw, and
        # discarded whatever setting was active before the call.
        with np.errstate(invalid='raise'):
            return func(*args, **kwargs)
    return func_wrapper
class Prod(CombinationKernel):
"""
Computes the product of 2 kernels
@ -46,18 +60,20 @@ class Prod(CombinationKernel):
self.parts[0].update_gradients_full(dL_dK*self.parts[1].K(X,X2), X, X2)
self.parts[1].update_gradients_full(dL_dK*self.parts[0].K(X,X2), X, X2)
else:
k = self.K(X,X2)*dL_dK
for p in self.parts:
p.update_gradients_full(k/p.K(X,X2),X,X2)
for combination in itertools.combinations(self.parts, len(self.parts) - 1):
prod = reduce(np.multiply, [p.K(X, X2) for p in combination])
to_update = list(set(self.parts) - set(combination))[0]
to_update.update_gradients_full(dL_dK * prod, X, X2)
def update_gradients_diag(self, dL_dKdiag, X):
if len(self.parts)==2:
self.parts[0].update_gradients_diag(dL_dKdiag*self.parts[1].Kdiag(X), X)
self.parts[1].update_gradients_diag(dL_dKdiag*self.parts[0].Kdiag(X), X)
else:
k = self.Kdiag(X)*dL_dKdiag
for p in self.parts:
p.update_gradients_diag(k/p.Kdiag(X),X)
for combination in itertools.combinations(self.parts, len(self.parts) - 1):
prod = reduce(np.multiply, [p.Kdiag(X) for p in combination])
to_update = list(set(self.parts) - set(combination))[0]
to_update.update_gradients_diag(dL_dKdiag * prod, X)
def gradients_X(self, dL_dK, X, X2=None):
target = np.zeros(X.shape)
@ -65,9 +81,10 @@ class Prod(CombinationKernel):
target += self.parts[0].gradients_X(dL_dK*self.parts[1].K(X, X2), X, X2)
target += self.parts[1].gradients_X(dL_dK*self.parts[0].K(X, X2), X, X2)
else:
k = self.K(X,X2)*dL_dK
for p in self.parts:
target += p.gradients_X(k/p.K(X,X2),X,X2)
for combination in itertools.combinations(self.parts, len(self.parts) - 1):
prod = reduce(np.multiply, [p.K(X, X2) for p in combination])
to_update = list(set(self.parts) - set(combination))[0]
target += to_update.gradients_X(dL_dK * prod, X, X2)
return target
def gradients_X_diag(self, dL_dKdiag, X):
@ -80,3 +97,5 @@ class Prod(CombinationKernel):
for p in self.parts:
target += p.gradients_X_diag(k/p.Kdiag(X),X)
return target

View file

@ -37,11 +37,11 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variati
# Compute for psi0 and psi1
mu2S = np.square(mu)+S
dL_dvar += np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu)
dL_dgamma += np.einsum('n,q,nq->nq',dL_dpsi0,variance,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,variance,Z,mu)
dL_dmu += np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*variance,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,variance,Z)
dL_dS += np.einsum('n,nq,q->nq',dL_dpsi0,gamma,variance)
dL_dZ += np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, variance,mu)
dL_dvar += (dL_dpsi0[:,None]*gamma*mu2S).sum(axis=0) + (dL_dpsi1.T.dot(gamma*mu)*Z).sum(axis=0)
dL_dgamma += dL_dpsi0[:,None]*variance*mu2S+ dL_dpsi1.dot(Z)*mu*variance
dL_dmu += dL_dpsi0[:,None]*2.*variance*gamma*mu + dL_dpsi1.dot(Z)*gamma*variance
dL_dS += dL_dpsi0[:,None]*variance*gamma
dL_dZ += dL_dpsi1.T.dot(gamma*mu)*variance
return dL_dvar, dL_dZ, dL_dmu, dL_dS, dL_dgamma
@ -64,29 +64,23 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S, gamma):
gamma2 = np.square(gamma)
variance2 = np.square(variance)
mu2S = mu2+S # NxQ
gvm = np.einsum('nq,nq,q->nq',gamma,mu,variance)
common_sum = np.einsum('nq,mq->nm',gvm,Z)
# common_sum = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu) # NxM
Z_expect = np.einsum('mo,mq,oq->q',dL_dpsi2,Z,Z)
gvm = gamma*mu*variance
common_sum = gvm.dot(Z.T)
Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0)
Z_expect_var2 = Z_expect*variance2
dL_dpsi2T = dL_dpsi2+dL_dpsi2.T
tmp = np.einsum('mo,oq->mq',dL_dpsi2T,Z)
common_expect = np.einsum('mq,nm->nq',tmp,common_sum)
# common_expect = np.einsum('mo,mq,no->nq',dL_dpsi2+dL_dpsi2.T,Z,common_sum)
Z2_expect = np.einsum('om,nm->no',dL_dpsi2T,common_sum)
Z1_expect = np.einsum('om,mq->oq',dL_dpsi2T,Z)
common_expect = common_sum.dot(dL_dpsi2T).dot(Z)
Z2_expect = common_sum.dot(dL_dpsi2T)
Z1_expect = dL_dpsi2T.dot(Z)
dL_dvar = np.einsum('nq,q,q->q',2.*(gamma*mu2S-gamma2*mu2),variance,Z_expect)+\
np.einsum('nq,nq,nq->q',common_expect,gamma,mu)
dL_dvar = variance*Z_expect*2.*(gamma*mu2S-gamma2*mu2).sum(axis=0)+(common_expect*gamma*mu).sum(axis=0)
dL_dgamma = np.einsum('q,q,nq->nq',Z_expect,variance2,(mu2S-2.*gamma*mu2))+\
np.einsum('nq,q,nq->nq',common_expect,variance,mu)
dL_dgamma = Z_expect_var2*(mu2S-2.*gamma*mu2)+common_expect*mu*variance
dL_dmu = Z_expect_var2*mu*2.*(gamma-gamma2) + common_expect*gamma*variance
dL_dS = gamma*Z_expect_var2
dL_dmu = np.einsum('q,q,nq,nq->nq',Z_expect,variance2,mu,2.*(gamma-gamma2))+\
np.einsum('nq,nq,q->nq',common_expect,gamma,variance)
dL_dS = np.einsum('q,nq,q->nq',Z_expect,gamma,variance2)
# dL_dZ = 2.*(np.einsum('om,nq,q,mq,nq->oq',dL_dpsi2,gamma,variance2,Z,(mu2S-gamma*mu2))+np.einsum('om,nq,q,nq,nm->oq',dL_dpsi2,gamma,variance,mu,common_sum))
dL_dZ = Z1_expect*np.einsum('nq,q,nq->q',gamma,variance2,(mu2S-gamma*mu2))+np.einsum('nq,q,nq,nm->mq',gamma,variance,mu,Z2_expect)
dL_dZ = (gamma*(mu2S-gamma*mu2)).sum(axis=0)*variance2*Z1_expect+ Z2_expect.T.dot(gamma*mu)*variance
return dL_dvar, dL_dgamma, dL_dmu, dL_dS, dL_dZ

View file

@ -22,12 +22,14 @@ try:
# _psi1 NxM
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
l2 = np.square(lengthscale)
log_denom1 = np.log(S/l2+1)
log_denom2 = np.log(2*S/l2+1)
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
log_gamma = np.log(gamma)
log_gamma1 = np.log(1.-gamma)
variance = float(variance)
psi0 = np.empty(N)
psi0[:] = variance
@ -37,6 +39,7 @@ try:
from ....util.misc import param_to_array
S = param_to_array(S)
mu = param_to_array(mu)
gamma = param_to_array(gamma)
Z = param_to_array(Z)
support_code = """
@ -79,7 +82,7 @@ try:
}
}
"""
weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)
weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)
psi2 = psi2n.sum(axis=0)
return psi0,psi1,psi2,psi2n
@ -94,12 +97,13 @@ try:
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
l2 = np.square(lengthscale)
log_denom1 = np.log(S/l2+1)
log_denom2 = np.log(2*S/l2+1)
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
gamma, gamma1 = variational_posterior.gamma_probabilities()
log_gamma = np.log(gamma)
log_gamma1 = np.log(1.-gamma)
variance = float(variance)
dvar = np.zeros(1)
@ -113,6 +117,7 @@ try:
from ....util.misc import param_to_array
S = param_to_array(S)
mu = param_to_array(mu)
gamma = param_to_array(gamma)
Z = param_to_array(Z)
support_code = """
@ -130,7 +135,6 @@ try:
double Zm1q = Z(m1,q);
double Zm2q = Z(m2,q);
double gnq = gamma(n,q);
double g1nq = gamma1(n,q);
double mu_nq = mu(n,q);
if(m2==0) {
@ -156,7 +160,7 @@ try:
dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum);
dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.;
dgamma(n,q) += lpsi1*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
dgamma(n,q) += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
dl(q) += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zm1q*Zm1q/(lq*lq)*d_exp2)/(2.*exp_sum);
dZ(m1,q) += lpsi1*(-Zmu/denom*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
}
@ -184,7 +188,7 @@ try:
dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum;
dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
dgamma(n,q) += lpsi2*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
dgamma(n,q) += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
}
@ -192,7 +196,7 @@ try:
}
}
"""
weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','gamma1','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)
weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)
dl *= 2.*lengthscale
if not ARD:

View file

@ -7,7 +7,7 @@ from kern import Kern,CombinationKernel
from .independent_outputs import index_to_slices
import itertools
class DiffGenomeKern(Kern):
class DEtime(Kern):
def __init__(self, kernel, idx_p, Xp, index_dim=-1, name='DiffGenomeKern'):
self.idx_p = idx_p

View file

@ -60,7 +60,10 @@ class White(Static):
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = np.trace(dL_dK)
if X2 is None:
self.variance.gradient = np.trace(dL_dK)
else:
self.variance.gradient = 0.
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = dL_dKdiag.sum()

View file

@ -296,6 +296,8 @@ class Exponential(Stationary):
return -0.5*self.K_of_r(r)
class OU(Stationary):
"""
OU kernel:

View file

@ -77,7 +77,7 @@ class Bernoulli(Likelihood):
return Z_hat, mu_hat, sigma2_hat
def variational_expectations(self, Y, m, v, gh_points=None):
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
if isinstance(self.gp_link, link_functions.Probit):
if gh_points is None:

View file

@ -34,7 +34,9 @@ class Gaussian(Likelihood):
if gp_link is None:
gp_link = link_functions.Identity()
assert isinstance(gp_link, link_functions.Identity), "the likelihood only implemented for the identity link"
if not isinstance(gp_link, link_functions.Identity):
print "Warning, Exact inference is not implemeted for non-identity link functions,\
if you are not already, ensure Laplace inference_method is used"
super(Gaussian, self).__init__(gp_link, name=name)
@ -263,16 +265,19 @@ class Gaussian(Likelihood):
return d2logpdf_dlink2_dvar
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
return dlogpdf_dvar
dlogpdf_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
dlogpdf_dtheta[0,:,:] = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
return dlogpdf_dtheta
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
return dlogpdf_dlink_dvar
dlogpdf_dlink_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
dlogpdf_dlink_dtheta[0, :, :]= self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
return dlogpdf_dlink_dtheta
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
return d2logpdf_dlink2_dvar
d2logpdf_dlink2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
d2logpdf_dlink2_dtheta[0, :, :] = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
return d2logpdf_dlink2_dtheta
def _mean(self, gp):
"""
@ -305,18 +310,17 @@ class Gaussian(Likelihood):
Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp])
return Ysim.reshape(orig_shape)
def log_predictive_density(self, y_test, mu_star, var_star):
def log_predictive_density(self, y_test, mu_star, var_star, Y_metadata=None):
"""
assumes independence
"""
v = var_star + self.variance
return -0.5*np.log(2*np.pi) -0.5*np.log(v) - 0.5*np.square(y_test - mu_star)/v
def variational_expectations(self, Y, m, v, gh_points=None):
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
lik_var = float(self.variance)
F = -0.5*np.log(2*np.pi) -0.5*np.log(lik_var) - 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/lik_var
dF_dmu = (Y - m)/lik_var
dF_dv = np.ones_like(v)*(-0.5/lik_var)
dF_dlik_var = np.sum(-0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2))
dF_dtheta = [dF_dlik_var]
return F, dF_dmu, dF_dv, dF_dtheta
dF_dtheta = -0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2)
return F, dF_dmu, dF_dv, dF_dtheta.reshape(1, Y.shape[0], Y.shape[1])

View file

@ -5,7 +5,7 @@ import numpy as np
from scipy import stats,special
import scipy as sp
import link_functions
from ..util.misc import chain_1, chain_2, chain_3
from ..util.misc import chain_1, chain_2, chain_3, blockify_dhess_dtheta, blockify_third, blockify_hessian, safe_exp
from scipy.integrate import quad
import warnings
from ..core.parameterization import Parameterized
@ -39,6 +39,7 @@ class Likelihood(Parameterized):
assert isinstance(gp_link,link_functions.GPTransformation), "gp_link is not a valid GPTransformation."
self.gp_link = gp_link
self.log_concave = False
self.not_block_really = False
def _gradients(self,partial):
return np.zeros(0)
@ -177,7 +178,12 @@ class Likelihood(Parameterized):
if np.any(np.isnan(dF_dm)) or np.any(np.isinf(dF_dm)):
stop
dF_dtheta = None # Not yet implemented
if self.size:
dF_dtheta = self.dlogpdf_dtheta(X, Y[:,None]) # Ntheta x (orig size) x N_{quad_points}
dF_dtheta = np.dot(dF_dtheta, gh_w)
dF_dtheta = dF_dtheta.reshape(self.size, shape[0], shape[1])
else:
dF_dtheta = None # Not yet implemented
return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape), dF_dtheta
def predictive_mean(self, mu, variance, Y_metadata=None):
@ -189,20 +195,27 @@ class Likelihood(Parameterized):
"""
#conditional_mean: the expected value of y given some f, under this likelihood
fmin = -np.inf
fmax = np.inf
def int_mean(f,m,v):
p = np.exp(-(0.5/v)*np.square(f - m))
exponent = -(0.5/v)*np.square(f - m)
#If exponent is under -30 then exp(exponent) will be very small, so don't exp it!)
#If p is zero then conditional_mean will overflow
assert v.all() > 0
p = safe_exp(exponent)
#If p is zero then conditional_variance will overflow
if p < 1e-10:
return 0.
else:
return self.conditional_mean(f)*p
scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
scaled_mean = [quad(int_mean, fmin, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
return mean
def _conditional_mean(self, f):
"""Quadrature calculation of the conditional mean: E(Y_star|f)"""
"""Quadrature calculation of the conditional mean: E(Y_star|f_star)"""
raise NotImplementedError, "implement this function to make predictions"
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
@ -210,7 +223,7 @@ class Likelihood(Parameterized):
Approximation to the predictive variance: V(Y_star)
The following variance decomposition is used:
V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
V(Y_star) = E( V(Y_star|f_star)**2 ) + V( E(Y_star|f_star) )**2
:param mu: mean of posterior
:param sigma: standard deviation of posterior
@ -220,15 +233,22 @@ class Likelihood(Parameterized):
#sigma2 = sigma**2
normalizer = np.sqrt(2*np.pi*variance)
fmin_v = -np.inf
fmin_m = np.inf
fmin = -np.inf
fmax = np.inf
from ..util.misc import safe_exp
# E( V(Y_star|f_star) )
def int_var(f,m,v):
p = np.exp(-(0.5/v)*np.square(f - m))
exponent = -(0.5/v)*np.square(f - m)
p = safe_exp(exponent)
#If p is zero then conditional_variance will overflow
if p < 1e-10:
return 0.
else:
return self.conditional_variance(f)*p
scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
scaled_exp_variance = [quad(int_var, fmin_v, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
exp_var = np.array(scaled_exp_variance)[:,None] / normalizer
#V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
@ -240,14 +260,15 @@ class Likelihood(Parameterized):
#E( E(Y_star|f_star)**2 )
def int_pred_mean_sq(f,m,v,predictive_mean_sq):
p = np.exp(-(0.5/v)*np.square(f - m))
exponent = -(0.5/v)*np.square(f - m)
p = np.exp(exponent)
#If p is zero then conditional_mean**2 will overflow
if p < 1e-10:
return 0.
else:
return self.conditional_mean(f)**2*p
scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
scaled_exp_exp2 = [quad(int_pred_mean_sq, fmin_m, fmax,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer
var_exp = exp_exp2 - predictive_mean_sq
@ -295,8 +316,18 @@ class Likelihood(Parameterized):
:returns: likelihood evaluated for this point
:rtype: float
"""
inv_link_f = self.gp_link.transf(f)
return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
if isinstance(self.gp_link, link_functions.Identity):
return self.pdf_link(f, y, Y_metadata=Y_metadata)
else:
inv_link_f = self.gp_link.transf(f)
return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
def logpdf_sum(self, f, y, Y_metadata=None):
"""
Convenience function that can overridden for functions where this could
be computed more efficiently
"""
return np.sum(self.logpdf(f, y, Y_metadata=Y_metadata))
def logpdf(self, f, y, Y_metadata=None):
"""
@ -313,8 +344,11 @@ class Likelihood(Parameterized):
:returns: log likelihood evaluated for this point
:rtype: float
"""
inv_link_f = self.gp_link.transf(f)
return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata)
if isinstance(self.gp_link, link_functions.Identity):
return self.logpdf_link(f, y, Y_metadata=Y_metadata)
else:
inv_link_f = self.gp_link.transf(f)
return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata)
def dlogpdf_df(self, f, y, Y_metadata=None):
"""
@ -332,11 +366,15 @@ class Likelihood(Parameterized):
:returns: derivative of log likelihood evaluated for this point
:rtype: 1xN array
"""
inv_link_f = self.gp_link.transf(f)
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f)
return chain_1(dlogpdf_dlink, dlink_df)
if isinstance(self.gp_link, link_functions.Identity):
return self.dlogpdf_dlink(f, y, Y_metadata=Y_metadata)
else:
inv_link_f = self.gp_link.transf(f)
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f)
return chain_1(dlogpdf_dlink, dlink_df)
@blockify_hessian
def d2logpdf_df2(self, f, y, Y_metadata=None):
"""
Evaluates the link function link(f) then computes the second derivative of log likelihood using it
@ -353,13 +391,18 @@ class Likelihood(Parameterized):
:returns: second derivative of log likelihood evaluated for this point (diagonal only)
:rtype: 1xN array
"""
inv_link_f = self.gp_link.transf(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
d2link_df2 = self.gp_link.d2transf_df2(f)
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
if isinstance(self.gp_link, link_functions.Identity):
d2logpdf_df2 = self.d2logpdf_dlink2(f, y, Y_metadata=Y_metadata)
else:
inv_link_f = self.gp_link.transf(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
d2link_df2 = self.gp_link.d2transf_df2(f)
d2logpdf_df2 = chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
return d2logpdf_df2
@blockify_third
def d3logpdf_df3(self, f, y, Y_metadata=None):
"""
Evaluates the link function link(f) then computes the third derivative of log likelihood using it
@ -376,64 +419,96 @@ class Likelihood(Parameterized):
:returns: third derivative of log likelihood evaluated for this point
:rtype: float
"""
inv_link_f = self.gp_link.transf(f)
d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
d2link_df2 = self.gp_link.d2transf_df2(f)
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
d3link_df3 = self.gp_link.d3transf_df3(f)
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
if isinstance(self.gp_link, link_functions.Identity):
d3logpdf_df3 = self.d3logpdf_dlink3(f, y, Y_metadata=Y_metadata)
else:
inv_link_f = self.gp_link.transf(f)
d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
d2link_df2 = self.gp_link.d2transf_df2(f)
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
d3link_df3 = self.gp_link.d3transf_df3(f)
d3logpdf_df3 = chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
return d3logpdf_df3
def dlogpdf_dtheta(self, f, y, Y_metadata=None):
"""
TODO: Doc strings
"""
if self.size > 0:
inv_link_f = self.gp_link.transf(f)
return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
if self.not_block_really:
raise NotImplementedError("Need to make a decorator for this!")
if isinstance(self.gp_link, link_functions.Identity):
return self.dlogpdf_link_dtheta(f, y, Y_metadata=Y_metadata)
else:
inv_link_f = self.gp_link.transf(f)
return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
else:
# There are no parameters so return an empty array for derivatives
return np.zeros([1, 0])
return np.zeros((0, f.shape[0], f.shape[1]))
def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
"""
TODO: Doc strings
"""
if self.size > 0:
inv_link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
return chain_1(dlogpdf_dlink_dtheta, dlink_df)
if self.not_block_really:
raise NotImplementedError("Need to make a decorator for this!")
if isinstance(self.gp_link, link_functions.Identity):
return self.dlogpdf_dlink_dtheta(f, y, Y_metadata=Y_metadata)
else:
inv_link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
dlogpdf_df_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
#Chain each parameter of the likelihood separately
for p in range(self.size):
dlogpdf_df_dtheta[p, :, :] = chain_1(dlogpdf_dlink_dtheta[p,:,:], dlink_df)
return dlogpdf_df_dtheta
#return chain_1(dlogpdf_dlink_dtheta, dlink_df)
else:
# There are no parameters so return an empty array for derivatives
return np.zeros([f.shape[0], 0])
return np.zeros((0, f.shape[0], f.shape[1]))
def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
"""
TODO: Doc strings
"""
if self.size > 0:
inv_link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f)
d2link_df2 = self.gp_link.d2transf_df2(f)
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
if self.not_block_really:
raise NotImplementedError("Need to make a decorator for this!")
if isinstance(self.gp_link, link_functions.Identity):
return self.d2logpdf_dlink2_dtheta(f, y, Y_metadata=Y_metadata)
else:
inv_link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f)
d2link_df2 = self.gp_link.d2transf_df2(f)
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
d2logpdf_df2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
#Chain each parameter of the likelihood separately
for p in range(self.size):
d2logpdf_df2_dtheta[p, :, :] = chain_2(d2logpdf_dlink2_dtheta[p,:,:], dlink_df, dlogpdf_dlink_dtheta[p,:,:], d2link_df2)
return d2logpdf_df2_dtheta
#return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
else:
# There are no parameters so return an empty array for derivatives
return np.zeros([f.shape[0], 0])
return np.zeros((0, f.shape[0], f.shape[1]))
def _laplace_gradients(self, f, y, Y_metadata=None):
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata).sum(axis=0)
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata)
dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata)
d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata)
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
# ensure we have gradients for every parameter we want to optimize
assert len(dlogpdf_dtheta) == self.size #1 x num_param array
assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix
assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix
assert dlogpdf_dtheta.shape[0] == self.size #f, d x num_param array
assert dlogpdf_df_dtheta.shape[0] == self.size #f x d x num_param matrix or just f x num_param
assert d2logpdf_df2_dtheta.shape[0] == self.size #f x num_param matrix or f x d x num_param matrix, f x f x num_param or f x f x d x num_param
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
@ -454,19 +529,98 @@ class Likelihood(Parameterized):
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
#compute the quantiles by sampling!!!
N_samp = 1000
N_samp = 500
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
#ss_f = s.flatten()
#ss_y = self.samples(ss_f, Y_metadata)
#ss_y = self.samples(s, Y_metadata, samples=100)
ss_y = self.samples(s, Y_metadata)
#ss_y = ss_y.reshape(mu.shape[0], N_samp)
return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]
def samples(self, gp, Y_metadata=None):
def samples(self, gp, Y_metadata=None, samples=1):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
:param samples: number of samples to take for each f location
"""
raise NotImplementedError
raise NotImplementedError("""May be possible to use MCMC with user-tuning, see
MCMC_pdf_samples in likelihood.py and write samples function
using this, beware this is a simple implementation
of Metropolis and will not work well for all likelihoods""")
def MCMC_pdf_samples(self, fNew, num_samples=1000, starting_loc=None, stepsize=0.1, burn_in=1000, Y_metadata=None):
    """
    Simple implementation of the Metropolis sampling algorithm.

    Runs one parallel chain per input location (treats each f independently),
    thus assumes f*_1 independent of f*_2 etc.

    :param fNew: f at which to sample around
    :param num_samples: number of samples to take
    :param starting_loc: starting locations of the independent chains (usually
        will be conditional_mean of likelihood), often link_f
    :param stepsize: stepsize for the normal proposal distribution (will need modifying)
    :param burn_in: number of samples to use for burn-in (will need modifying)
    :param Y_metadata: Y_metadata for pdf
    :returns: (par_chains x num_samples) array of post burn-in chain values
    """
    print "Warning, using MCMC for sampling y*, needs to be tuned!"
    if starting_loc is None:
        starting_loc = fNew
    from functools import partial
    # f is held fixed at fNew; each chain samples y* with target
    # density proportional to self.pdf(y*|f).
    logpdf = partial(self.logpdf, f=fNew, Y_metadata=Y_metadata)
    pdf = lambda y_star: np.exp(logpdf(y=y_star[:, None]))
    #Should be the link function of f is a good starting point
    #(i.e. the point before you corrupt it with the likelihood)
    par_chains = starting_loc.shape[0]
    chain_values = np.zeros((par_chains, num_samples))
    chain_values[:, 0][:,None] = starting_loc
    #Use same stepsize for all par_chains
    stepsize = np.ones(par_chains)*stepsize
    accepted = np.zeros((par_chains, num_samples+burn_in))
    accept_ratio = np.zeros(num_samples+burn_in)
    #Whilst burning in, only need to keep the previous lot
    burnin_cache = np.zeros(par_chains)
    burnin_cache[:] = starting_loc.flatten()
    burning_in = True
    for i in xrange(burn_in+num_samples):
        # next_ind is negative while burning in; becomes a valid column
        # index into chain_values once i >= burn_in.
        next_ind = i-burn_in
        if burning_in:
            old_y = burnin_cache
        else:
            old_y = chain_values[:,next_ind-1]
        old_lik = pdf(old_y)
        #Propose new y from Gaussian proposal
        new_y = np.random.normal(loc=old_y, scale=stepsize)
        new_lik = pdf(new_y)
        #Accept using Metropolis (not hastings) acceptance
        #Always accepts if new_lik > old_lik
        accept_probability = np.minimum(1, new_lik/old_lik)
        u = np.random.uniform(0,1,par_chains)
        #print "Accept prob: ", accept_probability
        accepts = u < accept_probability
        if burning_in:
            burnin_cache[accepts] = new_y[accepts]
            burnin_cache[~accepts] = old_y[~accepts]
            if i == burn_in:
                # Burn-in finished: seed the recorded chain with the
                # last burn-in state and start storing samples.
                burning_in = False
                chain_values[:,0] = burnin_cache
        else:
            #If it was accepted then new_y becomes the latest sample
            chain_values[accepts, next_ind] = new_y[accepts]
            #Otherwise use old y as the sample
            chain_values[~accepts, next_ind] = old_y[~accepts]
        # Track per-iteration acceptance across all parallel chains
        # (over both burn-in and sampling phases).
        accepted[~accepts, i] = 0
        accepted[accepts, i] = 1
        accept_ratio[i] = np.sum(accepted[:,i])/float(par_chains)
        #Show progress
        # NOTE(review): int((burn_in+num_samples)*0.1) is 0 when
        # burn_in+num_samples < 10, which would raise ZeroDivisionError
        # here — confirm callers always use larger sample counts.
        if i % int((burn_in+num_samples)*0.1) == 0:
            print "{}% of samples taken ({})".format((i/int((burn_in+num_samples)*0.1)*10), i)
            print "Last run accept ratio: ", accept_ratio[i]
            print "Average accept ratio: ", np.mean(accept_ratio)
    return chain_values

View file

@ -35,8 +35,8 @@ class StudentT(Likelihood):
self.log_concave = False
def parameters_changed(self):
self.variance = (self.v / float(self.v - 2)) * self.sigma2
#def parameters_changed(self):
#self.variance = (self.v / float(self.v - 2)) * self.sigma2
def update_gradients(self, grads):
"""
@ -180,7 +180,8 @@ class StudentT(Likelihood):
:rtype: float
"""
e = y - inv_link_f
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
e2 = np.square(e)
dlogpdf_dvar = self.v*(e2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e2))
return dlogpdf_dvar
def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None):
@ -226,17 +227,18 @@ class StudentT(Likelihood):
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet
return np.hstack((dlogpdf_dvar, dlogpdf_dv))
return np.array((dlogpdf_dvar, dlogpdf_dv))
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet
return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
return np.array((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
return np.array((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
def predictive_mean(self, mu, sigma, Y_metadata=None):
# The comment here confuses mean and median.

View file

@ -4,4 +4,5 @@
from kernel import Kernel
from linear import Linear
from mlp import MLP
#from rbf import RBF
from additive import Additive
from compound import Compound

View file

@ -2,8 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core.mapping import Mapping
import GPy
from ..core import Mapping
class Additive(Mapping):
"""
@ -17,45 +16,23 @@ class Additive(Mapping):
:type mapping1: GPy.mappings.Mapping
:param mapping2: second mapping to add together.
:type mapping2: GPy.mappings.Mapping
:param tensor: whether or not to use the tensor product of input spaces
:type tensor: bool
"""
def __init__(self, mapping1, mapping2, tensor=False):
if tensor:
input_dim = mapping1.input_dim + mapping2.input_dim
else:
input_dim = mapping1.input_dim
assert(mapping1.input_dim==mapping2.input_dim)
def __init__(self, mapping1, mapping2):
assert(mapping1.input_dim==mapping2.input_dim)
assert(mapping1.output_dim==mapping2.output_dim)
output_dim = mapping1.output_dim
input_dim, output_dim = mapping1.input_dim, mapping1.output_dim
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
self.mapping1 = mapping1
self.mapping2 = mapping2
self.num_params = self.mapping1.num_params + self.mapping2.num_params
self.name = self.mapping1.name + '+' + self.mapping2.name
def _get_param_names(self):
return self.mapping1._get_param_names + self.mapping2._get_param_names
def _get_params(self):
return np.hstack((self.mapping1._get_params(), self.mapping2._get_params()))
def _set_params(self, x):
self.mapping1._set_params(x[:self.mapping1.num_params])
self.mapping2._set_params(x[self.mapping1.num_params:])
def randomize(self):
self.mapping1._randomize()
self.mapping2._randomize()
def f(self, X):
return self.mapping1.f(X) + self.mapping2.f(X)
def df_dtheta(self, dL_df, X):
self._df_dA = (dL_df[:, :, None]*self.kern.K(X, self.X)[:, None, :]).sum(0).T
self._df_dbias = (dL_df.sum(0))
return np.hstack((self._df_dA.flatten(), self._df_dbias))
def update_gradients(self, dL_dF, X):
self.mapping1.update_gradients(dL_dF, X)
self.mapping2.update_gradients(dL_dF, X)
def df_dX(self, dL_df, X):
return self.kern.dK_dX((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
def gradients_X(self, dL_dF, X):
return self.mapping1.gradients_X(dL_dF, X) + self.mapping2.gradients_X(dL_dF, X)

39
GPy/mappings/compound.py Normal file
View file

@ -0,0 +1,39 @@
# Copyright (c) 2015, James Hensman and Alan Saul
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from ..core import Mapping
class Compound(Mapping):
    """
    Composition of two mappings: the output of the first is fed to the second.

    .. math::
       f(\mathbf{x}) = f_2(f_1(\mathbf{x}))

    :param mapping1: inner mapping, applied first
    :type mapping1: GPy.mappings.Mapping
    :param mapping2: outer mapping, applied to the output of mapping1
    :type mapping2: GPy.mappings.Mapping
    """
    def __init__(self, mapping1, mapping2):
        # the inner mapping's output must match the outer mapping's input
        assert mapping1.output_dim == mapping2.input_dim
        Mapping.__init__(self,
                         input_dim=mapping1.input_dim,
                         output_dim=mapping2.output_dim)
        self.mapping1 = mapping1
        self.mapping2 = mapping2
        self.link_parameters(self.mapping1, self.mapping2)

    def f(self, X):
        """Evaluate the composition f_2(f_1(X))."""
        inner = self.mapping1.f(X)
        return self.mapping2.f(inner)

    def update_gradients(self, dL_dF, X):
        """Backpropagate dL_dF through both mappings, setting their gradients."""
        inner = self.mapping1.f(X)
        self.mapping2.update_gradients(dL_dF, inner)
        # chain rule: gradient of the loss w.r.t. the inner mapping's output
        dL_dinner = self.mapping2.gradients_X(dL_dF, inner)
        self.mapping1.update_gradients(dL_dinner, X)

    def gradients_X(self, dL_dF, X):
        """Gradient of the loss w.r.t. the inputs X, via the chain rule."""
        inner = self.mapping1.f(X)
        dL_dinner = self.mapping2.gradients_X(dL_dF, inner)
        return self.mapping1.gradients_X(dL_dinner, X)

26
GPy/mappings/identity.py Normal file
View file

@ -0,0 +1,26 @@
# Copyright (c) 2015, James Hensman
from ..core.mapping import Mapping
from ..core import Param
class Identity(Mapping):
    """
    The identity mapping: returns its input unchanged.

    An explicit do-nothing mapping lets code that expects a Mapping be used
    without applying any transformation to the inputs.
    """
    def __init__(self, input_dim, output_dim, name='identity'):
        Mapping.__init__(self, input_dim, output_dim, name)

    def f(self, X):
        """Return X unchanged."""
        return X

    def update_gradients(self, dL_dF, X):
        """There are no parameters, so there is nothing to update."""
        pass

    def gradients_X(self, dL_dF, X):
        """The Jacobian is the identity, so the gradient passes straight through."""
        return dL_dF

View file

@ -1,9 +1,10 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Copyright (c) 2015, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core.mapping import Mapping
import GPy
from ..core import Param
class Kernel(Mapping):
"""
@ -11,50 +12,41 @@ class Kernel(Mapping):
.. math::
f(\mathbf{x}*) = \mathbf{A}\mathbf{k}(\mathbf{X}, \mathbf{x}^*) + \mathbf{b}
f(\mathbf{x}) = \sum_i \alpha_i k(\mathbf{z}_i, \mathbf{x})
:param X: input observations containing :math:`\mathbf{X}`
:type X: ndarray
or for multple outputs
.. math::
f_i(\mathbf{x}) = \sum_j \alpha_{i,j} k(\mathbf{z}_i, \mathbf{x})
:param input_dim: dimension of input.
:type input_dim: int
:param output_dim: dimension of output.
:type output_dim: int
:param Z: input observations containing :math:`\mathbf{Z}`
:type Z: ndarray
:param kernel: a GPy kernel, defaults to GPy.kern.RBF
:type kernel: GPy.kern.kern
"""
def __init__(self, X, output_dim=1, kernel=None):
Mapping.__init__(self, input_dim=X.shape[1], output_dim=output_dim)
if kernel is None:
kernel = GPy.kern.RBF(self.input_dim)
def __init__(self, input_dim, output_dim, Z, kernel, name='kernmap'):
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
self.kern = kernel
self.X = X
self.num_data = X.shape[0]
self.num_params = self.output_dim*(self.num_data + 1)
self.A = np.array((self.num_data, self.output_dim))
self.bias = np.array(self.output_dim)
self.randomize()
self.name = 'kernel'
def _get_param_names(self):
return sum([['A_%i_%i' % (n, d) for d in range(self.output_dim)] for n in range(self.num_data)], []) + ['bias_%i' % d for d in range(self.output_dim)]
def _get_params(self):
return np.hstack((self.A.flatten(), self.bias))
def _set_params(self, x):
self.A = x[:self.num_data * self.output_dim].reshape(self.num_data, self.output_dim).copy()
self.bias = x[self.num_data*self.output_dim:].copy()
def randomize(self):
self.A = np.random.randn(self.num_data, self.output_dim)/np.sqrt(self.num_data+1)
self.bias = np.random.randn(self.output_dim)/np.sqrt(self.num_data+1)
self.Z = Z
self.num_bases, Zdim = Z.shape
assert Zdim == self.input_dim
self.A = Param('A', np.random.randn(self.num_bases, self.output_dim))
self.link_parameter(self.A)
def f(self, X):
return np.dot(self.kern.K(X, self.X),self.A) + self.bias
return np.dot(self.kern.K(X, self.Z), self.A)
def df_dtheta(self, dL_df, X):
self._df_dA = (dL_df[:, :, None]*self.kern.K(X, self.X)[:, None, :]).sum(0).T
self._df_dbias = (dL_df.sum(0))
return np.hstack((self._df_dA.flatten(), self._df_dbias))
def update_gradients(self, dL_dF, X):
self.kern.update_gradients_full(np.dot(dL_dF, self.A.T), X, self.Z)
self.A.gradient = np.dot( self.kern.K(self.Z, X), dL_dF)
def df_dX(self, dL_df, X):
return self.kern.gradients_X((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
def gradients_X(self, dL_dF, X):
return self.kern.gradients_X(np.dot(dL_dF, self.A.T), X, self.Z)

View file

@ -1,43 +1,39 @@
# Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
# Copyright (c) 2015, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core.mapping import Bijective_mapping
from ..core.mapping import Mapping
from ..core.parameterization import Param
class Linear(Bijective_mapping):
class Linear(Mapping):
"""
Mapping based on a linear model.
A Linear mapping.
.. math::
f(\mathbf{x}*) = \mathbf{W}\mathbf{x}^* + \mathbf{b}
F(\mathbf{x}) = \mathbf{A} \mathbf{x})
:param X: input observations
:type X: ndarray
:param input_dim: dimension of input.
:type input_dim: int
:param output_dim: dimension of output.
:type output_dim: int
:param kernel: a GPy kernel, defaults to GPy.kern.RBF
:type kernel: GPy.kern.kern
"""
def __init__(self, input_dim=1, output_dim=1, name='linear'):
Bijective_mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
self.W = Param('W',np.array((self.input_dim, self.output_dim)))
self.bias = Param('bias',np.array(self.output_dim))
self.link_parameters(self.W, self.bias)
def __init__(self, input_dim, output_dim, name='linmap'):
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
self.A = Param('A', np.random.randn(self.input_dim, self.output_dim))
self.link_parameter(self.A)
def f(self, X):
return np.dot(X,self.W) + self.bias
return np.dot(X, self.A)
def g(self, f):
V = np.linalg.solve(np.dot(self.W.T, self.W), W.T)
return np.dot(f-self.bias, V)
def update_gradients(self, dL_dF, X):
self.A.gradient = np.dot( X.T, dL_dF)
def df_dtheta(self, dL_df, X):
df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T
df_dbias = (dL_df.sum(0))
return np.hstack((df_dW.flatten(), df_dbias))
def dL_dX(self, partial, X):
"""The gradient of L with respect to the inputs to the mapping, where L is a function that is dependent on the output of the mapping, f."""
return (partial[:, None, :]*self.W[None, :, :]).sum(2)
def gradients_X(self, dL_dF, X):
return np.dot(dL_dF, self.A.T)

View file

@ -3,128 +3,53 @@
import numpy as np
from ..core.mapping import Mapping
from ..core import Param
class MLP(Mapping):
"""
Mapping based on a multi-layer perceptron neural network model.
.. math::
f(\\mathbf{x}*) = \\mathbf{W}^0\\boldsymbol{\\phi}(\\mathbf{W}^1\\mathbf{x}+\\mathbf{b}^1)^* + \\mathbf{b}^0
where
.. math::
\\phi(\\cdot) = \\text{tanh}(\\cdot)
:param X: input observations
:type X: ndarray
:param output_dim: dimension of output.
:type output_dim: int
:param hidden_dim: dimension of hidden layer. If it is an int, there is one hidden layer of the given dimension. If it is a list of ints there are as manny hidden layers as the length of the list, each with the given number of hidden nodes in it.
:type hidden_dim: int or list of ints.
Mapping based on a multi-layer perceptron neural network model, with a single hidden layer
"""
def __init__(self, input_dim=1, output_dim=1, hidden_dim=3):
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
self.name = 'mlp'
if isinstance(hidden_dim, int):
hidden_dim = [hidden_dim]
def __init__(self, input_dim=1, output_dim=1, hidden_dim=3, name='mlpmap'):
super(MLP, self).__init__(input_dim=input_dim, output_dim=output_dim, name=name)
self.hidden_dim = hidden_dim
self.activation = [None]*len(self.hidden_dim)
self.W = []
self._dL_dW = []
self.bias = []
self._dL_dbias = []
self.W.append(np.zeros((self.input_dim, self.hidden_dim[0])))
self._dL_dW.append(np.zeros((self.input_dim, self.hidden_dim[0])))
self.bias.append(np.zeros(self.hidden_dim[0]))
self._dL_dbias.append(np.zeros(self.hidden_dim[0]))
self.num_params = self.hidden_dim[0]*(self.input_dim+1)
for h1, h0 in zip(hidden_dim[1:], hidden_dim[0:-1]):
self.W.append(np.zeros((h0, h1)))
self._dL_dW.append(np.zeros((h0, h1)))
self.bias.append(np.zeros(h1))
self._dL_dbias.append(np.zeros(h1))
self.num_params += h1*(h0+1)
self.W.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
self._dL_dW.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
self.bias.append(np.zeros(self.output_dim))
self._dL_dbias.append(np.zeros(self.output_dim))
self.num_params += self.output_dim*(self.hidden_dim[-1]+1)
self.randomize()
self.W1 = Param('W1', np.random.randn(self.input_dim, self.hidden_dim))
self.b1 = Param('b1', np.random.randn(self.hidden_dim))
self.W2 = Param('W2', np.random.randn(self.hidden_dim, self.output_dim))
self.b2 = Param('b2', np.random.randn(self.output_dim))
self.link_parameters(self.W1, self.b1, self.W2, self.b2)
def _get_param_names(self):
return sum([['W%i_%i_%i' % (i, n, d) for n in range(self.W[i].shape[0]) for d in range(self.W[i].shape[1])] + ['bias%i_%i' % (i, d) for d in range(self.W[i].shape[1])] for i in range(len(self.W))], [])
def _get_params(self):
param = np.array([])
for W, bias in zip(self.W, self.bias):
param = np.hstack((param, W.flatten(), bias))
return param
def _set_params(self, x):
start = 0
for W, bias in zip(self.W, self.bias):
end = W.shape[0]*W.shape[1]+start
W[:] = x[start:end].reshape(W.shape[0], W.shape[1]).copy()
start = end
end = W.shape[1]+end
bias[:] = x[start:end].copy()
start = end
def randomize(self):
for W, bias in zip(self.W, self.bias):
W[:] = np.random.randn(W.shape[0], W.shape[1])/np.sqrt(W.shape[0]+1)
bias[:] = np.random.randn(W.shape[1])/np.sqrt(W.shape[0]+1)
def f(self, X):
self._f_computations(X)
return np.dot(np.tanh(self.activation[-1]), self.W[-1]) + self.bias[-1]
layer1 = np.dot(X, self.W1) + self.b1
activations = np.tanh(layer1)
return np.dot(activations, self.W2) + self.b2
def _f_computations(self, X):
W = self.W[0]
bias = self.bias[0]
self.activation[0] = np.dot(X,W) + bias
for W, bias, index in zip(self.W[1:-1], self.bias[1:-1], range(1, len(self.activation))):
self.activation[index] = np.dot(np.tanh(self.activation[index-1]), W)+bias
def update_gradients(self, dL_dF, X):
layer1 = np.dot(X,self.W1) + self.b1
activations = np.tanh(layer1)
#Evaluate second-layer gradients.
self.W2.gradient = np.dot(activations.T, dL_dF)
self.b2.gradient = np.sum(dL_dF, 0)
# Backpropagation to hidden layer.
dL_dact = np.dot(dL_dF, self.W2.T)
dL_dlayer1 = dL_dact / np.square(np.cosh(layer1))
# Finally, evaluate the first-layer gradients.
self.W1.gradient = np.dot(X.T,dL_dlayer1)
self.b1.gradient = np.sum(dL_dlayer1, 0)
def gradients_X(self, dL_dF, X):
layer1 = np.dot(X,self.W1) + self.b1
activations = np.tanh(layer1)
# Backpropagation to hidden layer.
dL_dact = np.dot(dL_dF, self.W2.T)
dL_dlayer1 = dL_dact / np.square(np.cosh(layer1))
return np.dot(dL_dlayer1, self.W1.T)
def df_dtheta(self, dL_df, X):
self._df_computations(dL_df, X)
g = np.array([])
for gW, gbias in zip(self._dL_dW, self._dL_dbias):
g = np.hstack((g, gW.flatten(), gbias))
return g
def _df_computations(self, dL_df, X):
self._f_computations(X)
a0 = self.activation[-1]
W = self.W[-1]
self._dL_dW[-1] = (dL_df[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
dL_dta=(dL_df[:, None, :]*W[None, :, :]).sum(2)
self._dL_dbias[-1] = (dL_df.sum(0))
for dL_dW, dL_dbias, W, bias, a0, a1 in zip(self._dL_dW[-2:0:-1],
self._dL_dbias[-2:0:-1],
self.W[-2:0:-1],
self.bias[-2:0:-1],
self.activation[-2::-1],
self.activation[-1:0:-1]):
ta = np.tanh(a1)
dL_da = dL_dta*(1-ta*ta)
dL_dW[:] = (dL_da[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
dL_dbias[:] = (dL_da.sum(0))
dL_dta = (dL_da[:, None, :]*W[None, :, :]).sum(2)
ta = np.tanh(self.activation[0])
dL_da = dL_dta*(1-ta*ta)
W = self.W[0]
self._dL_dW[0] = (dL_da[:, :, None]*X[:, None, :]).sum(0).T
self._dL_dbias[0] = (dL_da.sum(0))
self._dL_dX = (dL_da[:, None, :]*W[None, :, :]).sum(2)
def df_dX(self, dL_df, X):
self._df_computations(dL_df, X)
return self._dL_dX

View file

@ -0,0 +1,94 @@
from GPy.core.mapping import Mapping
from GPy.core import Param
import numpy as np
class PiecewiseLinear(Mapping):
    """
    A piecewise-linear mapping (scalar input to scalar output).

    The parameters of this mapping are the positions of the breakpoints
    (self.breaks) and the function values at those breakpoints (self.values).
    Between consecutive breakpoints the function interpolates linearly;
    outside the range of the breakpoints it is assumed to have gradient 1.

    :param input_dim: dimension of the input (must be 1)
    :param output_dim: dimension of the output (must be 1)
    :param values: function values at the breakpoints
    :param breaks: positions of the breakpoints
    :param name: name of the mapping
    """
    def __init__(self, input_dim, output_dim, values, breaks, name='piecewise_linear'):
        # only the scalar case is implemented
        assert input_dim == 1
        assert output_dim == 1
        Mapping.__init__(self, input_dim, output_dim, name)
        values, breaks = np.array(values).flatten(), np.array(breaks).flatten()
        # one value per breakpoint
        assert values.size == breaks.size
        self.values = Param('values', values)
        self.breaks = Param('breaks', breaks)
        self.link_parameter(self.values)
        self.link_parameter(self.breaks)

    def parameters_changed(self):
        # Cache a sorted view of the breakpoints (and the matching values) so
        # that f and the gradient methods can scan the intervals left-to-right
        # regardless of the order in which the parameters are stored.
        self.order = np.argsort(self.breaks) * 1
        self.reverse_order = np.zeros_like(self.order)
        self.reverse_order[self.order] = np.arange(self.order.size)
        self.sorted_breaks = self.breaks[self.order]
        self.sorted_values = self.values[self.order]
        # slope of each linear segment between consecutive breakpoints
        self.grads = np.diff(self.sorted_values) / np.diff(self.sorted_breaks)

    def f(self, X):
        """Evaluate the piecewise-linear function at X (returned as a column)."""
        x = X.flatten()
        y = x.copy()
        # first adjust the points below the first breakpoint (unit gradient)
        below = x < self.sorted_breaks[0]
        y[below] = x[below] + self.sorted_values[0] - self.sorted_breaks[0]
        # now all the points at or past the last breakpoint (unit gradient).
        # Using >= (rather than the former strict >) means a point exactly on
        # the last breakpoint maps to its value instead of being left at y=x.
        above = x >= self.sorted_breaks[-1]
        y[above] = x[above] + self.sorted_values[-1] - self.sorted_breaks[-1]
        # loop through the pairs of breakpoints, interpolating linearly.
        # The half-open interval [low, up) ensures every x is covered exactly
        # once (the original strict inequalities skipped points that fell
        # exactly on a breakpoint, incorrectly leaving y = x there).
        for low, up, g, v in zip(self.sorted_breaks[:-1], self.sorted_breaks[1:], self.grads, self.sorted_values[:-1]):
            i = np.logical_and(x >= low, x < up)
            y[i] = v + (x[i] - low) * g
        return y.reshape(-1, 1)

    def update_gradients(self, dL_dF, X):
        """Accumulate the gradients of the loss into self.breaks and self.values."""
        x = X.flatten()
        dL_dF = dL_dF.flatten()
        dL_db = np.zeros(self.sorted_breaks.size)
        dL_dv = np.zeros(self.sorted_values.size)
        # loop across each interval, computing the gradient for each of the 4
        # parameters (two breakpoints and two values) that define it
        for i, (low, up, g, v) in enumerate(zip(self.sorted_breaks[:-1], self.sorted_breaks[1:], self.grads, self.sorted_values[:-1])):
            # half-open interval [low, up), consistent with f
            index = np.logical_and(x >= low, x < up)
            xx = x[index]
            grad = dL_dF[index]
            span = up - low
            dL_dv[i] += np.sum(grad * ((low - xx) / span + 1))
            dL_dv[i + 1] += np.sum(grad * (xx - low) / span)
            dL_db[i] += np.sum(grad * g * (xx - up) / span)
            dL_db[i + 1] += np.sum(grad * g * (low - xx) / span)
        # now the flat end parts (masks consistent with f: < first, >= last)
        below = x < self.sorted_breaks[0]
        above = x >= self.sorted_breaks[-1]
        dL_db[0] -= np.sum(dL_dF[below])
        dL_db[-1] -= np.sum(dL_dF[above])
        dL_dv[0] += np.sum(dL_dF[below])
        dL_dv[-1] += np.sum(dL_dF[above])
        # now put the gradients back into the (unsorted) parameter order!
        self.breaks.gradient = dL_db[self.reverse_order]
        self.values.gradient = dL_dv[self.reverse_order]

    def gradients_X(self, dL_dF, X):
        """Gradient of the loss with respect to the inputs X."""
        x = X.flatten()
        # flatten so the boolean indexing below lines up with x (the original
        # indexed the 2d dL_dF with a 1d mask while dL_dX was flattened,
        # producing a shape mismatch on assignment)
        dL_dF = dL_dF.flatten()
        # outside the range of the breakpoints, the function is just offset by
        # a constant, so the partial derivative is 1
        dL_dX = dL_dF.copy()
        # inside the breakpoints, the partial derivative is the segment slope
        for low, up, g, v in zip(self.sorted_breaks[:-1], self.sorted_breaks[1:], self.grads, self.sorted_values[:-1]):
            i = np.logical_and(x >= low, x < up)
            dL_dX[i] = dL_dF[i] * g
        return dL_dX.reshape(-1, 1)

View file

@ -43,10 +43,11 @@ class SparseGPMiniBatch(SparseGP):
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
name='sparse gp', Y_metadata=None, normalizer=False,
missing_data=False, stochastic=False, batchsize=1):
#pick a sensible inference method
# pick a sensible inference method
if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian):
inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1])
inference_method = var_dtc.VarDTC(limit=1 if not missing_data else Y.shape[1])
else:
#inference_method = ??
raise NotImplementedError, "what to do what to do?"

View file

@ -39,7 +39,10 @@ class SSGPLVM(SparseGP_MPI):
X_variance = np.random.uniform(0,.1,X.shape)
if Gamma is None:
gamma = np.random.randn(X.shape[0], input_dim)
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
gamma[gamma>1.-1e-9] = 1.-1e-9
gamma[gamma<1e-9] = 1e-9
else:
gamma = Gamma.copy()

View file

@ -1,7 +1,6 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..util.warping_functions import *
from ..core import GP
@ -10,14 +9,16 @@ from GPy.util.warping_functions import TanhWarpingFunction_d
from GPy import kern
class WarpedGP(GP):
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalize_X=False, normalize_Y=False):
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3):
if kernel is None:
kernel = kern.rbf(X.shape[1])
kernel = kern.RBF(X.shape[1])
if warping_function == None:
self.warping_function = TanhWarpingFunction_d(warping_terms)
self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1,) * 1)
else:
self.warping_function = warping_function
self.scale_data = False
if self.scale_data:
@ -25,10 +26,10 @@ class WarpedGP(GP):
self.has_uncertain_inputs = False
self.Y_untransformed = Y.copy()
self.predict_in_warped_space = False
likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y)
likelihood = likelihoods.Gaussian()
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params())
GP.__init__(self, X, self.transform_data(), likelihood=likelihood, kernel=kernel)
self.link_parameter(self.warping_function)
def _scale_data(self, Y):
self._Ymax = Y.max()
@ -38,62 +39,55 @@ class WarpedGP(GP):
def _unscale_data(self, Y):
return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin
def _set_params(self, x):
self.warping_params = x[:self.warping_function.num_parameters]
Y = self.transform_data()
self.likelihood.set_data(Y)
GP._set_params(self, x[self.warping_function.num_parameters:].copy())
def parameters_changed(self):
self.Y[:] = self.transform_data()
super(WarpedGP, self).parameters_changed()
def _get_params(self):
return np.hstack((self.warping_params.flatten().copy(), GP._get_params(self).copy()))
Kiy = self.posterior.woodbury_vector.flatten()
def _get_param_names(self):
warping_names = self.warping_function._get_param_names()
param_names = GP._get_param_names(self)
return warping_names + param_names
def transform_data(self):
Y = self.warping_function.f(self.Y_untransformed.copy(), self.warping_params).copy()
return Y
def log_likelihood(self):
ll = GP.log_likelihood(self)
jacobian = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
return ll + np.log(jacobian).sum()
def _log_likelihood_gradients(self):
ll_grads = GP._log_likelihood_gradients(self)
alpha = np.dot(self.Ki, self.likelihood.Y.flatten())
warping_grads = self.warping_function_gradients(alpha)
warping_grads = np.append(warping_grads[:, :-1].flatten(), warping_grads[0, -1])
return np.hstack((warping_grads.flatten(), ll_grads.flatten()))
def warping_function_gradients(self, Kiy):
grad_y = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed, self.warping_params,
grad_y = self.warping_function.fgrad_y(self.Y_untransformed)
grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed,
return_covar_chain=True)
djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0)
dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0)
return -dquad_dpsi + djac_dpsi
warping_grads = -dquad_dpsi + djac_dpsi
self.warping_function.psi.gradient[:] = warping_grads[:, :-1]
self.warping_function.d.gradient[:] = warping_grads[0, -1]
def transform_data(self):
Y = self.warping_function.f(self.Y_untransformed.copy()).copy()
return Y
def log_likelihood(self):
ll = GP.log_likelihood(self)
jacobian = self.warping_function.fgrad_y(self.Y_untransformed)
return ll + np.log(jacobian).sum()
def plot_warping(self):
self.warping_function.plot(self.warping_params, self.Y_untransformed.min(), self.Y_untransformed.max())
self.warping_function.plot(self.Y_untransformed.min(), self.Y_untransformed.max())
def predict(self, Xnew, which_parts='all', full_cov=False, pred_init=None):
def predict(self, Xnew, which_parts='all', pred_init=None):
# normalize X values
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
mu, var = GP._raw_predict(self, Xnew, full_cov=full_cov, which_parts=which_parts)
# Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
mu, var = GP._raw_predict(self, Xnew)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
mean, var = self.likelihood.predictive_values(mu, var)
if self.predict_in_warped_space:
mean = self.warping_function.f_inv(mean, self.warping_params, y=pred_init)
var = self.warping_function.f_inv(var, self.warping_params)
mean = self.warping_function.f_inv(mean, y=pred_init)
var = self.warping_function.f_inv(var)
if self.scale_data:
mean = self._unscale_data(mean)
return mean, var, _025pm, _975pm
return mean, var
if __name__ == '__main__':
X = np.random.randn(100, 1)
Y = np.sin(X) + np.random.randn(100, 1)*0.05
m = WarpedGP(X, Y)

View file

@ -6,7 +6,11 @@ try:
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
#from matplotlib import cm
pb.ion()
try:
__IPYTHON__
pb.ion()
except NameError:
pass
except:
pass
import re

View file

@ -11,39 +11,38 @@ import GPy
class InferenceXTestCase(unittest.TestCase):
def genData(self):
D1,D2,N = 12,12,50
np.random.seed(1234)
x = np.linspace(0, 4 * np.pi, N)[:, None]
s1 = np.vectorize(lambda x: np.sin(x))
s2 = np.vectorize(lambda x: np.cos(x)**2)
s3 = np.vectorize(lambda x:-np.exp(-np.cos(2 * x)))
sS = np.vectorize(lambda x: np.cos(x))
s1 = s1(x)
s2 = s2(x)
s3 = s3(x)
sS = sS(x)
s1 -= s1.mean(); s1 /= s1.std(0)
s2 -= s2.mean(); s2 /= s2.std(0)
s3 -= s3.mean(); s3 /= s3.std(0)
sS -= sS.mean(); sS /= sS.std(0)
S1 = np.hstack([s1, sS])
S2 = np.hstack([s3, sS])
P1 = np.random.randn(S1.shape[1], D1)
P2 = np.random.randn(S2.shape[1], D2)
Y1 = S1.dot(P1)
Y2 = S2.dot(P2)
Y1 += .01 * np.random.randn(*Y1.shape)
Y2 += .01 * np.random.randn(*Y2.shape)
Y1 -= Y1.mean(0)
Y2 -= Y2.mean(0)
Y1 /= Y1.std(0)
@ -52,33 +51,34 @@ class InferenceXTestCase(unittest.TestCase):
slist = [s1, s2, s3, sS]
slist_names = ["s1", "s2", "s3", "sS"]
Ylist = [Y1, Y2]
return Ylist
def test_inferenceX_BGPLVM(self):
Ys = self.genData()
m = GPy.models.BayesianGPLVM(Ys[0],5,kernel=GPy.kern.Linear(5,ARD=True))
x,mi = m.infer_newX(m.Y, optimize=False)
self.assertTrue(mi.checkgrad())
m.optimize(max_iters=10000)
x,mi = m.infer_newX(m.Y)
self.assertTrue(np.allclose(m.X.mean, mi.X.mean))
self.assertTrue(np.allclose(m.X.variance, mi.X.variance))
m.optimize(max_iters=10000)
x, mi = m.infer_newX(m.Y)
print m.X.mean - mi.X.mean
self.assertTrue(np.allclose(m.X.mean, mi.X.mean, rtol=1e-4, atol=1e-4))
self.assertTrue(np.allclose(m.X.variance, mi.X.variance, rtol=1e-4, atol=1e-4))
def test_inferenceX_GPLVM(self):
Ys = self.genData()
m = GPy.models.GPLVM(Ys[0],3,kernel=GPy.kern.RBF(3,ARD=True))
x,mi = m.infer_newX(m.Y, optimize=False)
self.assertTrue(mi.checkgrad())
# m.optimize(max_iters=10000)
# x,mi = m.infer_newX(m.Y)
# self.assertTrue(np.allclose(m.X, x))
if __name__ == "__main__":
unittest.main()

View file

@ -256,13 +256,23 @@ class KernelGradientTestsContinuous(unittest.TestCase):
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Prod1(self):
k = GPy.kern.RBF(self.D) * GPy.kern.Linear(self.D)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Prod2(self):
k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D))
k = GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Prod3(self):
k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D))
k = GPy.kern.RBF(self.D) * GPy.kern.Linear(self.D) * GPy.kern.Bias(self.D)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Prod4(self):
k = GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D) * GPy.kern.Matern32(2, active_dims=[0,1])
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
@ -401,11 +411,27 @@ class Coregionalize_weave_test(unittest.TestCase):
GPy.util.config.config.set('weave', 'working', 'False')
class KernelTestsProductWithZeroValues(unittest.TestCase):
def setUp(self):
self.X = np.array([[0,1],[1,0]])
self.k = GPy.kern.Linear(2) * GPy.kern.Bias(2)
def test_zero_valued_kernel_full(self):
self.k.update_gradients_full(1, self.X)
self.assertFalse(np.isnan(self.k['linear.variances'].gradient),
"Gradient resulted in NaN")
def test_zero_valued_kernel_gradients_X(self):
target = self.k.gradients_X(1, self.X)
self.assertFalse(np.any(np.isnan(target)),
"Gradient resulted in NaN")
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."
unittest.main()
# np.random.seed(0)
# N0 = 3
# N1 = 9

View file

@ -10,7 +10,7 @@ from GPy.likelihoods import link_functions
from GPy.core.parameterization import Param
from functools import partial
#np.random.seed(300)
#np.random.seed(7)
#np.random.seed(4)
#np.seterr(divide='raise')
def dparam_partial(inst_func, *args):
@ -52,8 +52,17 @@ def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None,
zipped_params = zip(params, params_names)
for param_ind, (param_val, param_name) in enumerate(zipped_params):
#Check one parameter at a time, make sure it is 2d (as some gradients only return arrays) then strip out the parameter
fnum = np.atleast_2d(partial_f(param_val, param_name))[:, param_ind].shape[0]
dfnum = np.atleast_2d(partial_df(param_val, param_name))[:, param_ind].shape[0]
f_ = partial_f(param_val, param_name)
df_ = partial_df(param_val, param_name)
#Reshape it so that we have a 3d matrix; that is, we want it (?, N, D) regardless of whether ? is num_params or not
f_ = f_.reshape(-1, f_.shape[0], f_.shape[1])
df_ = df_.reshape(-1, f_.shape[0], f_.shape[1])
#Get the number of f and number of dimensions
fnum = f_.shape[-2]
fdim = f_.shape[-1]
dfnum = df_.shape[-2]
for fixed_val in range(dfnum):
#dlik and dlik_dvar gives back 1 value for each
f_ind = min(fnum, fixed_val+1) - 1
@ -61,9 +70,13 @@ def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None,
#Make grad checker with this param moving, note that set_params is NOT being called
#The parameter is being set directly with __setattr__
#Check only the parameter and function value we wish to check at a time
grad = GradientChecker(lambda p_val: np.atleast_2d(partial_f(p_val, param_name))[f_ind, param_ind],
lambda p_val: np.atleast_2d(partial_df(p_val, param_name))[fixed_val, param_ind],
param_val, [param_name])
#func = lambda p_val, fnum, fdim, param_ind, f_ind, param_ind: partial_f(p_val, param_name).reshape(-1, fnum, fdim)[param_ind, f_ind, :]
#dfunc_dparam = lambda d_val, fnum, fdim, param_ind, fixed_val: partial_df(d_val, param_name).reshape(-1, fnum, fdim)[param_ind, fixed_val, :]
#First we reshape the output such that it is (num_params, N, D), then we pull out the relevant parameter/f-index and checkgrad just this index at a time
func = lambda p_val: partial_f(p_val, param_name).reshape(-1, fnum, fdim)[param_ind, f_ind, :]
dfunc_dparam = lambda d_val: partial_df(d_val, param_name).reshape(-1, fnum, fdim)[param_ind, fixed_val, :]
grad = GradientChecker(func, dfunc_dparam, param_val, [param_name])
if constraints is not None:
for constrain_param, constraint in constraints:
@ -104,37 +117,9 @@ class TestNoiseModels(object):
self.var = 0.2
self.var = np.random.rand(1)
#Make a bigger step as lower bound can be quite curved
self.step = 1e-4
def tearDown(self):
self.Y = None
self.f = None
self.X = None
def test_scale2_models(self):
self.setUp()
####################################################
# Constraint wrappers so we can just list them off #
####################################################
def constrain_fixed(regex, model):
model[regex].constrain_fixed()
def constrain_negative(regex, model):
model[regex].constrain_negative()
def constrain_positive(regex, model):
model[regex].constrain_positive()
def constrain_bounded(regex, model, lower, upper):
"""
Used like: partial(constrain_bounded, lower=0, upper=1)
"""
model[regex].constrain_bounded(lower, upper)
"""
Dictionary where we nest models we would like to check
Name: {
@ -149,136 +134,170 @@ class TestNoiseModels(object):
"link_f_constraints": [constraint_wrappers, listed_here]
}
"""
noise_models = {"Student_t_default": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": {
"names": [".*t_scale2"],
"vals": [self.var],
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
#"constraints": [("t_scale2", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))]
},
"laplace": True
},
"Student_t_1_var": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": {
"names": [".*t_scale2"],
"vals": [1.0],
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
},
"laplace": True
},
"Student_t_small_deg_free": {
"model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var),
"grad_params": {
"names": [".*t_scale2"],
"vals": [self.var],
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
},
"laplace": True
},
"Student_t_small_var": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": {
"names": [".*t_scale2"],
"vals": [0.001],
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
},
"laplace": True
},
"Student_t_large_var": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": {
"names": [".*t_scale2"],
"vals": [10.0],
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
},
"laplace": True
},
"Student_t_approx_gauss": {
"model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var),
"grad_params": {
"names": [".*t_scale2"],
"vals": [self.var],
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
},
"laplace": True
},
"Student_t_log": {
"model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var),
"grad_params": {
"names": [".*t_scale2"],
"vals": [self.var],
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
},
"laplace": True
},
"Gaussian_default": {
"model": GPy.likelihoods.Gaussian(variance=self.var),
"grad_params": {
"names": [".*variance"],
"vals": [self.var],
"constraints": [(".*variance", constrain_positive)]
},
"laplace": True,
"ep": False # FIXME: Should be True when we have it working again
},
#"Gaussian_log": {
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log(), variance=self.var, D=self.D, N=self.N),
#"grad_params": {
#"names": ["noise_model_variance"],
#"vals": [self.var],
#"constraints": [constrain_positive]
#},
#"laplace": True
#},
#"Gaussian_probit": {
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N),
#"grad_params": {
#"names": ["noise_model_variance"],
#"vals": [self.var],
#"constraints": [constrain_positive]
#},
#"laplace": True
#},
#"Gaussian_log_ex": {
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log_ex_1(), variance=self.var, D=self.D, N=self.N),
#"grad_params": {
#"names": ["noise_model_variance"],
#"vals": [self.var],
#"constraints": [constrain_positive]
#},
#"laplace": True
#},
"Bernoulli_default": {
"model": GPy.likelihoods.Bernoulli(),
"link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)],
"laplace": True,
"Y": self.binary_Y,
"ep": False # FIXME: Should be True when we have it working again
},
"Exponential_default": {
"model": GPy.likelihoods.Exponential(),
"link_f_constraints": [constrain_positive],
"Y": self.positive_Y,
"laplace": True,
},
"Poisson_default": {
"model": GPy.likelihoods.Poisson(),
"link_f_constraints": [constrain_positive],
"Y": self.integer_Y,
"laplace": True,
"ep": False #Should work though...
}#,
#GAMMA needs some work!"Gamma_default": {
#"model": GPy.likelihoods.Gamma(),
#"link_f_constraints": [constrain_positive],
#"Y": self.positive_Y,
#"laplace": True
#}
}
self.noise_models = {"Student_t_default": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": {
"names": [".*t_scale2"],
"vals": [self.var],
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
},
"laplace": True
},
"Student_t_1_var": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": {
"names": [".*t_scale2"],
"vals": [1.0],
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
},
"laplace": True
},
"Student_t_small_deg_free": {
"model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var),
"grad_params": {
"names": [".*t_scale2"],
"vals": [self.var],
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
},
"laplace": True
},
"Student_t_small_var": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": {
"names": [".*t_scale2"],
"vals": [0.001],
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
},
"laplace": True
},
"Student_t_large_var": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": {
"names": [".*t_scale2"],
"vals": [10.0],
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
},
"laplace": True
},
"Student_t_approx_gauss": {
"model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var),
"grad_params": {
"names": [".*t_scale2"],
"vals": [self.var],
"constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
},
"laplace": True
},
#"Student_t_log": {
#"model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var),
#"grad_params": {
#"names": [".*t_noise"],
#"vals": [self.var],
#"constraints": [(".*t_noise", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
#},
#"laplace": True
#},
"Gaussian_default": {
"model": GPy.likelihoods.Gaussian(variance=self.var),
"grad_params": {
"names": [".*variance"],
"vals": [self.var],
"constraints": [(".*variance", self.constrain_positive)]
},
"laplace": True,
"ep": False # FIXME: Should be True when we have it working again
},
"Gaussian_log": {
"model": GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var),
"grad_params": {
"names": [".*variance"],
"vals": [self.var],
"constraints": [(".*variance", self.constrain_positive)]
},
"laplace": True
},
#"Gaussian_probit": {
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N),
#"grad_params": {
#"names": ["noise_model_variance"],
#"vals": [self.var],
#"constraints": [constrain_positive]
#},
#"laplace": True
#},
#"Gaussian_log_ex": {
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log_ex_1(), variance=self.var, D=self.D, N=self.N),
#"grad_params": {
#"names": ["noise_model_variance"],
#"vals": [self.var],
#"constraints": [constrain_positive]
#},
#"laplace": True
#},
"Bernoulli_default": {
"model": GPy.likelihoods.Bernoulli(),
"link_f_constraints": [partial(self.constrain_bounded, lower=0, upper=1)],
"laplace": True,
"Y": self.binary_Y,
"ep": False # FIXME: Should be True when we have it working again
},
"Exponential_default": {
"model": GPy.likelihoods.Exponential(),
"link_f_constraints": [self.constrain_positive],
"Y": self.positive_Y,
"laplace": True,
},
"Poisson_default": {
"model": GPy.likelihoods.Poisson(),
"link_f_constraints": [self.constrain_positive],
"Y": self.integer_Y,
"laplace": True,
"ep": False #Should work though...
},
#,
#GAMMA needs some work!"Gamma_default": {
#"model": GPy.likelihoods.Gamma(),
#"link_f_constraints": [constrain_positive],
#"Y": self.positive_Y,
#"laplace": True
#}
}
for name, attributes in noise_models.iteritems():
####################################################
# Constraint wrappers so we can just list them off #
####################################################
def constrain_fixed(self, regex, model):
model[regex].constrain_fixed()
def constrain_negative(self, regex, model):
model[regex].constrain_negative()
def constrain_positive(self, regex, model):
model[regex].constrain_positive()
def constrain_fixed_below(self, regex, model, up_to):
model[regex][0:up_to].constrain_fixed()
def constrain_fixed_above(self, regex, model, above):
model[regex][above:].constrain_fixed()
def constrain_bounded(self, regex, model, lower, upper):
"""
Used like: partial(constrain_bounded, lower=0, upper=1)
"""
model[regex].constrain_bounded(lower, upper)
def tearDown(self):
self.Y = None
self.f = None
self.X = None
def test_scale2_models(self):
self.setUp()
for name, attributes in self.noise_models.iteritems():
model = attributes["model"]
if "grad_params" in attributes:
params = attributes["grad_params"]
@ -290,7 +309,7 @@ class TestNoiseModels(object):
param_vals = []
param_names = []
constrain_positive = []
param_constraints = [] # ??? TODO: Saul to Fix.
param_constraints = []
if "link_f_constraints" in attributes:
link_f_constraints = attributes["link_f_constraints"]
else:
@ -303,6 +322,10 @@ class TestNoiseModels(object):
f = attributes["f"].copy()
else:
f = self.f.copy()
if "Y_metadata" in attributes:
Y_metadata = attributes["Y_metadata"].copy()
else:
Y_metadata = None
if "laplace" in attributes:
laplace = attributes["laplace"]
else:
@ -317,30 +340,30 @@ class TestNoiseModels(object):
#Required by all
#Normal derivatives
yield self.t_logpdf, model, Y, f
yield self.t_dlogpdf_df, model, Y, f
yield self.t_d2logpdf_df2, model, Y, f
yield self.t_logpdf, model, Y, f, Y_metadata
yield self.t_dlogpdf_df, model, Y, f, Y_metadata
yield self.t_d2logpdf_df2, model, Y, f, Y_metadata
#Link derivatives
yield self.t_dlogpdf_dlink, model, Y, f, link_f_constraints
yield self.t_d2logpdf_dlink2, model, Y, f, link_f_constraints
yield self.t_dlogpdf_dlink, model, Y, f, Y_metadata, link_f_constraints
yield self.t_d2logpdf_dlink2, model, Y, f, Y_metadata, link_f_constraints
if laplace:
#Laplace only derivatives
yield self.t_d3logpdf_df3, model, Y, f
yield self.t_d3logpdf_dlink3, model, Y, f, link_f_constraints
yield self.t_d3logpdf_df3, model, Y, f, Y_metadata
yield self.t_d3logpdf_dlink3, model, Y, f, Y_metadata, link_f_constraints
#Params
yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_names, param_constraints
yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_names, param_constraints
yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_names, param_constraints
yield self.t_dlogpdf_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
yield self.t_dlogpdf_df_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
yield self.t_d2logpdf2_df2_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
#Link params
yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_names, param_constraints
yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_names, param_constraints
yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_names, param_constraints
yield self.t_dlogpdf_link_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
yield self.t_dlogpdf_dlink_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
#laplace likelihood gradcheck
yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, Y_metadata, self.step, param_vals, param_names, param_constraints
if ep:
#ep likelihood gradcheck
yield self.t_ep_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
yield self.t_ep_fit_rbf_white, model, self.X, Y, f, Y_metadata, self.step, param_vals, param_names, param_constraints
self.tearDown()
@ -349,41 +372,41 @@ class TestNoiseModels(object):
# dpdf_df's #
#############
@with_setup(setUp, tearDown)
def t_logpdf(self, model, Y, f):
def t_logpdf(self, model, Y, f, Y_metadata):
print "\n{}".format(inspect.stack()[0][3])
print model
#print model._get_params()
np.testing.assert_almost_equal(
model.pdf(f.copy(), Y.copy()).prod(),
np.exp(model.logpdf(f.copy(), Y.copy()).sum())
model.pdf(f.copy(), Y.copy(), Y_metadata=Y_metadata).prod(),
np.exp(model.logpdf(f.copy(), Y.copy(), Y_metadata=Y_metadata).sum())
)
@with_setup(setUp, tearDown)
def t_dlogpdf_df(self, model, Y, f):
def t_dlogpdf_df(self, model, Y, f, Y_metadata):
print "\n{}".format(inspect.stack()[0][3])
self.description = "\n{}".format(inspect.stack()[0][3])
logpdf = functools.partial(np.sum(model.logpdf), y=Y)
dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y)
logpdf = functools.partial(np.sum(model.logpdf), y=Y, Y_metadata=Y_metadata)
dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y, Y_metadata=Y_metadata)
grad = GradientChecker(logpdf, dlogpdf_df, f.copy(), 'g')
grad.randomize()
print model
assert grad.checkgrad(verbose=1)
@with_setup(setUp, tearDown)
def t_d2logpdf_df2(self, model, Y, f):
def t_d2logpdf_df2(self, model, Y, f, Y_metadata):
print "\n{}".format(inspect.stack()[0][3])
dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y)
d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y)
dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y, Y_metadata=Y_metadata)
d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y, Y_metadata=Y_metadata)
grad = GradientChecker(dlogpdf_df, d2logpdf_df2, f.copy(), 'g')
grad.randomize()
print model
assert grad.checkgrad(verbose=1)
@with_setup(setUp, tearDown)
def t_d3logpdf_df3(self, model, Y, f):
def t_d3logpdf_df3(self, model, Y, f, Y_metadata):
print "\n{}".format(inspect.stack()[0][3])
d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y)
d3logpdf_df3 = functools.partial(model.d3logpdf_df3, y=Y)
d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y, Y_metadata=Y_metadata)
d3logpdf_df3 = functools.partial(model.d3logpdf_df3, y=Y, Y_metadata=Y_metadata)
grad = GradientChecker(d2logpdf_df2, d3logpdf_df3, f.copy(), 'g')
grad.randomize()
print model
@ -393,32 +416,32 @@ class TestNoiseModels(object):
# df_dparams #
##############
@with_setup(setUp, tearDown)
def t_dlogpdf_dparams(self, model, Y, f, params, params_names, param_constraints):
def t_dlogpdf_dparams(self, model, Y, f, Y_metadata, params, params_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta,
params, params_names, args=(f, Y), constraints=param_constraints,
params, params_names, args=(f, Y, Y_metadata), constraints=param_constraints,
randomize=False, verbose=True)
)
@with_setup(setUp, tearDown)
def t_dlogpdf_df_dparams(self, model, Y, f, params, params_names, param_constraints):
def t_dlogpdf_df_dparams(self, model, Y, f, Y_metadata, params, params_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta,
params, params_names, args=(f, Y), constraints=param_constraints,
params, params_names, args=(f, Y, Y_metadata), constraints=param_constraints,
randomize=False, verbose=True)
)
@with_setup(setUp, tearDown)
def t_d2logpdf2_df2_dparams(self, model, Y, f, params, params_names, param_constraints):
def t_d2logpdf2_df2_dparams(self, model, Y, f, Y_metadata, params, params_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta,
params, params_names, args=(f, Y), constraints=param_constraints,
params, params_names, args=(f, Y, Y_metadata), constraints=param_constraints,
randomize=False, verbose=True)
)
@ -426,10 +449,10 @@ class TestNoiseModels(object):
# dpdf_dlink's #
################
@with_setup(setUp, tearDown)
def t_dlogpdf_dlink(self, model, Y, f, link_f_constraints):
def t_dlogpdf_dlink(self, model, Y, f, Y_metadata, link_f_constraints):
print "\n{}".format(inspect.stack()[0][3])
logpdf = functools.partial(model.logpdf_link, y=Y)
dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y)
logpdf = functools.partial(model.logpdf_link, y=Y, Y_metadata=Y_metadata)
dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y, Y_metadata=Y_metadata)
grad = GradientChecker(logpdf, dlogpdf_dlink, f.copy(), 'g')
#Apply constraints to link_f values
@ -442,10 +465,10 @@ class TestNoiseModels(object):
assert grad.checkgrad(verbose=1)
@with_setup(setUp, tearDown)
def t_d2logpdf_dlink2(self, model, Y, f, link_f_constraints):
def t_d2logpdf_dlink2(self, model, Y, f, Y_metadata, link_f_constraints):
print "\n{}".format(inspect.stack()[0][3])
dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y)
d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y)
dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y, Y_metadata=Y_metadata)
d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y, Y_metadata=Y_metadata)
grad = GradientChecker(dlogpdf_dlink, d2logpdf_dlink2, f.copy(), 'g')
#Apply constraints to link_f values
@ -458,10 +481,10 @@ class TestNoiseModels(object):
assert grad.checkgrad(verbose=1)
@with_setup(setUp, tearDown)
def t_d3logpdf_dlink3(self, model, Y, f, link_f_constraints):
def t_d3logpdf_dlink3(self, model, Y, f, Y_metadata, link_f_constraints):
print "\n{}".format(inspect.stack()[0][3])
d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y)
d3logpdf_dlink3 = functools.partial(model.d3logpdf_dlink3, y=Y)
d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y, Y_metadata=Y_metadata)
d3logpdf_dlink3 = functools.partial(model.d3logpdf_dlink3, y=Y, Y_metadata=Y_metadata)
grad = GradientChecker(d2logpdf_dlink2, d3logpdf_dlink3, f.copy(), 'g')
#Apply constraints to link_f values
@ -477,32 +500,32 @@ class TestNoiseModels(object):
# dlink_dparams #
#################
@with_setup(setUp, tearDown)
def t_dlogpdf_link_dparams(self, model, Y, f, params, param_names, param_constraints):
def t_dlogpdf_link_dparams(self, model, Y, f, Y_metadata, params, param_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta,
params, param_names, args=(f, Y), constraints=param_constraints,
params, param_names, args=(f, Y, Y_metadata), constraints=param_constraints,
randomize=False, verbose=True)
)
@with_setup(setUp, tearDown)
def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_names, param_constraints):
def t_dlogpdf_dlink_dparams(self, model, Y, f, Y_metadata, params, param_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta,
params, param_names, args=(f, Y), constraints=param_constraints,
params, param_names, args=(f, Y, Y_metadata), constraints=param_constraints,
randomize=False, verbose=True)
)
@with_setup(setUp, tearDown)
def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_names, param_constraints):
def t_d2logpdf2_dlink2_dparams(self, model, Y, f, Y_metadata, params, param_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta,
params, param_names, args=(f, Y), constraints=param_constraints,
params, param_names, args=(f, Y, Y_metadata), constraints=param_constraints,
randomize=False, verbose=True)
)
@ -510,14 +533,15 @@ class TestNoiseModels(object):
# laplace test #
################
@with_setup(setUp, tearDown)
def t_laplace_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints):
def t_laplace_fit_rbf_white(self, model, X, Y, f, Y_metadata, step, param_vals, param_names, constraints):
print "\n{}".format(inspect.stack()[0][3])
#Normalize
Y = Y/Y.max()
white_var = 1e-6
white_var = 1e-5
kernel = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
laplace_likelihood = GPy.inference.latent_function_inference.Laplace()
m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, inference_method=laplace_likelihood)
m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, Y_metadata=Y_metadata, inference_method=laplace_likelihood)
m['.*white'].constrain_fixed(white_var)
#Set constraints
@ -526,6 +550,7 @@ class TestNoiseModels(object):
print m
m.randomize()
m.randomize()
#Set params
for param_num in range(len(param_names)):
@ -545,14 +570,15 @@ class TestNoiseModels(object):
# EP test #
###########
@with_setup(setUp, tearDown)
def t_ep_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints):
def t_ep_fit_rbf_white(self, model, X, Y, f, Y_metadata, step, param_vals, param_names, constraints):
print "\n{}".format(inspect.stack()[0][3])
#Normalize
Y = Y/Y.max()
white_var = 1e-6
kernel = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
ep_inf = GPy.inference.latent_function_inference.EP()
m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, inference_method=ep_inf)
m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, Y_metadata=Y_metadata, inference_method=ep_inf)
m['.*white'].constrain_fixed(white_var)
for param_num in range(len(param_names)):
@ -571,8 +597,8 @@ class LaplaceTests(unittest.TestCase):
"""
def setUp(self):
self.N = 5
self.D = 3
self.N = 15
self.D = 1
self.X = np.random.rand(self.N, self.D)*10
self.real_std = 0.1
@ -636,20 +662,20 @@ class LaplaceTests(unittest.TestCase):
exact_inf = GPy.inference.latent_function_inference.ExactGaussianInference()
m1 = GPy.core.GP(X, Y.copy(), kernel=kernel1, likelihood=gauss_distr1, inference_method=exact_inf)
m1['.*white'].constrain_fixed(1e-6)
m1['.*rbf.variance'] = initial_var_guess
m1['.*rbf.variance'].constrain_bounded(1e-4, 10)
m1['.*Gaussian_noise.variance'].constrain_bounded(1e-4, 10)
m1.randomize()
gauss_distr2 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
laplace_inf = GPy.inference.latent_function_inference.Laplace()
m2 = GPy.core.GP(X, Y.copy(), kernel=kernel2, likelihood=gauss_distr2, inference_method=laplace_inf)
m2['.*white'].constrain_fixed(1e-6)
m2['.*rbf.variance'].constrain_bounded(1e-4, 10)
m2['.*Gaussian_noise.variance'].constrain_bounded(1e-4, 10)
m2.randomize()
if debug:
print m1
print m2
optimizer = 'scg'
print "Gaussian"
m1.optimize(optimizer, messages=debug, ipython_notebook=False)
@ -687,8 +713,6 @@ class LaplaceTests(unittest.TestCase):
pb.scatter(X, m1.likelihood.Y, c='g')
pb.scatter(X, m2.likelihood.Y, c='r', marker='x')
#Check Y's are the same
np.testing.assert_almost_equal(m1.Y, m2.Y, decimal=5)
#Check marginals are the same

View file

@ -0,0 +1,72 @@
# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy
class MappingGradChecker(GPy.core.Model):
    """
    A helper model for gradient-checking a mapping.

    The (pseudo) log-likelihood is a fixed random weighted sum of the
    mapping's outputs, so `checkgrad` can verify the gradients with
    respect to both the mapping's parameters and the input X.
    """
    def __init__(self, mapping, X, name='map_grad_check'):
        super(MappingGradChecker, self).__init__(name)
        self.mapping = mapping
        self.link_parameter(self.mapping)
        self.X = GPy.core.Param('X', X)
        self.link_parameter(self.X)
        # fixed random weights that define the scalar objective
        self.dL_dY = np.random.randn(self.X.shape[0], self.mapping.output_dim)

    def log_likelihood(self):
        # weighted sum of the mapped outputs
        weighted_outputs = self.mapping.f(self.X) * self.dL_dY
        return np.sum(weighted_outputs)

    def parameters_changed(self):
        # push the fixed weights back through the mapping to get gradients
        self.X.gradient = self.mapping.gradients_X(self.dL_dY, self.X)
        self.mapping.update_gradients(self.dL_dY, self.X)
class MappingTests(unittest.TestCase):
    """Gradient checks for the various mapping classes."""

    def _assert_grads_ok(self, mapping, X):
        # every test reduces to a checkgrad on the shared helper model
        self.assertTrue(MappingGradChecker(mapping, X).checkgrad())

    def test_kernelmapping(self):
        inputs = np.random.randn(100, 3)
        inducing = np.random.randn(10, 3)
        self._assert_grads_ok(GPy.mappings.Kernel(3, 2, inducing, GPy.kern.RBF(3)), inputs)

    def test_linearmapping(self):
        mapping = GPy.mappings.Linear(3, 2)
        inputs = np.random.randn(100, 3)
        self._assert_grads_ok(mapping, inputs)

    def test_mlpmapping(self):
        mapping = GPy.mappings.MLP(input_dim=3, hidden_dim=5, output_dim=2)
        inputs = np.random.randn(100, 3)
        self._assert_grads_ok(mapping, inputs)

    def test_addmapping(self):
        # sum of an MLP and a linear map
        first = GPy.mappings.MLP(input_dim=3, hidden_dim=5, output_dim=2)
        second = GPy.mappings.Linear(input_dim=3, output_dim=2)
        mapping = GPy.mappings.Additive(first, second)
        inputs = np.random.randn(100, 3)
        self._assert_grads_ok(mapping, inputs)

    def test_compoundmapping(self):
        # composition: MLP followed by a kernel mapping
        first = GPy.mappings.MLP(input_dim=3, hidden_dim=5, output_dim=2)
        inducing = np.random.randn(10, 2)
        second = GPy.mappings.Kernel(2, 4, inducing, GPy.kern.RBF(2))
        mapping = GPy.mappings.Compound(first, second)
        inputs = np.random.randn(100, 3)
        self._assert_grads_ok(mapping, inputs)
if __name__ == "__main__":
    # Allow running this test module directly; the full suite can be slow.
    print "Running unit tests, please be (very) patient..."
    unittest.main()

View file

@ -0,0 +1,56 @@
# Copyright (c) 2015, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy
class MFtests(unittest.TestCase):
    """Gradient checks for GPs equipped with mean functions."""

    def test_simple_mean_function(self):
        """
        The simplest possible mean function. No parameters, just a simple Sinusoid.
        """
        # FIX: the original was `def simple_mean_function():` -- it lacked `self`
        # (calling it raised TypeError, since the body uses self.assertTrue) and
        # the `test_` prefix (so unittest never discovered or ran it).
        #create simple mean function
        mf = GPy.core.Mapping(1, 1)
        mf.f = np.sin
        mf.update_gradients = lambda a, b: None  # no parameters to update

        X = np.linspace(0, 10, 50).reshape(-1, 1)
        Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape)
        k = GPy.kern.RBF(1)
        lik = GPy.likelihoods.Gaussian()
        m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf)
        self.assertTrue(m.checkgrad())

    def test_parametric_mean_function(self):
        """
        A linear mean function with parameters that we'll learn alongside the kernel
        """
        X = np.linspace(0, 10, 50).reshape(-1, 1)
        Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape) + 3*X
        mf = GPy.mappings.Linear(1, 1)
        k = GPy.kern.RBF(1)
        lik = GPy.likelihoods.Gaussian()
        m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf)
        self.assertTrue(m.checkgrad())

    def test_svgp_mean_function(self):
        # an instance of the SVGP with a mean function
        X = np.linspace(0, 10, 500).reshape(-1, 1)
        Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape)
        Y = np.where(Y > 0, 1, 0)  # make a classification problem
        mf = GPy.mappings.Linear(1, 1)
        Z = np.linspace(0, 10, 50).reshape(-1, 1)
        lik = GPy.likelihoods.Bernoulli()
        k = GPy.kern.RBF(1) + GPy.kern.White(1, 1e-4)
        m = GPy.core.SVGP(X, Y, Z=Z, kernel=k, likelihood=lik, mean_function=mf)
        self.assertTrue(m.checkgrad())

18
GPy/testing/misc_tests.py Normal file
View file

@ -0,0 +1,18 @@
import numpy as np
import scipy as sp
import GPy
class MiscTests(np.testing.TestCase):
    """
    Testing some utilities of misc
    """
    def setUp(self):
        # largest representable float64 and the largest exponent exp() accepts
        self._lim_val = np.finfo(np.float64).max
        self._lim_val_exp = np.log(self._lim_val)

    def test_safe_exp_upper(self):
        over_limit = self._lim_val_exp + 1
        # plain exp overflows above the limit, safe_exp must stay finite
        assert np.exp(over_limit) == np.inf
        assert GPy.util.misc.safe_exp(over_limit) < np.inf

    def test_safe_exp_lower(self):
        # small arguments pass through without overflow
        assert GPy.util.misc.safe_exp(1e-10) < np.inf

54
GPy/testing/svgp_tests.py Normal file
View file

@ -0,0 +1,54 @@
import numpy as np
import scipy as sp
import GPy
class SVGP_nonconvex(np.testing.TestCase):
    """
    Inference in the SVGP with a student-T likelihood
    """
    def setUp(self):
        inputs = np.linspace(0, 10, 100).reshape(-1, 1)
        inducing = np.linspace(0, 10, 10).reshape(-1, 1)
        targets = np.sin(inputs) + np.random.randn(*inputs.shape)*0.1
        targets[50] += 3  # inject an outlier; the Student-t should be robust to it
        lik = GPy.likelihoods.StudentT(deg_free=2)
        kern = GPy.kern.RBF(1, lengthscale=5.) + GPy.kern.White(1, 1e-6)
        self.m = GPy.core.SVGP(inputs, targets, Z=inducing, likelihood=lik, kernel=kern)

    def test_grad(self):
        # gradient check of the variational objective
        assert self.m.checkgrad(step=1e-4)
class SVGP_classification(np.testing.TestCase):
    """
    Inference in the SVGP with a Bernoulli likelihood
    """
    def setUp(self):
        inputs = np.linspace(0, 10, 100).reshape(-1, 1)
        inducing = np.linspace(0, 10, 10).reshape(-1, 1)
        # threshold a noisy sinusoid into binary labels
        labels = np.where((np.sin(inputs) + np.random.randn(*inputs.shape)*0.1) > 0, 1, 0)
        lik = GPy.likelihoods.Bernoulli()
        kern = GPy.kern.RBF(1, lengthscale=5.) + GPy.kern.White(1, 1e-6)
        self.m = GPy.core.SVGP(inputs, labels, Z=inducing, likelihood=lik, kernel=kern)

    def test_grad(self):
        # gradient check of the variational objective
        assert self.m.checkgrad(step=1e-4)
class SVGP_Poisson_with_meanfunction(np.testing.TestCase):
    """
    Inference in the SVGP with a Poisson likelihood and a linear mean function
    """
    def setUp(self):
        X = np.linspace(0,10,100).reshape(-1,1)
        Z = np.linspace(0,10,10).reshape(-1,1)
        # NOTE(review): `0.1*X * 0.05*X**2` multiplies the two terms (== 0.005*X**3);
        # possibly `0.1*X + 0.05*X**2` was intended -- confirm.
        latent_f = np.exp(0.1*X * 0.05*X**2)
        # draw Poisson counts with the latent function as the rate
        Y = np.array([np.random.poisson(f) for f in latent_f.flatten()]).reshape(-1,1)
        mf = GPy.mappings.Linear(1,1)
        lik = GPy.likelihoods.Poisson()
        k = GPy.kern.RBF(1, lengthscale=5.) + GPy.kern.White(1, 1e-6)
        self.m = GPy.core.SVGP(X, Y, Z=Z, likelihood=lik, kernel=k, mean_function=mf)
    def test_grad(self):
        # gradient check of the variational objective
        assert self.m.checkgrad(step=1e-4)

View file

@ -17,6 +17,54 @@ def get_blocks(A, blocksizes):
count_i += i
return B
def get_block_shapes(B):
    """Return the size (rows) of each diagonal block of the block matrix B."""
    assert B.dtype is np.dtype('object'), "Must be a block matrix"
    sizes = []
    for diag_ind in range(0, B.shape[0]):
        sizes.append(B[diag_ind, diag_ind].shape[0])
    return sizes
def unblock(B):
    """Reassemble a block matrix (object-dtype array of blocks) into one dense array."""
    assert B.dtype is np.dtype('object'), "Must be a block matrix"
    sizes = get_block_shapes(B)
    total = np.sum(sizes)
    A = np.empty(shape=(total, total))
    # copy each block into its offset position in the dense output
    row_off = 0
    for bi, rows in enumerate(sizes):
        col_off = 0
        for bj, cols in enumerate(sizes):
            A[row_off:row_off + rows, col_off:col_off + cols] = B[bi, bj]
            col_off += cols
        row_off += rows
    return A
def block_dot(A, B):
    """
    Element wise dot product on block matrices

    +------+------+   +------+------+   +-------+-------+
    |      |      |   |      |      |   |A11.B11|A12.B12|
    | A11  | A12  |   | B11  | B12  |   |       |       |
    +------+------+ o +------+------| = +-------+-------+
    |      |      |   |      |      |   |A21.B21|A22.B22|
    | A21  | A22  |   | B21  | B22  |   |       |       |
    +-------------+   +------+------+   +-------+-------+

    ..Note
        If either (A or B) of the diagonal matrices are stored as vectors then a more
        efficient dot product using numpy broadcasting will be used, i.e. A11*B11
    """
    #Must have same number of blocks and be a block matrix
    assert A.dtype is np.dtype('object'), "Must be a block matrix"
    assert B.dtype is np.dtype('object'), "Must be a block matrix"
    Ashape = A.shape
    Bshape = B.shape
    assert Ashape == Bshape
    # NOTE(review): `f` tests Ashape/Bshape -- the shape of the *container* of
    # blocks, not of the individual blocks it receives, so for a square block
    # layout every pair is multiplied elementwise. The note above suggests the
    # per-block shapes were intended here -- confirm.
    def f(A,B):
        if Ashape[0] == Ashape[1] or Bshape[0] == Bshape[1]:
            #FIXME: Careful if one is transpose of other, would make a matrix
            return A*B
        else:
            return np.dot(A,B)
    # NOTE(review): `np.object` is removed in NumPy >= 1.24; plain `object` is
    # the forward-compatible spelling.
    dot = np.vectorize(f, otypes = [np.object])
    return dot(A,B)
if __name__=='__main__':
@ -24,3 +72,5 @@ if __name__=='__main__':
B = get_blocks(A,[2,3])
B[0,0] += 7
print B
assert np.all(unblock(B) == A)

View file

@ -96,16 +96,21 @@ def jitchol(A, maxtries=5):
num_tries = 1
while num_tries <= maxtries and np.isfinite(jitter):
try:
print jitter
L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
logging.warning('Added {} rounds of jitter, jitter of {:.10e}\n'.format(num_tries, jitter))
return L
except:
jitter *= 10
finally:
num_tries += 1
raise linalg.LinAlgError, "not positive definite, even with jitter."
import traceback
logging.warning('\n'.join(['Added {} rounds of jitter, jitter of {:.10e}'.format(num_tries-1, jitter),
' in '+traceback.format_list(traceback.extract_stack(limit=2)[-2:-1])[0][2:]]))
raise linalg.LinAlgError, "not positive definite, even with jitter."
try: raise
except:
logging.warning('\n'.join(['Added jitter of {:.10e}'.format(jitter),
' in '+traceback.format_list(traceback.extract_stack(limit=2)[-2:-1])[0][2:]]))
import ipdb;ipdb.set_trace()
return L
# def dtrtri(L, lower=1):
# """

View file

@ -4,6 +4,16 @@
import numpy as np
from config import *
_lim_val = np.finfo(np.float64).max
_lim_val_exp = np.log(_lim_val)
_lim_val_square = np.sqrt(_lim_val)
_lim_val_cube = np.power(_lim_val, -3)
def safe_exp(f):
clip_f = np.clip(f, -np.inf, _lim_val_exp)
return np.exp(clip_f)
def chain_1(df_dg, dg_dx):
    """
    Generic chaining function for first derivative

    .. math::
        \\frac{d(f . g)}{dx} = \\frac{df}{dg} \\frac{dg}{dx}

    :param df_dg: outer derivative evaluated at g(x)
    :param dg_dx: inner derivative
    """
    if np.all(dg_dx == 1.):
        # Identity inner transform: the chain rule collapses to df/dg.
        return df_dg
    if len(df_dg) > 1 and df_dg.shape[-1] > 1:
        # Leftover `import ipdb; ipdb.set_trace()` breakpoint removed here.
        raise NotImplementedError('Not implemented for matricies yet')
    return df_dg * dg_dx
def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
    """
    Generic chaining function for second derivative

    .. math::
        \\frac{d^{2}(f . g)}{dx^{2}} = \\frac{d^{2}f}{dg^{2}}(\\frac{dg}{dx})^{2} + \\frac{df}{dg}\\frac{d^{2}g}{dx^{2}}

    :param d2f_dg2: second outer derivative evaluated at g(x)
    :param dg_dx: first inner derivative
    :param df_dg: first outer derivative evaluated at g(x)
    :param d2g_dx2: second inner derivative
    """
    if np.all(dg_dx == 1.) and np.all(d2g_dx2 == 0):
        # Identity inner transform: only the first term survives, dg_dx == 1.
        return d2f_dg2
    if len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1:
        raise NotImplementedError('Not implemented for matricies yet')
    dg_dx_2 = dg_dx**2
    return d2f_dg2*(dg_dx_2) + df_dg*d2g_dx2
def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
    """
    Generic chaining function for third derivative

    .. math::
        \\frac{d^{3}(f . g)}{dx^{3}} = \\frac{d^{3}f}{dg^{3}}(\\frac{dg}{dx})^{3} + 3\\frac{d^{2}f}{dg^{2}}\\frac{dg}{dx}\\frac{d^{2}g}{dx^{2}} + \\frac{df}{dg}\\frac{d^{3}g}{dx^{3}}

    :param d3f_dg3: third outer derivative evaluated at g(x)
    :param dg_dx: first inner derivative
    :param d2f_dg2: second outer derivative evaluated at g(x)
    :param d2g_dx2: second inner derivative
    :param df_dg: first outer derivative evaluated at g(x)
    :param d3g_dx3: third inner derivative
    """
    if np.all(dg_dx == 1.) and np.all(d2g_dx2 == 0) and np.all(d3g_dx3 == 0):
        # Identity inner transform: only the first term survives, dg_dx == 1.
        return d3f_dg3
    if ((len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1)
            or (len(d3f_dg3) > 1 and d3f_dg3.shape[-1] > 1)):
        raise NotImplementedError('Not implemented for matricies yet')
    dg_dx_3 = dg_dx**3
    return d3f_dg3*(dg_dx_3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3
def opt_wrapper(m, **kwargs):
    """
    This function just wraps the optimization procedure of a GPy
    object so that optimize() is pickleable (necessary for multiprocessing).

    :param m: object exposing an ``optimize`` method (e.g. a GPy model)
    :param kwargs: forwarded verbatim to ``m.optimize``
    """
    m.optimize(**kwargs)
@ -96,3 +124,47 @@ from :class:ndarray)"""
if len(param) == 1:
return param[0].view(np.ndarray)
return [x.view(np.ndarray) for x in param]
def blockify_hessian(func):
    """
    Decorator: when the instance flags itself as not truly block-structured
    (``self.not_block_really``) and *func* returned a non-square (per-datum)
    array, expand it into a square diagonal matrix via ``np.diagflat``.
    Otherwise the return value is passed through untouched.
    """
    from functools import wraps

    @wraps(func)  # preserve the wrapped method's name/docstring
    def wrapper_func(self, *args, **kwargs):
        # Invoke the wrapped function first
        retval = func(self, *args, **kwargs)
        # Now do something here with retval and/or action
        if self.not_block_really and (retval.shape[0] != retval.shape[1]):
            return np.diagflat(retval)
        else:
            return retval
    return wrapper_func
def blockify_third(func):
    """
    Decorator: when the instance flags itself as not truly block-structured
    (``self.not_block_really``) and *func* returned fewer than 3 dimensions,
    place the per-datum values on the super-diagonal of an (N, N, N) tensor.
    Otherwise the return value is passed through untouched.
    """
    from functools import wraps

    @wraps(func)  # preserve the wrapped method's name/docstring
    def wrapper_func(self, *args, **kwargs):
        # Invoke the wrapped function first
        retval = func(self, *args, **kwargs)
        # Now do something here with retval and/or action
        if self.not_block_really and (len(retval.shape) < 3):
            num_data = retval.shape[0]
            d3_block_cache = np.zeros((num_data, num_data, num_data))
            diag_slice = np.arange(num_data)
            d3_block_cache[diag_slice, diag_slice, diag_slice] = np.squeeze(retval)
            return d3_block_cache
        else:
            return retval
    return wrapper_func
def blockify_dhess_dtheta(func):
    """
    Decorator: when the instance flags itself as not truly block-structured
    (``self.not_block_really``) and *func* returned a 2-D (N, num_params)
    array, expand each parameter's column onto the diagonal of an
    (N, N, num_params) tensor. Otherwise the return value passes through.
    """
    from functools import wraps

    @wraps(func)  # preserve the wrapped method's name/docstring
    def wrapper_func(self, *args, **kwargs):
        # Invoke the wrapped function first
        retval = func(self, *args, **kwargs)
        # Now do something here with retval and/or action
        if self.not_block_really and (len(retval.shape) < 3):
            num_data = retval.shape[0]
            num_params = retval.shape[-1]
            dhess_dtheta = np.zeros((num_data, num_data, num_params))
            diag_slice = np.arange(num_data)
            for param_ind in range(num_params):
                dhess_dtheta[diag_slice, diag_slice, param_ind] = np.squeeze(retval[:, param_ind])
            return dhess_dtheta
        else:
            return retval
    return wrapper_func

View file

@ -1,17 +1,18 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from GPy.core.parameterization import Parameterized, Param
from ..core.parameterization.transformations import Logexp
class WarpingFunction(object):
class WarpingFunction(Parameterized):
"""
abstract function for warping
z = f(y)
"""
def __init__(self):
raise NotImplementedError
def __init__(self, name):
super(WarpingFunction, self).__init__(name=name)
def f(self,y,psi):
"""function transformation
@ -34,9 +35,10 @@ class WarpingFunction(object):
def _get_param_names(self):
raise NotImplementedError
def plot(self, psi, xmin, xmax):
def plot(self, xmin, xmax):
psi = self.psi
y = np.arange(xmin, xmax, 0.01)
f_y = self.f(y, psi)
f_y = self.f(y)
from matplotlib import pyplot as plt
plt.figure()
plt.plot(y, f_y)
@ -50,6 +52,7 @@ class TanhWarpingFunction(WarpingFunction):
"""n_terms specifies the number of tanh terms to be used"""
self.n_terms = n_terms
self.num_parameters = 3 * self.n_terms
super(TanhWarpingFunction, self).__init__(name='warp_tanh')
def f(self,y,psi):
"""
@ -163,8 +166,18 @@ class TanhWarpingFunction_d(WarpingFunction):
"""n_terms specifies the number of tanh terms to be used"""
self.n_terms = n_terms
self.num_parameters = 3 * self.n_terms + 1
self.psi = np.ones((self.n_terms, 3))
def f(self,y,psi):
super(TanhWarpingFunction_d, self).__init__(name='warp_tanh')
self.psi = Param('psi', self.psi)
self.psi[:, :2].constrain_positive()
self.d = Param('%s' % ('d'), 1.0, Logexp())
self.link_parameter(self.psi)
self.link_parameter(self.d)
def f(self,y):
"""
Transform y with f using parameter vector psi
psi = [[a,b,c]]
@ -175,9 +188,9 @@ class TanhWarpingFunction_d(WarpingFunction):
#1. check that number of params is consistent
# assert psi.shape[0] == self.n_terms, 'inconsistent parameter dimensions'
# assert psi.shape[1] == 4, 'inconsistent parameter dimensions'
mpsi = psi.copy()
d = psi[-1]
mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
d = self.d
mpsi = self.psi
#3. transform data
z = d*y.copy()
@ -187,7 +200,7 @@ class TanhWarpingFunction_d(WarpingFunction):
return z
def f_inv(self, z, psi, max_iterations=1000, y=None):
def f_inv(self, z, max_iterations=1000, y=None):
"""
calculate the numerical inverse of f
@ -198,12 +211,12 @@ class TanhWarpingFunction_d(WarpingFunction):
z = z.copy()
if y is None:
y = np.ones_like(z)
it = 0
update = np.inf
while it == 0 or (np.abs(update).sum() > 1e-10 and it < max_iterations):
update = (self.f(y, psi) - z)/self.fgrad_y(y, psi)
update = (self.f(y) - z)/self.fgrad_y(y)
y -= update
it += 1
if it == max_iterations:
@ -212,7 +225,7 @@ class TanhWarpingFunction_d(WarpingFunction):
return y
def fgrad_y(self, y, psi, return_precalc = False):
def fgrad_y(self, y,return_precalc = False):
"""
gradient of f w.r.t to y ([N x 1])
@ -221,9 +234,8 @@ class TanhWarpingFunction_d(WarpingFunction):
"""
mpsi = psi.copy()
d = psi[-1]
mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
d = self.d
mpsi = self.psi
# vectorized version
@ -240,7 +252,7 @@ class TanhWarpingFunction_d(WarpingFunction):
return GRAD
def fgrad_y_psi(self, y, psi, return_covar_chain = False):
def fgrad_y_psi(self, y, return_covar_chain = False):
"""
gradient of f w.r.t to y and psi
@ -248,10 +260,10 @@ class TanhWarpingFunction_d(WarpingFunction):
"""
mpsi = psi.copy()
mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
w, s, r, d = self.fgrad_y(y, psi, return_precalc = True)
mpsi = self.psi
w, s, r, d = self.fgrad_y(y, return_precalc = True)
gradients = np.zeros((y.shape[0], y.shape[1], len(mpsi), 4))
for i in range(len(mpsi)):