REFACTORING: model names, lowercase, classes uppercase

This commit is contained in:
Max Zwiessele 2013-06-05 13:02:03 +01:00
parent 2a39440619
commit 2e5e8ac026
50 changed files with 436 additions and 3307 deletions

View file

@ -1,150 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import linalg
import pylab as pb
from .. import kern
from ..util.linalg import pdinv, mdot, tdot
#from ..util.plot import gpplot, Tango
from ..likelihoods import EP
from gp_base import GPBase
class GP(GPBase):
"""
Gaussian Process model for regression and EP
:param X: input observations
:param kernel: a GPy kernel, defaults to rbf+white
:parm likelihood: a GPy likelihood
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:rtype: model object
:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
:type powerep: list
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, likelihood, kernel, normalize_X=False):
GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params())
def _set_params(self, p):
self.kern._set_params_transformed(p[:self.kern.Nparam_transformed()])
self.likelihood._set_params(p[self.kern.Nparam_transformed():])
self.K = self.kern.K(self.X)
self.K += self.likelihood.covariance_matrix
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
# the gradient of the likelihood wrt the covariance matrix
if self.likelihood.YYT is None:
#alpha = np.dot(self.Ki, self.likelihood.Y)
alpha,_ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y,lower=1)
self.dL_dK = 0.5 * (tdot(alpha) - self.D * self.Ki)
else:
#tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
self.dL_dK = 0.5 * (tmp - self.D * self.Ki)
def _get_params(self):
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
def _get_param_names(self):
return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def update_likelihood_approximation(self):
"""
Approximates a non-gaussian likelihood using Expectation Propagation
For a Gaussian likelihood, no iteration is required:
this function does nothing
"""
self.likelihood.fit_full(self.kern.K(self.X))
self._set_params(self._get_params()) # update the GP
def _model_fit_term(self):
"""
Computes the model fit using YYT if it's available
"""
if self.likelihood.YYT is None:
tmp, _ = linalg.lapack.flapack.dtrtrs(self.L, np.asfortranarray(self.likelihood.Y), lower=1)
return -0.5 * np.sum(np.square(tmp))
#return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
else:
return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT))
def log_likelihood(self):
"""
The log marginal likelihood of the GP.
For an EP model, can be written as the log likelihood of a regression
model for a new variable Y* = v_tilde/tau_tilde, with a covariance
matrix K* = K + diag(1./tau_tilde) plus a normalization term.
"""
return -0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z
def _log_likelihood_gradients(self):
"""
The gradient of all parameters.
Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
"""
return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False,stop=False):
"""
Internal helper function for making predictions, does not account
for normalization or likelihood
"""
Kx = self.kern.K(_Xnew,self.X,which_parts=which_parts).T
#KiKx = np.dot(self.Ki, Kx)
KiKx, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(Kx), lower=1)
mu = np.dot(KiKx.T, self.likelihood.Y)
if full_cov:
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
var = Kxx - np.dot(KiKx.T, Kx)
else:
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
var = var[:, None]
if stop:
debug_this
return mu, var
def predict(self, Xnew, which_parts='all', full_cov=False):
"""
Predict the function(s) at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the folll covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.D
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions.
"""
# normalize X values
Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
return mean, var, _025pm, _975pm

View file

@ -1,8 +1,8 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from GP import GP
from sparse_GP import sparse_GP
from model import *
from parameterised import *
import priors
from GPy.core.gp import GP
from GPy.core.sparse_gp import SparseGP

View file

@ -18,15 +18,15 @@ class GPBase(model.model):
self.kern = kernel
self.likelihood = likelihood
assert self.X.shape[0] == self.likelihood.data.shape[0]
self.N, self.D = self.likelihood.data.shape
self.N, self.output_dim = self.likelihood.data.shape
if normalize_X:
self._Xmean = X.mean(0)[None, :]
self._Xstd = X.std(0)[None, :]
self.X = (X.copy() - self._Xmean) / self._Xstd
else:
self._Xmean = np.zeros((1,self.input_dim))
self._Xstd = np.ones((1,self.input_dim))
self._Xmean = np.zeros((1, self.input_dim))
self._Xstd = np.ones((1, self.input_dim))
model.model.__init__(self)
@ -70,7 +70,7 @@ class GPBase(model.model):
else:
m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None,], axes=ax)
gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
for i in range(samples):
ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
@ -108,19 +108,19 @@ class GPBase(model.model):
if self.X.shape[1] == 1:
Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now
Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
for d in range(m.shape[1]):
gpplot(Xnew, m[:,d], lower[:,d], upper[:,d],axes=ax)
ax.plot(Xu[which_data], self.likelihood.data[which_data,d], 'kx', mew=1.5)
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5)
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 2: # FIXME
elif self.X.shape[1] == 2: # FIXME
resolution = resolution or 50
Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)

View file

@ -99,9 +99,9 @@ class MultivariateGaussian:
assert len(self.var.shape) == 2
assert self.var.shape[0] == self.var.shape[1]
assert self.var.shape[0] == self.mu.size
self.D = self.mu.size
self.input_dim = self.mu.size
self.inv, self.hld = pdinv(self.var)
self.constant = -0.5 * self.D * np.log(2 * np.pi) - self.hld
self.constant = -0.5 * self.input_dim * np.log(2 * np.pi) - self.hld
def summary(self):
raise NotImplementedError
@ -121,7 +121,7 @@ class MultivariateGaussian:
return np.random.multivariate_normal(self.mu, self.var, n)
def plot(self):
if self.D == 2:
if self.input_dim == 2:
rvs = self.rvs(200)
pb.plot(rvs[:, 0], rvs[:, 1], 'kx', mew=1.5)
xmin, xmax = pb.xlim()

View file

@ -1,304 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides, chol_inv
from scipy import linalg
from ..likelihoods import Gaussian
from gp_base import GPBase
class sparse_GP(GPBase):
"""
Variational sparse GP model
:param X: inputs
:type X: np.ndarray (N x input_dim)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
:param kernel : the kernel (covariance function). See link kernels
:type kernel: a GPy.kern.kern instance
:param X_variance: The uncertainty in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (N x input_dim) | None
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x input_dim) | None
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self.Z = Z
self.M = Z.shape[0]
self.likelihood = likelihood
if X_variance is None:
self.has_uncertain_inputs = False
else:
assert X_variance.shape == X.shape
self.has_uncertain_inputs = True
self.X_variance = X_variance
if normalize_X:
self.Z = (self.Z.copy() - self._Xmean) / self._Xstd
# normalize X uncertainty also
if self.has_uncertain_inputs:
self.X_variance /= np.square(self._Xstd)
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance)
self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance).T
self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance)
else:
self.psi0 = self.kern.Kdiag(self.X)
self.psi1 = self.kern.K(self.Z, self.X)
self.psi2 = None
def _computations(self):
# factor Kmm
self.Lm = jitchol(self.Kmm)
# The rather complex computations of self.A
if self.has_uncertain_inputs:
if self.likelihood.is_heteroscedastic:
psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1))).sum(0)
else:
psi2_beta = self.psi2.sum(0) * self.likelihood.precision
evals, evecs = linalg.eigh(psi2_beta)
clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
tmp = evecs * np.sqrt(clipped_evals)
else:
if self.likelihood.is_heteroscedastic:
tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)))
else:
tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
# factor B
self.B = np.eye(self.M) + self.A
self.LB = jitchol(self.B)
# TODO: make a switch for either first compute psi1V, or VV.T
self.psi1V = np.dot(self.psi1, self.likelihood.V)
# back substutue C into psi1V
tmp, info1 = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
tmp, info2 = linalg.lapack.flapack.dpotrs(self.LB, tmp, lower=1)
self.Cpsi1V, info3 = linalg.lapack.flapack.dtrtrs(self.Lm, tmp, lower=1, trans=1)
# Compute dL_dKmm
tmp = tdot(self._LBi_Lmi_psi1V)
self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.D * np.eye(self.M) + tmp)
tmp = -0.5 * self.DBi_plus_BiPBi
tmp += -0.5 * self.B * self.D
tmp += self.D * np.eye(self.M)
self.dL_dKmm = backsub_both_sides(self.Lm, tmp)
# Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
self.dL_dpsi0 = -0.5 * self.D * (self.likelihood.precision * np.ones([self.N, 1])).flatten()
self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T)
dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.D * np.eye(self.M) - self.DBi_plus_BiPBi)
if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs:
self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
else:
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.N))
self.dL_dpsi2 = None
else:
dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta
if self.has_uncertain_inputs:
# repeat for each of the N psi_2 matrices
self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.N, axis=0)
else:
# subsume back into psi1 (==Kmn)
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2, self.psi1)
self.dL_dpsi2 = None
# the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedatic derivates not implemented"
else:
# likelihood is not heterscedatic
self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
if self.likelihood.is_heteroscedastic:
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else:
A = -0.5 * self.N * self.D * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + B + C + D + self.likelihood.Z
def _set_params(self, p):
self.Z = p[:self.M * self.input_dim].reshape(self.M, self.input_dim)
self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:])
self._compute_kernel_matrices()
self._computations()
def _get_params(self):
return np.hstack([self.Z.flatten(), self.kern._get_params_transformed(), self.likelihood._get_params()])
def _get_param_names(self):
return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])],[])\
+ self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def update_likelihood_approximation(self):
"""
Approximates a non-gaussian likelihood using Expectation Propagation
For a Gaussian likelihood, no iteration is required:
this function does nothing
"""
if not isinstance(self.likelihood, Gaussian): # Updates not needed for Gaussian likelihood
self.likelihood.restart() # TODO check consistency with pseudo_EP
if self.has_uncertain_inputs:
Lmi = chol_inv(self.Lm)
Kmmi = tdot(Lmi.T)
diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2, Kmmi)])
self.likelihood.fit_FITC(self.Kmm, self.psi1, diag_tr_psi2Kmmi) # This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion
# raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_DTC(self.Kmm, self.psi1)
# self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self):
"""
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
"""
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance)
else:
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.Z, self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
return dL_dtheta
def dL_dZ(self):
"""
The derivative of the bound wrt the inducing inputs Z
"""
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
else:
dL_dZ += self.kern.dK_dX(self.dL_dpsi1, self.Z, self.X)
return dL_dZ
def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
"""Internal helper function for making predictions, does not account for normalization"""
Bi, _ = linalg.lapack.flapack.dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work!
symmetrify(Bi)
Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.M) - Bi)
if X_variance_new is None:
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
mu = np.dot(Kx.T, self.Cpsi1V)
if full_cov:
Kxx = self.kern.K(Xnew, which_parts=which_parts)
var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
else:
# assert which_parts=='all', "swithching out parts of variational kernels is not implemented"
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
mu = np.dot(Kx, self.Cpsi1V)
if full_cov:
raise NotImplementedError, "TODO"
else:
Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new)
psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new)
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
return mu, var[:, None]
def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
"""
Predict the function(s) at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param X_variance_new: The uncertainty in the prediction points
:type X_variance_new: np.ndarray, Nnew x self.input_dim
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the folll covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.D
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions.
"""
# normalize X values
Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
if X_variance_new is not None:
X_variance_new = X_variance_new / self._Xstd ** 2
# here's the actual prediction by the GP model
mu, var = self._raw_predict(Xnew, X_variance_new, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
return mean, var, _025pm, _975pm
def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None):
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if which_data is 'all':
which_data = slice(None)
GPBase.plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax)
if self.X.shape[1] == 1:
if self.has_uncertain_inputs:
Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now
ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
Zu = self.Z * self._Xstd + self._Xmean
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
elif self.X.shape[1] == 2:
Zu = self.Z * self._Xstd + self._Xmean
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')

View file

@ -5,9 +5,9 @@ import numpy as np
from matplotlib import pyplot as plt
import GPy
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
from GPy.util.datasets import swiss_roll_generated
from GPy.core.transformations import logexp
from GPy.models.bayesian_gplvm import BayesianGPLVM
default_seed = np.random.seed(123344)
@ -20,14 +20,14 @@ def BGPLVM(seed=default_seed):
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N), K, D).T
Y = np.random.multivariate_normal(np.zeros(N), K, Q).T
k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.linear(Q, ARD=True) + GPy.kern.rbf(Q, ARD=True) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
# k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M)
m = GPy.models.BayesianGPLVM(Y, Q, kernel=k, M=M)
m.constrain_positive('(rbf|bias|noise|white|S)')
# m.constrain_fixed('S', 1)
@ -105,7 +105,7 @@ def swiss_roll(optimize=True, N=1000, M=15, Q=4, sigma=.2, plot=False):
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
m = Bayesian_GPLVM(Y, Q, X=X, X_variance=S, M=M, Z=Z, kernel=kernel)
m = BayesianGPLVM(Y, Q, X=X, X_variance=S, M=M, Z=Z, kernel=kernel)
m.data_colors = c
m.data_t = t
@ -129,7 +129,7 @@ def BGPLVM_oil(optimize=True, N=100, Q=5, M=25, max_f_eval=4e3, plot=False, **k)
Yn = Y - Y.mean(0)
Yn /= Yn.std(0)
m = GPy.models.Bayesian_GPLVM(Yn, Q, kernel=kernel, M=M, **k)
m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, M=M, **k)
m.data_labels = data['Y'][:N].argmax(axis=1)
# m.constrain('variance|leng', logexp_clipped())
@ -234,7 +234,7 @@ def bgplvm_simulation_matlab_compare():
from GPy import kern
reload(mrd); reload(kern)
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k,
m = BayesianGPLVM(Y, Q, init="PCA", M=M, kernel=k,
# X=mu,
# X_variance=S,
_debug=False)
@ -259,7 +259,7 @@ def bgplvm_simulation(optimize='scg',
Y = Ylist[0]
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q)
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
m = BayesianGPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
# m.constrain('variance|noise', logexp_clipped())
m.ensure_default_constraints()
m['noise'] = Y.var() / 100.
@ -285,7 +285,7 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw):
reload(mrd); reload(kern)
k = kern.linear(Q, [.05] * Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
m = mrd.MRD(Ylist, Q=Q, M=M, kernels=k, initx="", initz='permute', **kw)
m = mrd.MRD(Ylist, input_dim=Q, M=M, kernels=k, initx="", initz='permute', **kw)
for i, Y in enumerate(Ylist):
m['{}_noise'.format(i + 1)] = Y.var() / 100.
@ -313,7 +313,7 @@ def brendan_faces():
Yn /= Yn.std()
m = GPy.models.GPLVM(Yn, Q)
# m = GPy.models.Bayesian_GPLVM(Yn, Q, M=100)
# m = GPy.models.BayesianGPLVM(Yn, Q, M=100)
# optimize
m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
@ -380,7 +380,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True):
# M = 30
#
# kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
# m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M)
# m = GPy.models.BayesianGPLVM(X, Q, kernel=kernel, M=M)
# # m.scale_factor = 100.0
# m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)')
# from sklearn import cluster

View file

@ -15,7 +15,7 @@ def toy_rbf_1d(max_nb_eval_optim=100):
data = GPy.util.datasets.toy_rbf_1d()
# create simple GP model
m = GPy.models.GP_regression(data['X'],data['Y'])
m = GPy.models.GPRegression(data['X'],data['Y'])
# optimize
m.ensure_default_constraints()
@ -30,7 +30,7 @@ def rogers_girolami_olympics(max_nb_eval_optim=100):
data = GPy.util.datasets.rogers_girolami_olympics()
# create simple GP model
m = GPy.models.GP_regression(data['X'],data['Y'])
m = GPy.models.GPRegression(data['X'],data['Y'])
#set the lengthscale to be something sensible (defaults to 1)
m['rbf_lengthscale'] = 10
@ -49,7 +49,7 @@ def toy_rbf_1d_50(max_nb_eval_optim=100):
data = GPy.util.datasets.toy_rbf_1d_50()
# create simple GP model
m = GPy.models.GP_regression(data['X'],data['Y'])
m = GPy.models.GPRegression(data['X'],data['Y'])
# optimize
m.ensure_default_constraints()
@ -65,7 +65,7 @@ def silhouette(max_nb_eval_optim=100):
data = GPy.util.datasets.silhouette()
# create simple GP model
m = GPy.models.GP_regression(data['X'],data['Y'])
m = GPy.models.GPRegression(data['X'],data['Y'])
# optimize
m.ensure_default_constraints()
@ -87,9 +87,9 @@ def coregionalisation_toy2(max_nb_eval_optim=100):
Y = np.vstack((Y1,Y2))
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
k2 = GPy.kern.coregionalise(2,1)
k2 = GPy.kern.Coregionalise(2,1)
k = k1.prod(k2,tensor=True)
m = GPy.models.GP_regression(X,Y,kernel=k)
m = GPy.models.GPRegression(X,Y,kernel=k)
m.constrain_fixed('.*rbf_var',1.)
#m.constrain_positive('.*kappa')
m.ensure_default_constraints()
@ -119,9 +119,9 @@ def coregionalisation_toy(max_nb_eval_optim=100):
Y = np.vstack((Y1,Y2))
k1 = GPy.kern.rbf(1)
k2 = GPy.kern.coregionalise(2,2)
k2 = GPy.kern.Coregionalise(2,2)
k = k1.prod(k2,tensor=True)
m = GPy.models.GP_regression(X,Y,kernel=k)
m = GPy.models.GPRegression(X,Y,kernel=k)
m.constrain_fixed('.*rbf_var',1.)
#m.constrain_positive('kappa')
m.ensure_default_constraints()
@ -155,10 +155,10 @@ def coregionalisation_sparse(max_nb_eval_optim=100):
Z = np.hstack((np.random.rand(M,1)*8,np.random.randint(0,2,M)[:,None]))
k1 = GPy.kern.rbf(1)
k2 = GPy.kern.coregionalise(2,2)
k2 = GPy.kern.Coregionalise(2,2)
k = k1.prod(k2,tensor=True) + GPy.kern.white(2,0.001)
m = GPy.models.sparse_GP_regression(X,Y,kernel=k,Z=Z)
m = GPy.models.SparseGPRegression(X,Y,kernel=k,Z=Z)
m.scale_factor = 10000.
m.constrain_fixed('.*rbf_var',1.)
#m.constrain_positive('kappa')
@ -213,7 +213,7 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
for i in range(0, model_restarts):
kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + GPy.kern.white(1,variance=np.random.exponential(1.))
m = GPy.models.GP_regression(data['X'],data['Y'], kernel=kern)
m = GPy.models.GPRegression(data['X'],data['Y'], kernel=kern)
optim_point_x[0] = m.get('rbf_lengthscale')
optim_point_y[0] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance'));
@ -260,7 +260,7 @@ def _contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf
kernel = signal_kernel_call(1, variance=signal_var, lengthscale=length_scale) + GPy.kern.white(1, variance=noise_var)
model = GPy.models.GP_regression(data['X'], data['Y'], kernel=kernel)
model = GPy.models.GPRegression(data['X'], data['Y'], kernel=kernel)
model.constrain_positive('')
length_scale_lls.append(model.log_likelihood())
lls.append(length_scale_lls)
@ -276,7 +276,7 @@ def sparse_GP_regression_1D(N = 400, M = 5, max_nb_eval_optim=100):
noise = GPy.kern.white(1)
kernel = rbf + noise
# create simple GP model
m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
m = GPy.models.SparseGPRegression(X, Y, kernel, M=M)
m.ensure_default_constraints()
@ -296,7 +296,7 @@ def sparse_GP_regression_2D(N = 400, M = 50, max_nb_eval_optim=100):
kernel = rbf + noise
# create simple GP model
m = GPy.models.sparse_GP_regression(X,Y,kernel, M = M)
m = GPy.models.SparseGPRegression(X,Y,kernel, M = M)
# contrain all parameters to be positive (but not inducing inputs)
m.ensure_default_constraints()
@ -325,7 +325,7 @@ def uncertain_inputs_sparse_regression(max_nb_eval_optim=100):
k = GPy.kern.rbf(1) + GPy.kern.white(1)
# create simple GP model - no input uncertainty on this one
m = GPy.models.sparse_GP_regression(X, Y, kernel=k, Z=Z)
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
m.ensure_default_constraints()
m.optimize('scg', messages=1, max_f_eval=max_nb_eval_optim)
m.plot(ax=axes[0])
@ -333,7 +333,7 @@ def uncertain_inputs_sparse_regression(max_nb_eval_optim=100):
#the same model with uncertainty
m = GPy.models.sparse_GP_regression(X, Y, kernel=k, Z=Z, X_variance=S)
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z, X_variance=S)
m.ensure_default_constraints()
m.optimize('scg', messages=1, max_f_eval=max_nb_eval_optim)
m.plot(ax=axes[1])

View file

@ -1,169 +0,0 @@
# Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
# Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import numpy as np
import sys
def print_out(len_maxiters, display, fnow, current_grad, beta, iteration):
if display:
print '\r',
print '{0:>0{mi}g} {1:> 12e} {2:> 12e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush()
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=None, ftol=None, gtol=None):
"""
Optimisation through Scaled Conjugate Gradients (SCG)
f: the objective function
gradf : the gradient function (should return a 1D np.ndarray)
x : the initial condition
Returns
x the optimal value for x
flog : a list of all the objective values
function_eval number of fn evaluations
status: string describing convergence status
"""
if xtol is None:
xtol = 1e-6
if ftol is None:
ftol = 1e-6
if gtol is None:
gtol = 1e-5
sigma0 = 1.0e-8
fold = f(x, *optargs) # Initial function value.
function_eval = 1
fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient.
current_grad = np.dot(gradnew, gradnew)
gradold = gradnew.copy()
d = -gradnew # Initial search direction.
success = True # Force calculation of directional derivs.
nsuccess = 0 # nsuccess counts number of successes.
beta = 1.0 # Initial scale parameter.
betamin = 1.0e-60 # Lower bound on scale.
betamax = 1.0e100 # Upper bound on scale.
status = "Not converged"
flog = [fold]
iteration = 0
len_maxiters = len(str(maxiters))
if display:
print ' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len_maxiters)
# Main optimization loop.
while iteration < maxiters:
# Calculate first and second directional derivatives.
if success:
mu = np.dot(d, gradnew)
if mu >= 0:
d = -gradnew
mu = np.dot(d, gradnew)
kappa = np.dot(d, d)
sigma = sigma0 / np.sqrt(kappa)
xplus = x + sigma * d
gplus = gradf(xplus, *optargs)
theta = np.dot(d, (gplus - gradnew)) / sigma
# Increase effective curvature and evaluate step size alpha.
delta = theta + beta * kappa
if delta <= 0:
delta = beta * kappa
beta = beta - theta / kappa
alpha = -mu / delta
# Calculate the comparison ratio.
xnew = x + alpha * d
fnew = f(xnew, *optargs)
function_eval += 1
if function_eval >= max_f_eval:
status = "Maximum number of function evaluations exceeded"
break
# return x, flog, function_eval, status
Delta = 2.*(fnew - fold) / (alpha * mu)
if Delta >= 0.:
success = True
nsuccess += 1
x = xnew
fnow = fnew
else:
success = False
fnow = fold
# Store relevant variables
flog.append(fnow) # Current function value
iteration += 1
print_out(len_maxiters, display, fnow, current_grad, beta, iteration)
if success:
# Test for termination
if (np.max(np.abs(alpha * d)) < xtol) or (np.abs(fnew - fold) < ftol):
status = 'converged'
break
# return x, flog, function_eval, status
else:
# Update variables for new position
gradnew = gradf(x, *optargs)
current_grad = np.dot(gradnew, gradnew)
gradold = gradnew
fold = fnew
# If the gradient is zero then we are done.
if current_grad <= gtol:
status = 'converged'
break
# return x, flog, function_eval, status
# Adjust beta according to comparison ratio.
if Delta < 0.25:
beta = min(4.0 * beta, betamax)
if Delta > 0.75:
beta = max(0.5 * beta, betamin)
# Update search direction using Polak-Ribiere formula, or re-start
# in direction of negative gradient after nparams steps.
if nsuccess == x.size:
d = -gradnew
# beta = 1. # TODO: betareset!!
nsuccess = 0
elif success:
Gamma = np.dot(gradold - gradnew, gradnew) / (mu)
d = Gamma * d - gradnew
else:
# If we get here, then we haven't terminated in the given number of
# iterations.
status = "maxiter exceeded"
if display:
print_out(len_maxiters, display, fnow, current_grad, beta, iteration)
print ""
return x, flog, function_eval, status

View file

@ -1,356 +0,0 @@
import numpy as np
import scipy as sp
import scipy.sparse
from optimization import Optimizer
from scipy import linalg, optimize
import pylab as plt
import copy, sys, pickle
class opt_SGD(Optimizer):
"""
Optimize using stochastic gradient descent.
*** Parameters ***
model: reference to the model object
iterations: number of iterations
learning_rate: learning rate
momentum: momentum
"""
def __init__(self, start, iterations = 10, learning_rate = 1e-4, momentum = 0.9, model = None, messages = False, batch_size = 1, self_paced = False, center = True, iteration_file = None, learning_rate_adaptation=None, actual_iter=None, schedule=None, **kwargs):
self.opt_name = "Stochastic Gradient Descent"
self.model = model
self.iterations = iterations
self.momentum = momentum
self.learning_rate = learning_rate
self.x_opt = None
self.f_opt = None
self.messages = messages
self.batch_size = batch_size
self.self_paced = self_paced
self.center = center
self.param_traces = [('noise',[])]
self.iteration_file = iteration_file
self.learning_rate_adaptation = learning_rate_adaptation
self.actual_iter = actual_iter
if self.learning_rate_adaptation != None:
if self.learning_rate_adaptation == 'annealing':
self.learning_rate_0 = self.learning_rate
else:
self.learning_rate_0 = self.learning_rate.mean()
self.schedule = schedule
# if len([p for p in self.model.kern.parts if p.name == 'bias']) == 1:
# self.param_traces.append(('bias',[]))
# if len([p for p in self.model.kern.parts if p.name == 'linear']) == 1:
# self.param_traces.append(('linear',[]))
# if len([p for p in self.model.kern.parts if p.name == 'rbf']) == 1:
# self.param_traces.append(('rbf_var',[]))
self.param_traces = dict(self.param_traces)
self.fopt_trace = []
num_params = len(self.model._get_params())
if isinstance(self.learning_rate, float):
self.learning_rate = np.ones((num_params,)) * self.learning_rate
assert (len(self.learning_rate) == num_params), "there must be one learning rate per parameter"
def __str__(self):
status = "\nOptimizer: \t\t\t %s\n" % self.opt_name
status += "f(x_opt): \t\t\t %.4f\n" % self.f_opt
status += "Number of iterations: \t\t %d\n" % self.iterations
status += "Learning rate: \t\t\t max %.3f, min %.3f\n" % (self.learning_rate.max(), self.learning_rate.min())
status += "Momentum: \t\t\t %.3f\n" % self.momentum
status += "Batch size: \t\t\t %d\n" % self.batch_size
status += "Time elapsed: \t\t\t %s\n" % self.time
return status
def plot_traces(self):
plt.figure()
plt.subplot(211)
plt.title('Parameters')
for k in self.param_traces.keys():
plt.plot(self.param_traces[k], label=k)
plt.legend(loc=0)
plt.subplot(212)
plt.title('Objective function')
plt.plot(self.fopt_trace)
def non_null_samples(self, data):
return (np.isnan(data).sum(axis=1) == 0)
def check_for_missing(self, data):
if sp.sparse.issparse(self.model.likelihood.Y):
return True
else:
return np.isnan(data).sum() > 0
def subset_parameter_vector(self, x, samples, param_shapes):
subset = np.array([], dtype = int)
x = np.arange(0, len(x))
i = 0
for s in param_shapes:
N, input_dim = s
X = x[i:i+N*input_dim].reshape(N, input_dim)
X = X[samples]
subset = np.append(subset, X.flatten())
i += N*input_dim
subset = np.append(subset, x[i:])
return subset
def shift_constraints(self, j):
constrained_indices = copy.deepcopy(self.model.constrained_indices)
for c, constraint in enumerate(constrained_indices):
mask = (np.ones_like(constrained_indices[c]) == 1)
for i in range(len(constrained_indices[c])):
pos = np.where(j == constrained_indices[c][i])[0]
if len(pos) == 1:
self.model.constrained_indices[c][i] = pos
else:
mask[i] = False
self.model.constrained_indices[c] = self.model.constrained_indices[c][mask]
return constrained_indices
# back them up
# bounded_i = copy.deepcopy(self.model.constrained_bounded_indices)
# bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers)
# bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers)
# for b in range(len(bounded_i)): # for each group of constraints
# for bc in range(len(bounded_i[b])):
# pos = np.where(j == bounded_i[b][bc])[0]
# if len(pos) == 1:
# pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0]
# self.model.constrained_bounded_indices[b][pos2] = pos[0]
# else:
# if len(self.model.constrained_bounded_indices[b]) == 1:
# # if it's the last index to be removed
# # the logic here is just a mess. If we remove the last one, then all the
# # b-indices change and we have to iterate through everything to find our
# # current index. Can't deal with this right now.
# raise NotImplementedError
# else: # just remove it from the indices
# mask = self.model.constrained_bounded_indices[b] != bc
# self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask]
# # here we shif the positive constraints. We cycle through each positive
# # constraint
# positive = self.model.constrained_positive_indices.copy()
# mask = (np.ones_like(positive) == 1)
# for p in range(len(positive)):
# # we now check whether the constrained index appears in the j vector
# # (the vector of the "active" indices)
# pos = np.where(j == self.model.constrained_positive_indices[p])[0]
# if len(pos) == 1:
# self.model.constrained_positive_indices[p] = pos
# else:
# mask[p] = False
# self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask]
# return (bounded_i, bounded_l, bounded_u), positive
def restore_constraints(self, c):#b, p):
# self.model.constrained_bounded_indices = b[0]
# self.model.constrained_bounded_lowers = b[1]
# self.model.constrained_bounded_uppers = b[2]
# self.model.constrained_positive_indices = p
self.model.constrained_indices = c
def get_param_shapes(self, N = None, input_dim = None):
model_name = self.model.__class__.__name__
if model_name == 'GPLVM':
return [(N, input_dim)]
if model_name == 'Bayesian_GPLVM':
return [(N, input_dim), (N, input_dim)]
else:
raise NotImplementedError
def step_with_missing_data(self, f_fp, X, step, shapes):
N, input_dim = X.shape
if not sp.sparse.issparse(self.model.likelihood.Y):
Y = self.model.likelihood.Y
samples = self.non_null_samples(self.model.likelihood.Y)
self.model.N = samples.sum()
Y = Y[samples]
else:
samples = self.model.likelihood.Y.nonzero()[0]
self.model.N = len(samples)
Y = np.asarray(self.model.likelihood.Y[samples].todense(), dtype = np.float64)
if self.model.N == 0 or Y.std() == 0.0:
return 0, step, self.model.N
self.model.likelihood._offset = Y.mean()
self.model.likelihood._scale = Y.std()
self.model.likelihood.set_data(Y)
# self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision
sigma = self.model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma)
j = self.subset_parameter_vector(self.x_opt, samples, shapes)
self.model.X = X[samples]
model_name = self.model.__class__.__name__
if model_name == 'Bayesian_GPLVM':
self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T)
self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT)
ci = self.shift_constraints(j)
f, fp = f_fp(self.x_opt[j])
step[j] = self.momentum * step[j] + self.learning_rate[j] * fp
self.x_opt[j] -= step[j]
self.restore_constraints(ci)
self.model.grads[j] = fp
# restore likelihood _offset and _scale, otherwise when we call set_data(y) on
# the next feature, it will get normalized with the mean and std of this one.
self.model.likelihood._offset = 0
self.model.likelihood._scale = 1
return f, step, self.model.N
def adapt_learning_rate(self, t, D):
if self.learning_rate_adaptation == 'adagrad':
if t > 0:
g_k = self.model.grads
self.s_k += np.square(g_k)
t0 = 100.0
self.learning_rate = 0.1/(t0 + np.sqrt(self.s_k))
import pdb; pdb.set_trace()
else:
self.learning_rate = np.zeros_like(self.learning_rate)
self.s_k = np.zeros_like(self.x_opt)
elif self.learning_rate_adaptation == 'annealing':
#self.learning_rate = self.learning_rate_0/(1+float(t+1)/10)
self.learning_rate = np.ones_like(self.learning_rate) * self.schedule[t]
elif self.learning_rate_adaptation == 'semi_pesky':
if self.model.__class__.__name__ == 'Bayesian_GPLVM':
g_t = self.model.grads
if t == 0:
self.hbar_t = 0.0
self.tau_t = 100.0
self.gbar_t = 0.0
self.gbar_t = (1-1/self.tau_t)*self.gbar_t + 1/self.tau_t * g_t
self.hbar_t = (1-1/self.tau_t)*self.hbar_t + 1/self.tau_t * np.dot(g_t.T, g_t)
self.learning_rate = np.ones_like(self.learning_rate)*(np.dot(self.gbar_t.T, self.gbar_t) / self.hbar_t)
tau_t = self.tau_t*(1-self.learning_rate) + 1
def opt(self, f_fp=None, f=None, fp=None):
self.x_opt = self.model._get_params_transformed()
self.grads = []
X, Y = self.model.X.copy(), self.model.likelihood.Y.copy()
self.model.likelihood.YYT = 0
self.model.likelihood.trYYT = 0
self.model.likelihood._offset = 0.0
self.model.likelihood._scale = 1.0
N, input_dim = self.model.X.shape
D = self.model.likelihood.Y.shape[1]
num_params = self.model._get_params()
self.trace = []
missing_data = self.check_for_missing(self.model.likelihood.Y)
step = np.zeros_like(num_params)
for it in range(self.iterations):
if self.actual_iter != None:
it = self.actual_iter
self.model.grads = np.zeros_like(self.x_opt) # TODO this is ugly
if it == 0 or self.self_paced is False:
features = np.random.permutation(Y.shape[1])
else:
features = np.argsort(NLL)
b = len(features)/self.batch_size
features = [features[i::b] for i in range(b)]
NLL = []
import pylab as plt
for count, j in enumerate(features):
self.model.D = len(j)
self.model.likelihood.D = len(j)
self.model.likelihood.set_data(Y[:, j])
# self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision
sigma = self.model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma)
if missing_data:
shapes = self.get_param_shapes(N, input_dim)
f, step, Nj = self.step_with_missing_data(f_fp, X, step, shapes)
else:
self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T)
self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT)
Nj = N
f, fp = f_fp(self.x_opt)
self.model.grads = fp.copy()
step = self.momentum * step + self.learning_rate * fp
self.x_opt -= step
if self.messages == 2:
noise = self.model.likelihood._variance
status = "evaluating {feature: 5d}/{tot: 5d} \t f: {f: 2.3f} \t non-missing: {nm: 4d}\t noise: {noise: 2.4f}\r".format(feature = count, tot = len(features), f = f, nm = Nj, noise = noise)
sys.stdout.write(status)
sys.stdout.flush()
self.param_traces['noise'].append(noise)
self.adapt_learning_rate(it+count, D)
NLL.append(f)
self.fopt_trace.append(NLL[-1])
# fig = plt.figure('traces')
# plt.clf()
# plt.plot(self.param_traces['noise'])
# for k in self.param_traces.keys():
# self.param_traces[k].append(self.model.get(k)[0])
self.grads.append(self.model.grads.tolist())
# should really be a sum(), but earlier samples in the iteration will have a very crappy ll
self.f_opt = np.mean(NLL)
self.model.N = N
self.model.X = X
self.model.D = D
self.model.likelihood.N = N
self.model.likelihood.D = D
self.model.likelihood.Y = Y
sigma = self.model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma)
self.trace.append(self.f_opt)
if self.iteration_file is not None:
f = open(self.iteration_file + "iteration%d.pickle" % it, 'w')
data = [self.x_opt, self.fopt_trace, self.param_traces]
pickle.dump(data, f)
f.close()
if self.messages != 0:
sys.stdout.write('\r' + ' '*len(status)*2 + ' \r')
status = "SGD Iteration: {0: 3d}/{1: 3d} f: {2: 2.3f} max eta: {3: 1.5f}\n".format(it+1, self.iterations, self.f_opt, self.learning_rate.max())
sys.stdout.write(status)
sys.stdout.flush()

View file

@ -1,18 +1,16 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import pdb
import pylab as pb
import datetime as dt
from scipy import optimize
import numpy as np
try:
import rasmussens_minimize as rasm
rasm_available = True
except ImportError:
rasm_available = False
from SCG import SCG
from scg import SCG
class Optimizer():
"""
@ -51,9 +49,9 @@ class Optimizer():
start = dt.datetime.now()
self.opt(**kwargs)
end = dt.datetime.now()
self.time = str(end-start)
self.time = str(end - start)
def opt(self, f_fp = None, f = None, fp = None):
def opt(self, f_fp=None, f=None, fp=None):
raise NotImplementedError, "this needs to be implemented to use the optimizer class"
def plot(self):
@ -78,7 +76,7 @@ class opt_tnc(Optimizer):
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "TNC (Scipy implementation)"
def opt(self, f_fp = None, f = None, fp = None):
def opt(self, f_fp=None, f=None, fp=None):
"""
Run the TNC optimizer
@ -96,8 +94,8 @@ class opt_tnc(Optimizer):
if self.gtol is not None:
opt_dict['pgtol'] = self.gtol
opt_result = optimize.fmin_tnc(f_fp, self.x_init, messages = self.messages,
maxfun = self.max_f_eval, **opt_dict)
opt_result = optimize.fmin_tnc(f_fp, self.x_init, messages=self.messages,
maxfun=self.max_f_eval, **opt_dict)
self.x_opt = opt_result[0]
self.f_opt = f_fp(self.x_opt)[0]
self.funct_eval = opt_result[1]
@ -108,7 +106,7 @@ class opt_lbfgsb(Optimizer):
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "L-BFGS-B (Scipy implementation)"
def opt(self, f_fp = None, f = None, fp = None):
def opt(self, f_fp=None, f=None, fp=None):
"""
Run the optimizer
@ -130,8 +128,8 @@ class opt_lbfgsb(Optimizer):
if self.gtol is not None:
opt_dict['pgtol'] = self.gtol
opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint = iprint,
maxfun = self.max_f_eval, **opt_dict)
opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint=iprint,
maxfun=self.max_f_eval, **opt_dict)
self.x_opt = opt_result[0]
self.f_opt = f_fp(self.x_opt)[0]
self.funct_eval = opt_result[2]['funcalls']
@ -142,12 +140,12 @@ class opt_simplex(Optimizer):
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "Nelder-Mead simplex routine (via Scipy)"
def opt(self, f_fp = None, f = None, fp = None):
def opt(self, f_fp=None, f=None, fp=None):
"""
The simplex optimizer does not require gradients.
"""
statuses = ['Converged', 'Maximum number of function evaluations made','Maximum number of iterations reached']
statuses = ['Converged', 'Maximum number of function evaluations made', 'Maximum number of iterations reached']
opt_dict = {}
if self.xtol is not None:
@ -157,8 +155,8 @@ class opt_simplex(Optimizer):
if self.gtol is not None:
print "WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it"
opt_result = optimize.fmin(f, self.x_init, (), disp = self.messages,
maxfun = self.max_f_eval, full_output=True, **opt_dict)
opt_result = optimize.fmin(f, self.x_init, (), disp=self.messages,
maxfun=self.max_f_eval, full_output=True, **opt_dict)
self.x_opt = opt_result[0]
self.f_opt = opt_result[1]
@ -172,7 +170,7 @@ class opt_rasm(Optimizer):
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "Rasmussen's Conjugate Gradient"
def opt(self, f_fp = None, f = None, fp = None):
def opt(self, f_fp=None, f=None, fp=None):
"""
Run Rasmussen's Conjugate Gradient optimizer
"""
@ -189,8 +187,8 @@ class opt_rasm(Optimizer):
if self.gtol is not None:
print "WARNING: minimize doesn't have an gtol arg, so I'm going to ignore it"
opt_result = rasm.minimize(self.x_init, f_fp, (), messages = self.messages,
maxnumfuneval = self.max_f_eval)
opt_result = rasm.minimize(self.x_init, f_fp, (), messages=self.messages,
maxnumfuneval=self.max_f_eval)
self.x_opt = opt_result[0]
self.f_opt = opt_result[1][-1]
self.funct_eval = opt_result[2]
@ -203,7 +201,7 @@ class opt_SCG(Optimizer):
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "Scaled Conjugate Gradients"
def opt(self, f_fp = None, f = None, fp = None):
def opt(self, f_fp=None, f=None, fp=None):
assert not f is None
assert not fp is None
opt_result = SCG(f, fp, self.x_init, display=self.messages,
@ -218,7 +216,7 @@ class opt_SCG(Optimizer):
self.status = opt_result[3]
def get_optimizer(f_min):
from SGD import opt_SGD
from sgd import opt_SGD
optimizers = {'fmin_tnc': opt_tnc,
'simplex': opt_simplex,

View file

@ -13,14 +13,14 @@ class Brownian(kernpart):
"""
Brownian Motion kernel.
:param D: the number of input dimensions
:type D: int
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance:
:type variance: float
"""
def __init__(self,D,variance=1.):
self.D = D
assert self.D==1, "Brownian motion in 1D only"
def __init__(self,input_dim,variance=1.):
self.input_dim = input_dim
assert self.input_dim==1, "Brownian motion in 1D only"
self.Nparam = 1.
self.name = 'Brownian'
self._set_params(np.array([variance]).flatten())

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, Coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
try:
from constructors import rbf_sympy, sympykern # these depend on sympy
except:

View file

@ -7,14 +7,14 @@ import numpy as np
import hashlib
class bias(kernpart):
def __init__(self,D,variance=1.):
def __init__(self,input_dim,variance=1.):
"""
:param D: the number of input dimensions
:type D: int
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
self.D = D
self.input_dim = input_dim
self.Nparam = 1
self.name = 'bias'
self._set_params(np.array([variance]).flatten())

View file

@ -21,7 +21,7 @@ from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part
from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part
from prod import prod as prodpart
from symmetric import symmetric as symmetric_part
from coregionalise import coregionalise as coregionalise_part
from coregionalise import Coregionalise as coregionalise_part
from rational_quadratic import rational_quadratic as rational_quadraticpart
from rbfcos import rbfcos as rbfcospart
from independent_outputs import independent_outputs as independent_output_part
@ -33,8 +33,8 @@ def rbf(D,variance=1., lengthscale=None,ARD=False):
"""
Construct an RBF kernel
:param D: dimensionality of the kernel, obligatory
:type D: int
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
@ -51,7 +51,7 @@ def linear(D,variances=None,ARD=False):
Arguments
---------
D (int), obligatory
input_dimD (int), obligatory
variances (np.ndarray)
ARD (boolean)
"""
@ -64,7 +64,7 @@ def white(D,variance=1.):
Arguments
---------
D (int), obligatory
input_dimD (int), obligatory
variance (float)
"""
part = whitepart(D,variance)
@ -74,8 +74,8 @@ def exponential(D,variance=1., lengthscale=None, ARD=False):
"""
Construct an exponential kernel
:param D: dimensionality of the kernel, obligatory
:type D: int
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
@ -90,8 +90,8 @@ def Matern32(D,variance=1., lengthscale=None, ARD=False):
"""
Construct a Matern 3/2 kernel.
:param D: dimensionality of the kernel, obligatory
:type D: int
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
@ -106,8 +106,8 @@ def Matern52(D,variance=1., lengthscale=None, ARD=False):
"""
Construct a Matern 5/2 kernel.
:param D: dimensionality of the kernel, obligatory
:type D: int
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
@ -124,7 +124,7 @@ def bias(D,variance=1.):
Arguments
---------
D (int), obligatory
input_dim (int), obligatory
variance (float)
"""
part = biaspart(D,variance)
@ -133,7 +133,7 @@ def bias(D,variance=1.):
def finite_dimensional(D,F,G,variances=1.,weights=None):
"""
Construct a finite dimensional kernel.
D: int - the number of input dimensions
input_dim: int - the number of input dimensions
F: np.array of functions with shape (n,) - the n basis functions
G: np.array with shape (n,n) - the Gram matrix associated to F
variances : np.ndarray with shape (n,)
@ -145,8 +145,8 @@ def spline(D,variance=1.):
"""
Construct a spline kernel.
:param D: Dimensionality of the kernel
:type D: int
:param input_dim: Dimensionality of the kernel
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
@ -157,8 +157,8 @@ def Brownian(D,variance=1.):
"""
Construct a Brownian motion kernel.
:param D: Dimensionality of the kernel
:type D: int
:param input_dim: Dimensionality of the kernel
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
@ -204,8 +204,8 @@ def periodic_exponential(D=1,variance=1., lengthscale=None, period=2*np.pi,n_fre
"""
Construct an periodic exponential kernel
:param D: dimensionality, only defined for D=1
:type D: int
:param input_dim: dimensionality, only defined for input_dim=1
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
@ -222,8 +222,8 @@ def periodic_Matern32(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,
"""
Construct a periodic Matern 3/2 kernel.
:param D: dimensionality, only defined for D=1
:type D: int
:param input_dim: dimensionality, only defined for input_dim=1
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
@ -240,8 +240,8 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,
"""
Construct a periodic Matern 5/2 kernel.
:param D: dimensionality, only defined for D=1
:type D: int
:param input_dim: dimensionality, only defined for input_dim=1
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
@ -256,14 +256,14 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,
def prod(k1,k2,tensor=False):
"""
Construct a product kernel over D from two kernels over D
Construct a product kernel over input_dim from two kernels over input_dim
:param k1, k2: the kernels to multiply
:type k1, k2: kernpart
:rtype: kernel object
"""
part = prodpart(k1,k2,tensor)
return kern(part.D, [part])
return kern(part.input_dim, [part])
def symmetric(k):
"""
@ -273,7 +273,7 @@ def symmetric(k):
k_.parts = [symmetric_part(p) for p in k.parts]
return k_
def coregionalise(Nout,R=1, W=None, kappa=None):
def Coregionalise(Nout,R=1, W=None, kappa=None):
p = coregionalise_part(Nout,R,W,kappa)
return kern(1,[p])
@ -282,8 +282,8 @@ def rational_quadratic(D,variance=1., lengthscale=1., power=1.):
"""
Construct rational quadratic kernel.
:param D: the number of input dimensions
:type D: int (D=1 is the only value currently supported)
:param input_dim: the number of input dimensions
:type input_dim: int (input_dim=1 is the only value currently supported)
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the lengthscale :math:`\ell`
@ -300,7 +300,7 @@ def fixed(D, K, variance=1.):
Arguments
---------
D (int), obligatory
input_dim (int), obligatory
K (np.array), obligatory
variance (float)
"""
@ -321,6 +321,6 @@ def independent_outputs(k):
for sl in k.input_slices:
assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
parts = [independent_output_part(p) for p in k.parts]
return kern(k.D+1,parts)
return kern(k.input_dim+1,parts)

View file

@ -7,12 +7,12 @@ from GPy.util.linalg import mdot, pdinv
import pdb
from scipy import weave
class coregionalise(kernpart):
class Coregionalise(kernpart):
"""
Kernel for Intrinsic Corregionalization Models
"""
def __init__(self,Nout,R=1, W=None, kappa=None):
self.D = 1
self.input_dim = 1
self.name = 'coregion'
self.Nout = Nout
self.R = R

View file

@ -11,14 +11,14 @@ from prod import prod
from ..util.linalg import symmetrify
class kern(parameterised):
def __init__(self, D, parts=[], input_slices=None):
def __init__(self, input_dim, parts=[], input_slices=None):
"""
This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where.
The technical code for kernels is divided into _parts_ (see e.g. rbf.py). This obnject contains a list of parts, which are computed additively. For multiplication, special _prod_ parts are used.
:param D: The dimensioality of the kernel's input space
:type D: int
:param input_dim: The dimensioality of the kernel's input space
:type input_dim: int
:param parts: the 'parts' (PD functions) of the kernel
:type parts: list of kernpart objects
:param input_slices: the slices on the inputs which apply to each kernel
@ -29,7 +29,7 @@ class kern(parameterised):
self.Nparts = len(parts)
self.Nparam = sum([p.Nparam for p in self.parts])
self.D = D
self.input_dim = input_dim
# deal with input_slices
if input_slices is None:
@ -96,10 +96,10 @@ class kern(parameterised):
:type other: GPy.kern
"""
if tensor:
D = self.D + other.D
self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]
D = self.input_dim + other.input_dim
self_input_slices = [slice(*sl.indices(self.input_dim)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices]
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
@ -111,8 +111,8 @@ class kern(parameterised):
newkern.constraints = self.constraints + other.constraints
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
else:
assert self.D == other.D
newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
assert self.input_dim == other.input_dim
newkern = kern(self.input_dim, self.parts + other.parts, self.input_slices + other.input_slices)
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [i + self.Nparam for i in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints
@ -138,16 +138,16 @@ class kern(parameterised):
slices = []
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
s1, s2 = [False] * K1.D, [False] * K2.D
s1, s2 = [False] * K1.input_dim, [False] * K2.input_dim
s1[sl1], s2[sl2] = [True], [True]
slices += [s1 + s2]
newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)]
if tensor:
newkern = kern(K1.D + K2.D, newkernparts, slices)
newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices)
else:
newkern = kern(K1.D, newkernparts, slices)
newkern = kern(K1.input_dim, newkernparts, slices)
newkern._follow_constrains(K1, K2)
return newkern
@ -211,7 +211,7 @@ class kern(parameterised):
def K(self, X, X2=None, which_parts='all'):
if which_parts == 'all':
which_parts = [True] * self.Nparts
assert X.shape[1] == self.D
assert X.shape[1] == self.input_dim
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0]))
[p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
@ -225,11 +225,11 @@ class kern(parameterised):
:param dL_dK: An array of dL_dK derivaties, dL_dK
:type dL_dK: Np.ndarray (N x M)
:param X: Observed data inputs
:type X: np.ndarray (N x D)
:type X: np.ndarray (N x input_dim)
:param X2: Observed dara inputs (optional, defaults to X)
:type X2: np.ndarray (M x D)
:type X2: np.ndarray (M x input_dim)
"""
assert X.shape[1] == self.D
assert X.shape[1] == self.input_dim
target = np.zeros(self.Nparam)
if X2 is None:
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
@ -251,20 +251,20 @@ class kern(parameterised):
def Kdiag(self, X, which_parts='all'):
if which_parts == 'all':
which_parts = [True] * self.Nparts
assert X.shape[1] == self.D
assert X.shape[1] == self.input_dim
target = np.zeros(X.shape[0])
[p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on]
return target
def dKdiag_dtheta(self, dL_dKdiag, X):
assert X.shape[1] == self.D
assert X.shape[1] == self.input_dim
assert dL_dKdiag.size == X.shape[0]
target = np.zeros(self.Nparam)
[p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
return self._transform_gradients(target)
def dKdiag_dX(self, dL_dKdiag, X):
assert X.shape[1] == self.D
assert X.shape[1] == self.input_dim
target = np.zeros_like(X)
[p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target
@ -386,7 +386,7 @@ class kern(parameterised):
def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs):
if which_parts == 'all':
which_parts = [True] * self.Nparts
if self.D == 1:
if self.input_dim == 1:
if x is None:
x = np.zeros((1, 1))
else:
@ -408,7 +408,7 @@ class kern(parameterised):
pb.xlabel("x")
pb.ylabel("k(x,%0.1f)" % x)
elif self.D == 2:
elif self.input_dim == 2:
if x is None:
x = np.zeros((1, 2))
else:

View file

@ -3,16 +3,16 @@
class kernpart(object):
def __init__(self,D):
def __init__(self,input_dim):
"""
The base class for a kernpart: a positive definite function which forms part of a kernel
:param D: the number of input dimensions to the function
:type D: int
:param input_dim: the number of input dimensions to the function
:type input_dim: int
Do not instantiate.
"""
self.D = D
self.input_dim = input_dim
self.Nparam = 1
self.name = 'unnamed'

View file

@ -13,10 +13,10 @@ class linear(kernpart):
.. math::
k(x,y) = \sum_{i=1}^D \sigma^2_i x_iy_i
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i x_iy_i
:param D: the number of input dimensions
:type D: int
:param input_dim: the number of input dimensions
:type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there is only one variance parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension.
@ -24,8 +24,8 @@ class linear(kernpart):
:rtype: kernel object
"""
def __init__(self, D, variances=None, ARD=False):
self.D = D
def __init__(self, input_dim, variances=None, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if ARD == False:
self.Nparam = 1
@ -37,13 +37,13 @@ class linear(kernpart):
variances = np.ones(1)
self._Xcache, self._X2cache = np.empty(shape=(2,))
else:
self.Nparam = self.D
self.Nparam = self.input_dim
self.name = 'linear'
if variances is not None:
variances = np.asarray(variances)
assert variances.size == self.D, "bad number of lengthscales"
assert variances.size == self.input_dim, "bad number of lengthscales"
else:
variances = np.ones(self.D)
variances = np.ones(self.input_dim)
self._set_params(variances.flatten())
# initialize cache
@ -82,7 +82,7 @@ class linear(kernpart):
def dK_dtheta(self, dL_dK, X, X2, target):
if self.ARD:
if X2 is None:
[np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.D)]
[np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)]
else:
product = X[:, None, :] * X2[None, :, :]
target += (dL_dK[:, :, None] * product).sum(0).sum(0)
@ -153,7 +153,7 @@ class linear(kernpart):
# psi2_real[n, m, m_prime] = np.dot(tmp, (
# self._Z[m_prime:m_prime + 1] * self.variances).T)
# mu2_S = (self._mu[:, None, :] * self._mu[:, :, None])
# mu2_S[:, np.arange(self.D), np.arange(self.D)] += self._S
# mu2_S[:, np.arange(self.input_dim), np.arange(self.input_dim)] += self._S
# psi2 = (self.ZA[None, :, None, :] * mu2_S[:, None]).sum(-1)
# psi2 = (psi2[:, :, None] * self.ZA[None, None]).sum(-1)
# psi2_tensor = np.tensordot(self.ZZ[None, :, :, :] * np.square(self.variances), self.mu2_S[:, None, None, :], ((3), (3))).squeeze().T

View file

@ -22,14 +22,14 @@ class prod(kernpart):
self.k1 = k1
self.k2 = k2
if tensor:
self.D = k1.D + k2.D
self.slice1 = slice(0,self.k1.D)
self.slice2 = slice(self.k1.D,self.k1.D+self.k2.D)
self.input_dim = k1.input_dim + k2.input_dim
self.slice1 = slice(0,self.k1.input_dim)
self.slice2 = slice(self.k1.input_dim,self.k1.input_dim+self.k2.input_dim)
else:
assert k1.D == k2.D, "Error: The input spaces of the kernels to sum don't have the same dimension."
self.D = k1.D
self.slice1 = slice(0,self.D)
self.slice2 = slice(0,self.D)
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to sum don't have the same dimension."
self.input_dim = k1.input_dim
self.slice1 = slice(0,self.input_dim)
self.slice2 = slice(0,self.input_dim)
self._X, self._X2, self._params = np.empty(shape=(3,1))
self._set_params(np.hstack((k1._get_params(),k2._get_params())))

View file

@ -18,8 +18,8 @@ class rbf(kernpart):
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
:param D: the number of input dimensions
:type D: int
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the vector of lengthscale of the kernel
@ -31,8 +31,8 @@ class rbf(kernpart):
.. Note: this object implements both the ARD and 'spherical' version of the function
"""
def __init__(self,D,variance=1.,lengthscale=None,ARD=False):
self.D = D
def __init__(self,input_dim,variance=1.,lengthscale=None,ARD=False):
self.input_dim = input_dim
self.name = 'rbf'
self.ARD = ARD
if not ARD:
@ -43,12 +43,12 @@ class rbf(kernpart):
else:
lengthscale = np.ones(1)
else:
self.Nparam = self.D + 1
self.Nparam = self.input_dim + 1
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.D, "bad number of lengthscales"
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
lengthscale = np.ones(self.D)
lengthscale = np.ones(self.input_dim)
self._set_params(np.hstack((variance,lengthscale.flatten())))
@ -100,7 +100,7 @@ class rbf(kernpart):
code = """
int q,i,j;
double tmp;
for(q=0; q<D; q++){
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<N; i++){
for(j=0; j<i; j++){
@ -110,12 +110,12 @@ class rbf(kernpart):
target(q+1) += var_len3(q)*tmp;
}
"""
N,M,D = X.shape[0], X.shape[0], self.D
N, M, input_dim = X.shape[0], X.shape[0], self.input_dim
else:
code = """
int q,i,j;
double tmp;
for(q=0; q<D; q++){
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<N; i++){
for(j=0; j<M; j++){
@ -125,9 +125,9 @@ class rbf(kernpart):
target(q+1) += var_len3(q)*tmp;
}
"""
N,M,D = X.shape[0], X2.shape[0], self.D
#[np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.D)]
weave.inline(code, arg_names=['N','M','D','X','X2','target','dvardLdK','var_len3'],
N, M, input_dim = X.shape[0], X2.shape[0], self.input_dim
#[np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
weave.inline(code, arg_names=['N','M','input_dim','X','X2','target','dvardLdK','var_len3'],
type_converters=weave.converters.blitz,**self.weave_options)
else:
target[1] += (self.variance/self.lengthscale)*np.sum(self._K_dvar*self._K_dist2*dL_dK)
@ -278,8 +278,8 @@ class rbf(kernpart):
psi2 = np.empty((N,M,M))
psi2_Zdist_sq = self._psi2_Zdist_sq
_psi2_denom = self._psi2_denom.squeeze().reshape(N,self.D)
half_log_psi2_denom = 0.5*np.log(self._psi2_denom).squeeze().reshape(N,self.D)
_psi2_denom = self._psi2_denom.squeeze().reshape(N,self.input_dim)
half_log_psi2_denom = 0.5*np.log(self._psi2_denom).squeeze().reshape(N,self.input_dim)
variance_sq = float(np.square(self.variance))
if self.ARD:
lengthscale2 = self.lengthscale2

View file

@ -8,13 +8,13 @@ class white(kernpart):
"""
White noise kernel.
:param D: the number of input dimensions
:type D: int
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance:
:type variance: float
"""
def __init__(self,D,variance=1.):
self.D = D
def __init__(self,input_dim,variance=1.):
self.input_dim = input_dim
self.Nparam = 1
self.name = 'white'
self._set_params(np.array([variance]).flatten())

View file

@ -1,336 +0,0 @@
import numpy as np
from scipy import stats, linalg
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot
from likelihood import likelihood
class EP(likelihood):
def __init__(self,data,likelihood_function,epsilon=1e-3,power_ep=[1.,1.]):
"""
Expectation Propagation
Arguments
---------
epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
likelihood_function : a likelihood function (see likelihood_functions.py)
"""
self.likelihood_function = likelihood_function
self.epsilon = epsilon
self.eta, self.delta = power_ep
self.data = data
self.N, self.D = self.data.shape
self.is_heteroscedastic = True
self.Nparams = 0
self._transf_data = self.likelihood_function._preprocess_values(data)
#Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde)
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
#initial values for the GP variables
self.Y = np.zeros((self.N,1))
self.covariance_matrix = np.eye(self.N)
self.precision = np.ones(self.N)[:,None]
self.Z = 0
self.YYT = None
self.V = self.precision * self.Y
def restart(self):
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
self.Y = np.zeros((self.N,1))
self.covariance_matrix = np.eye(self.N)
self.precision = np.ones(self.N)[:,None]
self.Z = 0
self.YYT = None
self.V = self.precision * self.Y
def predictive_values(self,mu,var,full_cov):
if full_cov:
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
return self.likelihood_function.predictive_values(mu,var)
def _get_params(self):
return np.zeros(0)
def _get_param_names(self):
return []
def _set_params(self,p):
pass # TODO: the EP likelihood might want to take some parameters...
def _gradients(self,partial):
return np.zeros(0) # TODO: the EP likelihood might want to take some parameters...
def _compute_GP_variables(self):
#Variables to be called from GP
mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
sigma_sum = 1./self.tau_ + 1./self.tau_tilde
mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
self.Y = mu_tilde[:,None]
self.YYT = np.dot(self.Y,self.Y.T)
self.covariance_matrix = np.diag(1./self.tau_tilde)
self.precision = self.tau_tilde[:,None]
self.V = self.precision * self.Y
def fit_full(self,K):
"""
The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006.
"""
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(self.N)
Sigma = K.copy()
"""
Initial values - Cavity distribution parameters:
q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = self.epsilon + 1.
epsilon_np2 = self.epsilon + 1.
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
mu = np.dot(Sigma,self.v_tilde)
self.iterations += 1
#Sigma recomptutation with Cholesky decompositon
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
L = jitchol(B)
V,info = linalg.lapack.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
Sigma = K - np.dot(V.T,V)
mu = np.dot(Sigma,self.v_tilde)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
return self._compute_GP_variables()
def fit_DTC(self, Kmm, Kmn):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see ... 2013.
"""
M = Kmm.shape[0]
#TODO: this doesn't work with uncertain inputs!
"""
Prior approximation parameters:
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
Sigma0 = Qnn = Knm*Kmmi*Kmn
"""
KmnKnm = np.dot(Kmn,Kmn.T)
Lm = jitchol(Kmm)
Lmi = chol_inv(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
KmmiKmn = np.dot(Kmmi,Kmn)
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
LLT0 = Kmm.copy()
#Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
#KmnKnm = np.dot(Kmn, Kmn.T)
#KmmiKmn = np.dot(Kmmi,Kmn)
#Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
#LLT0 = Kmm.copy()
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*Gamma
"""
mu = np.zeros(self.N)
LLT = Kmm.copy()
Sigma_diag = Qnn_diag.copy()
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
np1 = [self.tau_tilde.copy()]
np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
L = jitchol(LLT)
#cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.sum(V*V,-2)
si = np.sum(V.T*V[:,i],-1)
mu += (Delta_v-Delta_tau*mu[i])*si
self.iterations += 1
#Sigma recomputation with Cholesky decompositon
LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
L = jitchol(LLT)
V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1)
V2,info = linalg.lapack.flapack.dtrtrs(L.T,V,lower=0)
Sigma_diag = np.sum(V*V,-2)
Knmv_tilde = np.dot(Kmn,self.v_tilde)
mu = np.dot(V2.T,Knmv_tilde)
epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N
np1.append(self.tau_tilde.copy())
np2.append(self.v_tilde.copy())
self._compute_GP_variables()
def fit_FITC(self, Kmm, Kmn, Knn_diag):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008.
"""
M = Kmm.shape[0]
"""
Prior approximation parameters:
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
"""
Lm = jitchol(Kmm)
Lmi = chol_inv(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
P0 = Kmn.T
KmnKnm = np.dot(P0.T, P0)
KmmiKmn = np.dot(Kmmi,P0.T)
Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
Diag0 = Knn_diag - Qnn_diag
R0 = jitchol(Kmmi).T
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*Gamma
"""
self.w = np.zeros(self.N)
self.Gamma = np.zeros(M)
mu = np.zeros(self.N)
P = P0.copy()
R = R0.copy()
Diag = Diag0.copy()
Sigma_diag = Knn_diag
RPT0 = np.dot(R0,P0.T)
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
dtd1 = Delta_tau*Diag[i] + 1.
dii = Diag[i]
Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = P[i,:].reshape(1,M)
P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(R,pi_.T)
RTR = np.dot(R.T,np.dot(np.eye(M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
R = jitchol(RTR).T
self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
mu = self.w + np.dot(P,self.Gamma)
self.iterations += 1
#Sigma recomptutation with Cholesky decompositon
Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
Diag = Diag0 * Iplus_Dprod_i
P = Iplus_Dprod_i[:,None] * P0
safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0)
L = jitchol(np.eye(M) + np.dot(RPT0,safe_diag[:,None]*RPT0.T))
R,info = linalg.lapack.flapack.dtrtrs(L,R0,lower=1)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
self.w = Diag * self.v_tilde
self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
mu = self.w + np.dot(P,self.Gamma)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
return self._compute_GP_variables()

View file

@ -1,93 +0,0 @@
import numpy as np
from likelihood import likelihood
class Gaussian(likelihood):
"""
Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the likelihood_function used
:param variance :
:param normalize: whether to normalize the data before computing (predictions will be in original scales)
:type normalize: False|True
"""
def __init__(self, data, variance=1., normalize=False):
self.is_heteroscedastic = False
self.Nparams = 1
self.Z = 0. # a correction factor which accounts for the approximation made
N, self.D = data.shape
# normalization
if normalize:
self._offset = data.mean(0)[None, :]
self._scale = data.std(0)[None, :]
# Don't scale outputs which have zero variance to zero.
self._scale[np.nonzero(self._scale == 0.)] = 1.0e-3
else:
self._offset = np.zeros((1, self.D))
self._scale = np.ones((1, self.D))
self.set_data(data)
self._variance = np.asarray(variance) + 1.
self._set_params(np.asarray(variance))
def set_data(self, data):
self.data = data
self.N, D = data.shape
assert D == self.D
self.Y = (self.data - self._offset) / self._scale
if D > self.N:
self.YYT = np.dot(self.Y, self.Y.T)
self.trYYT = np.trace(self.YYT)
else:
self.YYT = None
self.trYYT = np.sum(np.square(self.Y))
def _get_params(self):
return np.asarray(self._variance)
def _get_param_names(self):
return ["noise_variance"]
def _set_params(self, x):
x = np.float64(x)
if self._variance != x:
if x == 0.:
self.precision = None
self.V = None
else:
self.precision = 1. / x
self.V = (self.precision) * self.Y
self.covariance_matrix = np.eye(self.N) * x
self._variance = x
def predictive_values(self, mu, var, full_cov):
"""
Un-normalize the prediction and add the likelihood variance, then return the 5%, 95% interval
"""
mean = mu * self._scale + self._offset
if full_cov:
if self.D > 1:
raise NotImplementedError, "TODO"
# Note. for D>1, we need to re-normalise all the outputs independently.
# This will mess up computations of diag(true_var), below.
# note that the upper, lower quantiles should be the same shape as mean
# Augment the output variance with the likelihood variance and rescale.
true_var = (var + np.eye(var.shape[0]) * self._variance) * self._scale ** 2
_5pc = mean - 2.*np.sqrt(np.diag(true_var))
_95pc = mean + 2.*np.sqrt(np.diag(true_var))
else:
true_var = (var + self._variance) * self._scale ** 2
_5pc = mean - 2.*np.sqrt(true_var)
_95pc = mean + 2.*np.sqrt(true_var)
return mean, true_var, _5pc, _95pc
def fit_full(self):
"""
No approximations needed
"""
pass
def _gradients(self, partial):
return np.sum(partial)

View file

@ -1,4 +1,4 @@
from EP import EP
from Gaussian import Gaussian
from ep import EP
from gaussian import Gaussian
# TODO: from Laplace import Laplace
import likelihood_functions as functions

View file

@ -10,12 +10,12 @@ from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import link_functions
class likelihood_function(object):
class LikelihoodFunction(object):
"""
Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the likelihood_function used
..Note:: Y values allowed depend on the LikelihoodFunction used
"""
def __init__(self,link):
if link == self._analytical:
@ -69,7 +69,7 @@ class likelihood_function(object):
sigma2_hat = m2 - mu_hat**2 # Second central moment
return float(Z_hat), float(mu_hat), float(sigma2_hat)
class binomial(likelihood_function):
class Binomial(LikelihoodFunction):
"""
Probit likelihood
Y is expected to take values in {-1,1}
@ -82,7 +82,7 @@ class binomial(likelihood_function):
self._analytical = link_functions.probit
if not link:
link = self._analytical
super(binomial, self).__init__(link)
super(Binomial, self).__init__(link)
def _distribution(self,gp,obs):
pass
@ -134,7 +134,7 @@ class binomial(likelihood_function):
p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var))
return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var
class Poisson(likelihood_function):
class Poisson(LikelihoodFunction):
"""
Poisson likelihood
Y is expected to take values in {0,1,2,...}

View file

@ -1,588 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import sys, pdb
from GPLVM import GPLVM
from ..core import sparse_GP
from GPy.util.linalg import pdinv
from ..likelihoods import Gaussian
from .. import kern
from numpy.linalg.linalg import LinAlgError
import itertools
from matplotlib.colors import colorConverter
from matplotlib.figure import SubplotParams
from GPy.inference.optimization import SCG
from GPy.util import plot_latent
class Bayesian_GPLVM(sparse_GP, GPLVM):
"""
Bayesian Gaussian Process Latent Variable Model
:param Y: observed data (np.ndarray) or GPy.likelihood
:type Y: np.ndarray| GPy.likelihood instance
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
"""
def __init__(self, likelihood_or_Y, input_dim, X=None, X_variance=None, init='PCA', M=10,
Z=None, kernel=None, oldpsave=10, _debug=False,
**kwargs):
if type(likelihood_or_Y) is np.ndarray:
likelihood = Gaussian(likelihood_or_Y)
else:
likelihood = likelihood_or_Y
if X == None:
X = self.initialise_latent(init, input_dim, likelihood.Y)
self.init = init
if X_variance is None:
X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1)
if Z is None:
Z = np.random.permutation(X.copy())[:M]
assert Z.shape[1] == X.shape[1]
if kernel is None:
kernel = kern.rbf(input_dim) + kern.white(input_dim)
self.oldpsave = oldpsave
self._oldps = []
self._debug = _debug
if self._debug:
self.f_call = 0
self._count = itertools.count()
self._savedklll = []
self._savedparams = []
self._savedgradients = []
self._savederrors = []
self._savedpsiKmm = []
self._savedABCD = []
sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs)
self._set_params(self._get_params())
@property
def oldps(self):
return self._oldps
@oldps.setter
def oldps(self, p):
if len(self._oldps) == (self.oldpsave + 1):
self._oldps.pop()
# if len(self._oldps) == 0 or not np.any([np.any(np.abs(p - op) > 1e-5) for op in self._oldps]):
self._oldps.insert(0, p.copy())
def _get_param_names(self):
X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], [])
S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], [])
return (X_names + S_names + sparse_GP._get_param_names(self))
def _get_params(self):
"""
Horizontally stacks the parameters in order to present them to the optimizer.
The resulting 1-D array has this structure:
===============================================================
| mu | S | Z | theta | beta |
===============================================================
"""
x = np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
return x
def _clipped(self, x):
return x # np.clip(x, -1e300, 1e300)
def _set_params(self, x, save_old=True, save_count=0):
# try:
x = self._clipped(x)
N, input_dim = self.N, self.input_dim
self.X = x[:self.X.size].reshape(N, input_dim).copy()
self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy()
sparse_GP._set_params(self, x[(2 * N * input_dim):])
# self.oldps = x
# except (LinAlgError, FloatingPointError, ZeroDivisionError):
# print "\rWARNING: Caught LinAlgError, continueing without setting "
# if self._debug:
# self._savederrors.append(self.f_call)
# if save_count > 10:
# raise
# self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)
def dKL_dmuS(self):
dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
dKL_dmu = self.X
return dKL_dmu, dKL_dS
def dL_dmuS(self):
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.dL_dpsi0, self.Z, self.X, self.X_variance)
dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2, self.Z, self.X, self.X_variance)
dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
return dL_dmu, dL_dS
def KL_divergence(self):
var_mean = np.square(self.X).sum()
var_S = np.sum(self.X_variance - np.log(self.X_variance))
return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.N
def log_likelihood(self):
ll = sparse_GP.log_likelihood(self)
kl = self.KL_divergence()
# if ll < -2E4:
# ll = -2E4 + np.random.randn()
# if kl > 5E4:
# kl = 5E4 + np.random.randn()
if self._debug:
self.f_call = self._count.next()
if self.f_call % 1 == 0:
self._savedklll.append([self.f_call, ll, kl])
self._savedparams.append([self.f_call, self._get_params()])
self._savedgradients.append([self.f_call, self._log_likelihood_gradients()])
self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]])
# sf2 = self.scale_factor ** 2
if self.likelihood.is_heteroscedastic:
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y)
# B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else:
A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
# B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
self._savedABCD.append([self.f_call, A, B, C, D])
# print "\nkl:", kl, "ll:", ll
return ll - kl
def _log_likelihood_gradients(self):
dKL_dmu, dKL_dS = self.dKL_dmuS()
dL_dmu, dL_dS = self.dL_dmuS()
# TODO: find way to make faster
d_dmu = (dL_dmu - dKL_dmu).flatten()
d_dS = (dL_dS - dKL_dS).flatten()
# TEST KL: ====================
# d_dmu = (dKL_dmu).flatten()
# d_dS = (dKL_dS).flatten()
# ========================
# TEST L: ====================
# d_dmu = (dL_dmu).flatten()
# d_dS = (dL_dS).flatten()
# ========================
self.dbound_dmuS = np.hstack((d_dmu, d_dS))
self.dbound_dZtheta = sparse_GP._log_likelihood_gradients(self)
return self._clipped(np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta)))
def plot_latent(self, *args, **kwargs):
return plot_latent.plot_latent_indices(self, *args, **kwargs)
def do_test_latents(self, Y):
"""
Compute the latent representation for a set of new points Y
Notes:
This will only work with a univariate Gaussian likelihood (for now)
"""
assert not self.likelihood.is_heteroscedastic
N_test = Y.shape[0]
input_dim = self.Z.shape[1]
means = np.zeros((N_test, input_dim))
covars = np.zeros((N_test, input_dim))
dpsi0 = -0.5 * self.D * self.likelihood.precision
dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods
V = self.likelihood.precision * Y
dpsi1 = np.dot(self.Cpsi1V, V.T)
start = np.zeros(self.input_dim * 2)
for n, dpsi1_n in enumerate(dpsi1.T[:, :, None]):
args = (self.kern, self.Z, dpsi0, dpsi1_n, dpsi2)
xopt, fopt, neval, status = SCG(f=latent_cost, gradf=latent_grad, x=start, optargs=args, display=False)
mu, log_S = xopt.reshape(2, 1, -1)
means[n] = mu[0].copy()
covars[n] = np.exp(log_S[0]).copy()
return means, covars
def plot_X_1d(self, fignum=None, ax=None, colors=None):
"""
Plot latent space X in 1D:
-if fig is given, create input_dim subplots in fig and plot in these
-if ax is given plot input_dim 1D latent space plots of X into each `axis`
-if neither fig nor ax is given create a figure with fignum and plot in there
colors:
colors of different latent space dimensions input_dim
"""
import pylab
if ax is None:
fig = pylab.figure(num=fignum, figsize=(8, min(12, (2 * self.X.shape[1]))))
if colors is None:
colors = pylab.gca()._get_lines.color_cycle
pylab.clf()
else:
colors = iter(colors)
plots = []
x = np.arange(self.X.shape[0])
for i in range(self.X.shape[1]):
if ax is None:
a = fig.add_subplot(self.X.shape[1], 1, i + 1)
elif isinstance(ax, (tuple, list)):
a = ax[i]
else:
raise ValueError("Need one ax per latent dimnesion input_dim")
a.plot(self.X, c='k', alpha=.3)
plots.extend(a.plot(x, self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
a.fill_between(x,
self.X.T[i] - 2 * np.sqrt(self.X_variance.T[i]),
self.X.T[i] + 2 * np.sqrt(self.X_variance.T[i]),
facecolor=plots[-1].get_color(),
alpha=.3)
a.legend(borderaxespad=0.)
a.set_xlim(x.min(), x.max())
if i < self.X.shape[1] - 1:
a.set_xticklabels('')
pylab.draw()
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
return fig
def __getstate__(self):
return (self.likelihood, self.input_dim, self.X, self.X_variance,
self.init, self.M, self.Z, self.kern,
self.oldpsave, self._debug)
def __setstate__(self, state):
self.__init__(*state)
def _debug_filter_params(self, x):
start, end = 0, self.X.size,
X = x[start:end].reshape(self.N, self.input_dim)
start, end = end, end + self.X_variance.size
X_v = x[start:end].reshape(self.N, self.input_dim)
start, end = end, end + (self.M * self.input_dim)
Z = x[start:end].reshape(self.M, self.input_dim)
start, end = end, end + self.input_dim
theta = x[start:]
return X, X_v, Z, theta
def _debug_get_axis(self, figs):
if figs[-1].axes:
ax1 = figs[-1].axes[0]
ax1.cla()
else:
ax1 = figs[-1].add_subplot(111)
return ax1
def _debug_plot(self):
assert self._debug, "must enable _debug, to debug-plot"
import pylab
# from mpl_toolkits.mplot3d import Axes3D
figs = [pylab.figure('BGPLVM DEBUG', figsize=(12, 4))]
# fig.clf()
# log like
# splotshape = (6, 4)
# ax1 = pylab.subplot2grid(splotshape, (0, 0), 1, 4)
ax1 = self._debug_get_axis(figs)
ax1.text(.5, .5, "Optimization", alpha=.3, transform=ax1.transAxes,
ha='center', va='center')
kllls = np.array(self._savedklll)
LL, = ax1.plot(kllls[:, 0], kllls[:, 1] - kllls[:, 2], '-', label=r'$\log p(\mathbf{Y})$', mew=1.5)
KL, = ax1.plot(kllls[:, 0], kllls[:, 2], '-', label=r'$\mathcal{KL}(p||q)$', mew=1.5)
L, = ax1.plot(kllls[:, 0], kllls[:, 1], '-', label=r'$L$', mew=1.5) # \mathds{E}_{q(\mathbf{X})}[p(\mathbf{Y|X})\frac{p(\mathbf{X})}{q(\mathbf{X})}]
param_dict = dict(self._savedparams)
gradient_dict = dict(self._savedgradients)
# kmm_dict = dict(self._savedpsiKmm)
iters = np.array(param_dict.keys())
ABCD_dict = np.array(self._savedABCD)
self.showing = 0
# ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4)
figs.append(pylab.figure("BGPLVM DEBUG X", figsize=(12, 4)))
ax2 = self._debug_get_axis(figs)
ax2.text(.5, .5, r"$\mathbf{X}$", alpha=.5, transform=ax2.transAxes,
ha='center', va='center')
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(0, 0, 1, .86))
# ax3 = pylab.subplot2grid(splotshape, (3, 0), 2, 4, sharex=ax2)
figs.append(pylab.figure("BGPLVM DEBUG S", figsize=(12, 4)))
ax3 = self._debug_get_axis(figs)
ax3.text(.5, .5, r"$\mathbf{S}$", alpha=.5, transform=ax3.transAxes,
ha='center', va='center')
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(0, 0, 1, .86))
# ax4 = pylab.subplot2grid(splotshape, (5, 0), 2, 2)
figs.append(pylab.figure("BGPLVM DEBUG Z", figsize=(6, 4)))
ax4 = self._debug_get_axis(figs)
ax4.text(.5, .5, r"$\mathbf{Z}$", alpha=.5, transform=ax4.transAxes,
ha='center', va='center')
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(0, 0, 1, .86))
# ax5 = pylab.subplot2grid(splotshape, (5, 2), 2, 2)
figs.append(pylab.figure("BGPLVM DEBUG theta", figsize=(6, 4)))
ax5 = self._debug_get_axis(figs)
ax5.text(.5, .5, r"${\theta}$", alpha=.5, transform=ax5.transAxes,
ha='center', va='center')
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(.15, 0, 1, .86))
# figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6)))
# fig = figs[-1]
# ax6 = fig.add_subplot(121)
# ax6.text(.5, .5, r"${\mathbf{K}_{mm}}$", color='magenta', alpha=.5, transform=ax6.transAxes,
# ha='center', va='center')
# ax7 = fig.add_subplot(122)
# ax7.text(.5, .5, r"${\frac{dL}{dK_{mm}}}$", color='magenta', alpha=.5, transform=ax7.transAxes,
# ha='center', va='center')
figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6)))
fig = figs[-1]
ax8 = fig.add_subplot(121)
ax8.text(.5, .5, r"${\mathbf{A,B,C,D}}$", color='k', alpha=.5, transform=ax8.transAxes,
ha='center', va='center')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 1], label='A')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 2], label='B')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 3], label='C')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 4], label='D')
ax8.legend()
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(.15, 0, 1, .86))
X, S, Z, theta = self._debug_filter_params(param_dict[self.showing])
Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing])
# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag
quiver_units = 'xy'
quiver_scale = 1
quiver_scale_units = 'xy'
Xlatentplts = ax2.plot(X, ls="-", marker="x")
colors = colorConverter.to_rgba_array([p.get_color() for p in Xlatentplts], .4)
Ulatent = np.zeros_like(X)
xlatent = np.tile(np.arange(0, X.shape[0])[:, None], X.shape[1])
Xlatentgrads = ax2.quiver(xlatent, X, Ulatent, Xg, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale)
Slatentplts = ax3.plot(S, ls="-", marker="x")
Slatentgrads = ax3.quiver(xlatent, S, Ulatent, Sg, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale)
ax3.set_ylim(0, 1.)
xZ = np.tile(np.arange(0, Z.shape[0])[:, None], Z.shape[1])
UZ = np.zeros_like(Z)
Zplts = ax4.plot(Z, ls="-", marker="x")
Zgrads = ax4.quiver(xZ, Z, UZ, Zg, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale)
xtheta = np.arange(len(theta))
Utheta = np.zeros_like(theta)
thetaplts = ax5.bar(xtheta - .4, theta, color=colors)
thetagrads = ax5.quiver(xtheta, theta, Utheta, thetag, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale,
edgecolors=('k',), linewidths=[1])
pylab.setp(thetaplts, zorder=0)
pylab.setp(thetagrads, zorder=10)
ax5.set_xticks(np.arange(len(theta)))
ax5.set_xticklabels(self._get_param_names()[-len(theta):], rotation=17)
# imkmm = ax6.imshow(kmm_dict[self.showing][0])
# from mpl_toolkits.axes_grid1 import make_axes_locatable
# divider = make_axes_locatable(ax6)
# caxkmm = divider.append_axes("right", "5%", pad="1%")
# cbarkmm = pylab.colorbar(imkmm, cax=caxkmm)
#
# imkmmdl = ax7.imshow(kmm_dict[self.showing][1])
# divider = make_axes_locatable(ax7)
# caxkmmdl = divider.append_axes("right", "5%", pad="1%")
# cbarkmmdl = pylab.colorbar(imkmmdl, cax=caxkmmdl)
# input_dimleg = ax1.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)],
# loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.15, 1, 1.15),
# borderaxespad=0, mode="expand")
ax2.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)],
loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1),
borderaxespad=0, mode="expand")
ax3.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)],
loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1),
borderaxespad=0, mode="expand")
ax4.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)],
loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1),
borderaxespad=0, mode="expand")
ax5.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)],
loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1),
borderaxespad=0, mode="expand")
Lleg = ax1.legend()
Lleg.draggable()
# ax1.add_artist(input_dimleg)
indicatorKL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 2], 'o', c=KL.get_color())
indicatorLL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1] - kllls[self.showing, 2], 'o', c=LL.get_color())
indicatorL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1], 'o', c=L.get_color())
# for err in self._savederrors:
# if err < kllls.shape[0]:
# ax1.scatter(kllls[err, 0], kllls[err, 2], s=50, marker=(5, 2), c=KL.get_color())
# ax1.scatter(kllls[err, 0], kllls[err, 1] - kllls[err, 2], s=50, marker=(5, 2), c=LL.get_color())
# ax1.scatter(kllls[err, 0], kllls[err, 1], s=50, marker=(5, 2), c=L.get_color())
# try:
# for f in figs:
# f.canvas.draw()
# f.tight_layout(box=(0, .15, 1, .9))
# # pylab.draw()
# # pylab.tight_layout(box=(0, .1, 1, .9))
# except:
# pass
# parameter changes
# ax2 = pylab.subplot2grid((4, 1), (1, 0), 3, 1, projection='3d')
button_options = [0, 0] # [0]: clicked -- [1]: dragged
def update_plots(event):
if button_options[0] and not button_options[1]:
# event.button, event.x, event.y, event.xdata, event.ydata)
tmp = np.abs(iters - event.xdata)
closest_hit = iters[tmp == tmp.min()][0]
if closest_hit != self.showing:
self.showing = closest_hit
# print closest_hit, iters, event.xdata
indicatorLL.set_data(self.showing, kllls[self.showing, 1] - kllls[self.showing, 2])
indicatorKL.set_data(self.showing, kllls[self.showing, 2])
indicatorL.set_data(self.showing, kllls[self.showing, 1])
X, S, Z, theta = self._debug_filter_params(param_dict[self.showing])
Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing])
# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag
for i, Xlatent in enumerate(Xlatentplts):
Xlatent.set_ydata(X[:, i])
Xlatentgrads.set_offsets(np.array([xlatent.ravel(), X.ravel()]).T)
Xlatentgrads.set_UVC(Ulatent, Xg)
for i, Slatent in enumerate(Slatentplts):
Slatent.set_ydata(S[:, i])
Slatentgrads.set_offsets(np.array([xlatent.ravel(), S.ravel()]).T)
Slatentgrads.set_UVC(Ulatent, Sg)
for i, Zlatent in enumerate(Zplts):
Zlatent.set_ydata(Z[:, i])
Zgrads.set_offsets(np.array([xZ.ravel(), Z.ravel()]).T)
Zgrads.set_UVC(UZ, Zg)
for p, t in zip(thetaplts, theta):
p.set_height(t)
thetagrads.set_offsets(np.array([xtheta.ravel(), theta.ravel()]).T)
thetagrads.set_UVC(Utheta, thetag)
# imkmm.set_data(kmm_dict[self.showing][0])
# imkmm.autoscale()
# cbarkmm.update_normal(imkmm)
#
# imkmmdl.set_data(kmm_dict[self.showing][1])
# imkmmdl.autoscale()
# cbarkmmdl.update_normal(imkmmdl)
ax2.relim()
# ax3.relim()
ax4.relim()
ax5.relim()
ax2.autoscale()
# ax3.autoscale()
ax4.autoscale()
ax5.autoscale()
[fig.canvas.draw() for fig in figs]
button_options[0] = 0
button_options[1] = 0
def onclick(event):
if event.inaxes is ax1 and event.button == 1:
button_options[0] = 1
def motion(event):
if button_options[0]:
button_options[1] = 1
cidr = figs[0].canvas.mpl_connect('button_release_event', update_plots)
cidp = figs[0].canvas.mpl_connect('button_press_event', onclick)
cidd = figs[0].canvas.mpl_connect('motion_notify_event', motion)
return ax1, ax2, ax3, ax4, ax5 # , ax6, ax7
def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
objective function for fitting the latent variables for test points
(negative log-likelihood: should be minimised!)
"""
mu, log_S = mu_S.reshape(2, 1, -1)
S = np.exp(log_S)
psi0 = kern.psi0(Z, mu, S)
psi1 = kern.psi1(Z, mu, S)
psi2 = kern.psi2(Z, mu, S)
lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
dmu = mu0 + mu1 + mu2 - mu
# dS = S0 + S1 + S2 -0.5 + .5/S
dlnS = S * (S0 + S1 + S2 - 0.5) + .5
return -lik, -np.hstack((dmu.flatten(), dlnS.flatten()))
def latent_cost(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
objective function for fitting the latent variables (negative log-likelihood: should be minimised!)
This is the same as latent_cost_and_grad but only for the objective
"""
mu, log_S = mu_S.reshape(2, 1, -1)
S = np.exp(log_S)
psi0 = kern.psi0(Z, mu, S)
psi1 = kern.psi1(Z, mu, S)
psi2 = kern.psi2(Z, mu, S)
lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
return -float(lik)
def latent_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
This is the same as latent_cost_and_grad but only for the grad
"""
mu, log_S = mu_S.reshape(2, 1, -1)
S = np.exp(log_S)
mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
dmu = mu0 + mu1 + mu2 - mu
# dS = S0 + S1 + S2 -0.5 + .5/S
dlnS = S * (S0 + S1 + S2 - 0.5) + .5
return -np.hstack((dmu.flatten(), dlnS.flatten()))

View file

@ -1,252 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, tdot, symmetrify,pdinv
from ..util.plot import gpplot
from .. import kern
from scipy import stats, linalg
from ..core import sparse_GP
def backsub_both_sides(L,X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1)
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
class FITC(sparse_GP):
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
super(FITC, self).__init__(X, likelihood, kernel, normalize_X=normalize_X)
def update_likelihood_approximation(self):
"""
Approximates a non-gaussian likelihood using Expectation Propagation
For a Gaussian (or direct: TODO) likelihood, no iteration is required:
this function does nothing
Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in sparse_GP.
The true precison is now 'true_precision' not 'precision'.
"""
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
def _computations(self):
#factor Kmm
self.Lm = jitchol(self.Kmm)
self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy()
self.Diag0 = self.psi0 - np.diag(self.Qnn)
self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #Includes Diag0 in the precision
self.V_star = self.beta_star * self.likelihood.Y
# The rather complex computations of self.A
if self.has_uncertain_inputs:
raise NotImplementedError
else:
if self.likelihood.is_heteroscedastic:
assert self.likelihood.D == 1
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N)))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
# factor B
self.B = np.eye(self.M) + self.A
self.LB = jitchol(self.B)
self.LBi = chol_inv(self.LB)
self.psi1V = np.dot(self.psi1, self.V_star)
Lmi_psi1V, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0)
Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
b_psi1_Ki = self.beta_star * Kmmipsi1.T
Ki_pbp_Ki = np.dot(Kmmipsi1,b_psi1_Ki)
Kmmi = np.dot(self.Lmi.T,self.Lmi)
LBiLmi = np.dot(self.LBi,self.Lmi)
LBL_inv = np.dot(LBiLmi.T,LBiLmi)
VVT = np.outer(self.V_star,self.V_star)
VV_p_Ki = np.dot(VVT,Kmmipsi1.T)
Ki_pVVp_Ki = np.dot(Kmmipsi1,VV_p_Ki)
psi1beta = self.psi1*self.beta_star.T
H = self.Kmm + mdot(self.psi1,psi1beta.T)
LH = jitchol(H)
LHi = chol_inv(LH)
Hi = np.dot(LHi.T,LHi)
betapsi1TLmiLBi = np.dot(psi1beta.T,LBiLmi.T)
alpha = np.array([np.dot(a.T,a) for a in betapsi1TLmiLBi])[:,None]
gamma_1 = mdot(VVT,self.psi1.T,Hi)
pHip = mdot(self.psi1.T,Hi,self.psi1)
gamma_2 = mdot(self.beta_star*pHip,self.V_star)
gamma_3 = self.V_star * gamma_2
self._dL_dpsi0 = -0.5 * self.beta_star#dA_dpsi0: logdet(self.beta_star)
self._dL_dpsi0 += .5 * self.V_star**2 #dA_psi0: yT*beta_star*y
self._dL_dpsi0 += .5 *alpha #dC_dpsi0
self._dL_dpsi0 += 0.5*mdot(self.beta_star*pHip,self.V_star)**2 - self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T #dD_dpsi0
self._dL_dpsi1 = b_psi1_Ki.copy() #dA_dpsi1: logdet(self.beta_star)
self._dL_dpsi1 += -np.dot(psi1beta.T,LBL_inv) #dC_dpsi1
self._dL_dpsi1 += gamma_1 - mdot(psi1beta.T,Hi,self.psi1,gamma_1) #dD_dpsi1
self._dL_dKmm = -0.5 * np.dot(Kmmipsi1,b_psi1_Ki) #dA_dKmm: logdet(self.beta_star)
self._dL_dKmm += .5*(LBL_inv - Kmmi) + mdot(LBL_inv,psi1beta,Kmmipsi1.T) #dC_dKmm
self._dL_dKmm += -.5 * mdot(Hi,self.psi1,gamma_1) #dD_dKmm
self._dpsi1_dtheta = 0
self._dpsi1_dX = 0
self._dKmm_dtheta = 0
self._dKmm_dX = 0
self._dpsi1_dX_jkj = 0
self._dpsi1_dtheta_jkj = 0
for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.N),self.V_star,alpha,gamma_2,gamma_3):
K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
#Diag_dpsi1 = Diag_dA_dpsi1: yT*beta_star*y + Diag_dC_dpsi1 +Diag_dD_dpsi1
_dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
#Diag_dKmm = Diag_dA_dKmm: yT*beta_star*y +Diag_dC_dKmm +Diag_dD_dKmm
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm ,self.Z)
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
# the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedatic derivates not implemented"
else:
# likelihood is not heterscedatic
dbstar_dnoise = self.likelihood.precision * (self.beta_star**2 * self.Diag0[:,None] - self.beta_star)
Lmi_psi1 = mdot(self.Lmi,self.psi1)
LBiLmipsi1 = np.dot(self.LBi,Lmi_psi1)
aux_0 = np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V)
dA_dnoise = 0.5 * self.D * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.D * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
alpha = mdot(LBiLmipsi1,self.V_star)
alpha_ = mdot(LBiLmipsi1.T,alpha)
dD_dnoise_2 = -0.5 * self.D * np.sum(alpha_**2 * dbstar_dnoise )
dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y)
dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star)
dD_dnoise = dD_dnoise_1 + dD_dnoise_2
self.partial_for_likelihood = dA_dnoise + dC_dnoise + dD_dnoise
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
C = -self.D * (np.sum(np.log(np.diag(self.LB))))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + C + D
def _log_likelihood_gradients(self):
pass
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self):
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X)
dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1,self.X,self.Z)
dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm,X=self.Z)
dL_dtheta += self._dKmm_dtheta
dL_dtheta += self._dpsi1_dtheta
return dL_dtheta
def dL_dZ(self):
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X)
dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm,X=self.Z)
dL_dZ += self._dpsi1_dX
dL_dZ += self._dKmm_dX
return dL_dZ
def _raw_predict(self, Xnew, which_parts, full_cov=False):
if self.likelihood.is_heteroscedastic:
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.likelihood.precision.flatten())
self.Diag = self.Diag0 * Iplus_Dprod_i
self.P = Iplus_Dprod_i[:,None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi,self.psi1)
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
self.w = self.Diag * self.likelihood.v_tilde
self.Gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde))
self.mu = self.w + np.dot(self.P,self.Gamma)
"""
Make a prediction for the generalized FITC model
Arguments
---------
X : Input prediction data - Nx1 numpy array (floats)
"""
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1)
C = np.eye(self.M) - np.dot(V.T,V)
mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
#self.C = C
#self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
#self.mu_u = mu_u
#self.U = U
# q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
mu_H = np.dot(mu_u,self.mu)
self.mu_H = mu_H
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
KR0T = np.dot(Kx.T,self.Lmi.T)
mu_star = np.dot(KR0T,mu_H)
if full_cov:
Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
return mu_star[:,None],var
else:
raise NotImplementedError, "homoscedastic fitc not implemented"
"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]
"""

View file

@ -1,67 +0,0 @@
### Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import sys, pdb
from .. import kern
from ..core import model
from ..util.linalg import pdinv, PCA
from ..core import GP
from ..likelihoods import Gaussian
from .. import util
from GPy.util import plot_latent
class GPLVM(GP):
"""
Gaussian Process Latent Variable Model
:param Y: observed data
:type Y: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
"""
def __init__(self, Y, input_dim, init='PCA', X = None, kernel=None, normalize_Y=False):
if X is None:
X = self.initialise_latent(init, input_dim, Y)
if kernel is None:
kernel = kern.rbf(input_dim, ARD=input_dim>1) + kern.bias(input_dim, np.exp(-2)) + kern.white(input_dim, np.exp(-2))
likelihood = Gaussian(Y, normalize=normalize_Y)
GP.__init__(self, X, likelihood, kernel, normalize_X=False)
self._set_params(self._get_params())
def initialise_latent(self, init, input_dim, Y):
if init == 'PCA':
return PCA(Y, input_dim)[0]
else:
return np.random.randn(Y.shape[0], input_dim)
def _get_param_names(self):
return sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.N)],[]) + GP._get_param_names(self)
def _get_params(self):
return np.hstack((self.X.flatten(), GP._get_params(self)))
def _set_params(self,x):
self.X = x[:self.N*self.input_dim].reshape(self.N,self.input_dim).copy()
GP._set_params(self, x[self.X.size:])
def _log_likelihood_gradients(self):
dL_dX = 2.*self.kern.dK_dX(self.dL_dK,self.X)
return np.hstack((dL_dX.flatten(),GP._log_likelihood_gradients(self)))
def plot(self):
assert self.likelihood.Y.shape[1]==2
pb.scatter(self.likelihood.Y[:,0],self.likelihood.Y[:,1],40,self.X[:,0].copy(),linewidth=0,cmap=pb.cm.jet)
Xnew = np.linspace(self.X.min(),self.X.max(),200)[:,None]
mu, var, upper, lower = self.predict(Xnew)
pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)
def plot_latent(self, *args, **kwargs):
return util.plot_latent.plot_latent(self, *args, **kwargs)

View file

@ -1,41 +0,0 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import GP
from .. import likelihoods
from .. import kern
class GP_classification(GP):
"""
Gaussian Process classification
This is a thin wrapper around the models.GP class, with a set of sensible defalts
:param X: input observations
:param Y: observed values
:param likelihood: a GPy likelihood, defaults to binomial with probit link_function
:param kernel: a GPy kernel, defaults to rbf
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self,X,Y=None,likelihood=None,kernel=None,normalize_X=False,normalize_Y=False):
if kernel is None:
kernel = kern.rbf(X.shape[1])
if likelihood is None:
distribution = likelihoods.likelihood_functions.binomial()
likelihood = likelihoods.EP(Y, distribution)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):
raise Warning, 'likelihood.data and Y are different.'
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params())

View file

@ -1,35 +0,0 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import GP
from .. import likelihoods
from .. import kern
class GP_regression(GP):
"""
Gaussian Process model for regression
This is a thin wrapper around the models.GP class, with a set of sensible defalts
:param X: input observations
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False):
if kernel is None:
kernel = kern.rbf(X.shape[1])
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params())

View file

@ -2,14 +2,12 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from GP_regression import GP_regression
from GP_classification import GP_classification
from sparse_GP_regression import sparse_GP_regression
from sparse_GP_classification import sparse_GP_classification
from GPLVM import GPLVM
from warped_GP import warpedGP
from sparse_GPLVM import sparse_GPLVM
from Bayesian_GPLVM import Bayesian_GPLVM
from gp_regression import GPRegression
from sparse_gp_regression import SparseGPRegression
from gplvm import GPLVM
from warped_gp import WarpedGP
from bayesian_gplvm import BayesianGPLVM
from mrd import MRD
from generalized_FITC import generalized_FITC
from FITC import FITC
from generalized_fitc import GeneralizedFITC
from fitc import FITC

View file

@ -1,221 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
from ..util.plot import gpplot
from .. import kern
from scipy import stats, linalg
from ..core import sparse_GP
def backsub_both_sides(L,X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1)
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
class generalized_FITC(sparse_GP):
"""
Naish-Guzman, A. and Holden, S. (2008) implemantation of EP with FITC.
:param X: inputs
:type X: np.ndarray (N x input_dim)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP)
:param kernel : the kernel/covariance function. See link kernels
:type kernel: a GPy kernel
:param X_variance: The variance in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (N x input_dim) | None
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x input_dim) | None
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
self.Z = Z
self.M = self.Z.shape[0]
self.true_precision = likelihood.precision
super(generalized_FITC, self).__init__(X, likelihood, kernel=kernel, Z=self.Z, X_variance=X_variance, normalize_X=normalize_X)
self._set_params(self._get_params())
def _set_params(self, p):
self.Z = p[:self.M*self.input_dim].reshape(self.M, self.input_dim)
self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
self._compute_kernel_matrices()
self._computations()
self._FITC_computations()
def update_likelihood_approximation(self):
"""
Approximates a non-gaussian likelihood using Expectation Propagation
For a Gaussian (or direct: TODO) likelihood, no iteration is required:
this function does nothing
Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in sparse_GP.
The true precison is now 'true_precision' not 'precision'.
"""
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self.true_precision = self.likelihood.precision # Save the true precision
self.likelihood.precision = self.true_precision/(1. + self.true_precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation
self._set_params(self._get_params()) # update the GP
def _FITC_computations(self):
"""
FITC approximation doesn't have the correction term in the log-likelihood bound,
but adds a diagonal term to the covariance matrix: diag(Knn - Qnn).
This function:
- computes the FITC diagonal term
- removes the extra terms computed in the sparse_GP approximation
- computes the likelihood gradients wrt the true precision.
"""
#NOTE the true precison is now 'true_precision' not 'precision'
if self.likelihood.is_heteroscedastic:
# Compute generalized FITC's diagonal term of the covariance
self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1)
#self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm)
#self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1)
#a = kj
self.Diag0 = self.psi0 - np.diag(self.Qnn)
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.true_precision.flatten())
self.Diag = self.Diag0 * Iplus_Dprod_i
self.P = Iplus_Dprod_i[:,None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi,self.psi1)
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
self.w = self.Diag * self.likelihood.v_tilde
self.Gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde))
self.mu = self.w + np.dot(self.P,self.Gamma)
# Remove extra term from dL_dpsi1
self.dL_dpsi1 -= mdot(self.Lmi.T,Lmipsi1*self.likelihood.precision.flatten().reshape(1,self.N))
#self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm)
#self.dL_dpsi1 -= mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
#########333333
#self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
#########333333
else:
raise NotImplementedError, "homoscedastic fitc not implemented"
# Remove extra term from dL_dpsi1
#self.dL_dpsi1 += -mdot(self.Kmmi,self.psi1*self.likelihood.precision) #dB
sf = self.scale_factor
sf2 = sf**2
# Remove extra term from dL_dKmm
self.dL_dKmm += 0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
self.dL_dpsi0 = None
#the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedastic derivates not implemented"
else:
raise NotImplementedError, "homoscedastic derivatives not implemented"
#likelihood is not heterscedatic
#self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
#self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
#self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
#TODO partial derivative vector for the likelihood not implemented
def dL_dtheta(self):
"""
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
"""
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
if self.has_uncertain_inputs:
raise NotImplementedError, "heteroscedatic derivates not implemented"
else:
#NOTE in sparse_GP this would include the gradient wrt psi0
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X)
return dL_dtheta
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2
if self.likelihood.is_heteroscedastic:
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
else:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.M*np.log(sf2))
#C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V))
#self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
#D_ = 0.5*np.trace(self.Cpsi1VVpsi1)
return A+C+D
def _raw_predict(self, Xnew, which_parts, full_cov=False):
if self.likelihood.is_heteroscedastic:
"""
Make a prediction for the generalized FITC model
Arguments
---------
X : Input prediction data - Nx1 numpy array (floats)
"""
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1)
C = np.eye(self.M) - np.dot(V.T,V)
mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
#self.C = C
#self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
#self.mu_u = mu_u
#self.U = U
# q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
mu_H = np.dot(mu_u,self.mu)
self.mu_H = mu_H
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
KR0T = np.dot(Kx.T,self.Lmi.T)
mu_star = np.dot(KR0T,mu_H)
if full_cov:
Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed?
var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) # TODO: RA, is this line needed?
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
return mu_star[:,None],var
else:
raise NotImplementedError, "homoscedastic fitc not implemented"
"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]
"""

View file

@ -4,14 +4,13 @@ Created on 10 Apr 2013
@author: Max Zwiessele
'''
from GPy.core import model
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
from GPy.core import sparse_GP
from GPy.core import SparseGP
from GPy.util.linalg import PCA
from scipy import linalg
import numpy
import itertools
import pylab
from GPy.kern.kern import kern
from GPy.models.bayesian_gplvm import BayesianGPLVM
class MRD(model):
"""
@ -38,7 +37,7 @@ class MRD(model):
*concat: PCA on concatenated outputs
*single: PCA on each output
*random: random
:param M:
:param num_inducing:
number of inducing inputs to use
:param Z:
initial inducing inputs
@ -62,22 +61,22 @@ class MRD(model):
assert not ('kernel' in kw), "pass kernels through `kernels` argument"
self.input_dim = input_dim
self.M = M
self.num_inducing = M
self._debug = _debug
self._init = True
X = self._init_X(initx, likelihood_or_Y_list)
Z = self._init_Z(initz, X)
self.bgplvms = [Bayesian_GPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, M=self.M, **kw) for l, k in zip(likelihood_or_Y_list, kernels)]
self.bgplvms = [BayesianGPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, M=self.num_inducing, **kw) for l, k in zip(likelihood_or_Y_list, kernels)]
del self._init
self.gref = self.bgplvms[0]
nparams = numpy.array([0] + [sparse_GP._get_params(g).size - g.Z.size for g in self.bgplvms])
nparams = numpy.array([0] + [SparseGP._get_params(g).size - g.Z.size for g in self.bgplvms])
self.nparams = nparams.cumsum()
self.N = self.gref.N
self.NQ = self.N * self.input_dim
self.MQ = self.M * self.input_dim
self.MQ = self.num_inducing * self.input_dim
model.__init__(self) # @UndefinedVariable
self._set_params(self._get_params())
@ -151,7 +150,7 @@ class MRD(model):
itertools.izip(ns,
itertools.repeat(name)))
return list(itertools.chain(n1var, *(map_names(\
sparse_GP._get_param_names(g)[self.MQ:], n) \
SparseGP._get_param_names(g)[self.MQ:], n) \
for g, n in zip(self.bgplvms, self.names))))
def _get_params(self):
@ -165,14 +164,14 @@ class MRD(model):
X = self.gref.X.ravel()
X_var = self.gref.X_variance.ravel()
Z = self.gref.Z.ravel()
thetas = [sparse_GP._get_params(g)[g.Z.size:] for g in self.bgplvms]
thetas = [SparseGP._get_params(g)[g.Z.size:] for g in self.bgplvms]
params = numpy.hstack([X, X_var, Z, numpy.hstack(thetas)])
return params
# def _set_var_params(self, g, X, X_var, Z):
# g.X = X.reshape(self.N, self.input_dim)
# g.X_variance = X_var.reshape(self.N, self.input_dim)
# g.Z = Z.reshape(self.M, self.input_dim)
# g.Z = Z.reshape(self.num_inducing, self.input_dim)
#
# def _set_kern_params(self, g, p):
# g.kern._set_params(p[:g.kern.Nparam])
@ -206,7 +205,7 @@ class MRD(model):
def log_likelihood(self):
ll = -self.gref.KL_divergence()
for g in self.bgplvms:
ll += sparse_GP.log_likelihood(g)
ll += SparseGP.log_likelihood(g)
return ll
def _log_likelihood_gradients(self):
@ -215,7 +214,7 @@ class MRD(model):
dLdmu -= dKLmu
dLdS -= dKLdS
dLdmuS = numpy.hstack((dLdmu.flatten(), dLdS.flatten())).flatten()
dldzt1 = reduce(lambda a, b: a + b, (sparse_GP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms))
dldzt1 = reduce(lambda a, b: a + b, (SparseGP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms))
return numpy.hstack((dLdmuS,
dldzt1,
@ -250,9 +249,9 @@ class MRD(model):
if X is None:
X = self.X
if init in "permute":
Z = numpy.random.permutation(X.copy())[:self.M]
Z = numpy.random.permutation(X.copy())[:self.num_inducing]
elif init in "random":
Z = numpy.random.randn(self.M, self.input_dim) * X.var()
Z = numpy.random.randn(self.num_inducing, self.input_dim) * X.var()
self.Z = Z
return Z

View file

@ -1,61 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import sys, pdb
# from .. import kern
# from ..core import model
# from ..util.linalg import pdinv, PCA
from GPLVM import GPLVM
from sparse_GP_regression import sparse_GP_regression
class sparse_GPLVM(sparse_GP_regression, GPLVM):
"""
Sparse Gaussian Process Latent Variable Model
:param Y: observed data
:type Y: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
"""
def __init__(self, Y, input_dim, kernel=None, init='PCA', M=10):
X = self.initialise_latent(init, input_dim, Y)
sparse_GP_regression.__init__(self, X, Y, kernel=kernel,M=M)
def _get_param_names(self):
return (sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.N)],[])
+ sparse_GP_regression._get_param_names(self))
def _get_params(self):
return np.hstack((self.X.flatten(), sparse_GP_regression._get_params(self)))
def _set_params(self,x):
self.X = x[:self.X.size].reshape(self.N,self.input_dim).copy()
sparse_GP_regression._set_params(self, x[self.X.size:])
def log_likelihood(self):
return sparse_GP_regression.log_likelihood(self)
def dL_dX(self):
dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0,self.X)
dL_dX += self.kern.dK_dX(self.dL_dpsi1.T,self.X,self.Z)
return dL_dX
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dX().flatten(), sparse_GP_regression._log_likelihood_gradients(self)))
def plot(self):
GPLVM.plot(self)
#passing Z without a small amout of jitter will induce the white kernel where we don;t want it!
mu, var, upper, lower = sparse_GP_regression.predict(self, self.Z+np.random.randn(*self.Z.shape)*0.0001)
pb.plot(mu[:, 0] , mu[:, 1], 'ko')
def plot_latent(self, *args, **kwargs):
input_1, input_2 = GPLVM.plot_latent(*args, **kwargs)
pb.plot(m.Z[:, input_1], m.Z[:, input_2], '^w')

View file

@ -1,50 +0,0 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import sparse_GP
from .. import likelihoods
from .. import kern
from ..likelihoods import likelihood
from GP_regression import GP_regression
class sparse_GP_classification(sparse_GP):
"""
sparse Gaussian Process model for classification
This is a thin wrapper around the sparse_GP class, with a set of sensible defalts
:param X: input observations
:param Y: observed values
:param likelihood: a GPy likelihood, defaults to binomial with probit link_function
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:rtype: model object
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10):
if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
if likelihood is None:
distribution = likelihoods.likelihood_functions.binomial()
likelihood = likelihoods.EP(Y, distribution)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):
raise Warning, 'likelihood.data and Y are different.'
if Z is None:
i = np.random.permutation(X.shape[0])[:M]
Z = X[i].copy()
else:
assert Z.shape[1]==X.shape[1]
sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X)
self._set_params(self._get_params())

View file

@ -1,47 +0,0 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import sparse_GP
from .. import likelihoods
from .. import kern
from ..likelihoods import likelihood
from GP_regression import GP_regression
class sparse_GP_regression(sparse_GP):
"""
Gaussian Process model for regression
This is a thin wrapper around the sparse_GP class, with a set of sensible defalts
:param X: input observations
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:rtype: model object
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10, X_variance=None):
#kern defaults to rbf (plus white for stability)
if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
#Z defaults to a subset of the data
if Z is None:
i = np.random.permutation(X.shape[0])[:M]
Z = X[i].copy()
else:
assert Z.shape[1]==X.shape[1]
#likelihood defaults to Gaussian
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance)
self._set_params(self._get_params())

View file

@ -1,93 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from .. import kern
from ..core import model
from ..util.linalg import pdinv
from ..util.plot import gpplot
from ..util.warping_functions import *
from GP_regression import GP_regression
from ..core import GP
from .. import likelihoods
from .. import kern
class warpedGP(GP):
def __init__(self, X, Y, kernel=None, warping_function = None, warping_terms = 3, normalize_X=False, normalize_Y=False):
if kernel is None:
kernel = kern.rbf(X.shape[1])
if warping_function == None:
self.warping_function = TanhWarpingFunction_d(warping_terms)
self.warping_params = (np.random.randn(self.warping_function.n_terms*3+1,) * 1)
Y = self._scale_data(Y)
self.has_uncertain_inputs = False
self.Y_untransformed = Y.copy()
self.predict_in_warped_space = False
likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y)
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params())
def _scale_data(self, Y):
self._Ymax = Y.max()
self._Ymin = Y.min()
return (Y-self._Ymin)/(self._Ymax-self._Ymin) - 0.5
def _unscale_data(self, Y):
return (Y + 0.5)*(self._Ymax - self._Ymin) + self._Ymin
def _set_params(self, x):
self.warping_params = x[:self.warping_function.num_parameters]
Y = self.transform_data()
self.likelihood.set_data(Y)
GP._set_params(self, x[self.warping_function.num_parameters:].copy())
def _get_params(self):
return np.hstack((self.warping_params.flatten().copy(), GP._get_params(self).copy()))
def _get_param_names(self):
warping_names = self.warping_function._get_param_names()
param_names = GP._get_param_names(self)
return warping_names + param_names
def transform_data(self):
Y = self.warping_function.f(self.Y_untransformed.copy(), self.warping_params).copy()
return Y
def log_likelihood(self):
ll = GP.log_likelihood(self)
jacobian = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
return ll + np.log(jacobian).sum()
def _log_likelihood_gradients(self):
ll_grads = GP._log_likelihood_gradients(self)
alpha = np.dot(self.Ki, self.likelihood.Y.flatten())
warping_grads = self.warping_function_gradients(alpha)
warping_grads = np.append(warping_grads[:,:-1].flatten(), warping_grads[0,-1])
return np.hstack((warping_grads.flatten(), ll_grads.flatten()))
def warping_function_gradients(self, Kiy):
grad_y = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed, self.warping_params,
return_covar_chain = True)
djac_dpsi = ((1.0/grad_y[:,:, None, None])*grad_y_psi).sum(axis=0).sum(axis=0)
dquad_dpsi = (Kiy[:,None,None,None] * grad_psi).sum(axis=0).sum(axis=0)
return -dquad_dpsi + djac_dpsi
def plot_warping(self):
self.warping_function.plot(self.warping_params, self.Y_untransformed.min(), self.Y_untransformed.max())
def _raw_predict(self, *args, **kwargs):
mu, var = GP._raw_predict(self, *args, **kwargs)
if self.predict_in_warped_space:
mu = self.warping_function.f_inv(mu, self.warping_params)
var = self.warping_function.f_inv(var, self.warping_params)
mu = self._unscale_data(mu)
return mu, var

View file

@ -4,6 +4,7 @@
import unittest
import numpy as np
import GPy
from GPy.models.bayesian_gplvm import BayesianGPLVM
class BGPLVMTests(unittest.TestCase):
def test_bias_kern(self):
@ -11,10 +12,10 @@ class BGPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M)
m = BayesianGPLVM(Y, input_dim, kernel=k, M=M)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
@ -24,10 +25,10 @@ class BGPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M)
m = BayesianGPLVM(Y, input_dim, kernel=k, M=M)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
@ -37,10 +38,10 @@ class BGPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M)
m = BayesianGPLVM(Y, input_dim, kernel=k, M=M)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
@ -50,10 +51,10 @@ class BGPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M)
m = BayesianGPLVM(Y, input_dim, kernel=k, M=M)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
@ -64,10 +65,10 @@ class BGPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim)
k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M)
m = BayesianGPLVM(Y, input_dim, kernel=k, M=M)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())

View file

@ -9,6 +9,7 @@ import pkgutil
import os
import random
from nose.tools import nottest
import sys
class ExamplesTests(unittest.TestCase):
def _checkgrad(self, model):
@ -40,9 +41,9 @@ def model_instance(model):
@nottest
def test_models():
examples_path = os.path.dirname(GPy.examples.__file__)
#Load modules
# Load modules
for loader, module_name, is_pkg in pkgutil.iter_modules([examples_path]):
#Load examples
# Load examples
module_examples = loader.find_module(module_name).load_module(module_name)
print "MODULE", module_examples
print "Before"
@ -56,11 +57,11 @@ def test_models():
continue
print "Testing example: ", example[0]
#Generate model
# Generate model
model = example[1]()
print model
#Create tests for instance check
# Create tests for instance check
"""
test = model_instance_generator(model)
test.__name__ = 'test_instance_%s' % example[0]
@ -78,4 +79,5 @@ def test_models():
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."
unittest.main()
# unittest.main()
test_models()

View file

@ -11,7 +11,7 @@ class GPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.GPLVM(Y, input_dim, kernel = k)
m.ensure_default_constraints()
@ -23,7 +23,7 @@ class GPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.GPLVM(Y, input_dim, kernel = k)
m.ensure_default_constraints()
@ -35,7 +35,7 @@ class GPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.GPLVM(Y, input_dim, kernel = k)
m.ensure_default_constraints()

View file

@ -12,7 +12,7 @@ class KernelTests(unittest.TestCase):
K.constrain_fixed('2')
X = np.random.rand(5,5)
Y = np.ones((5,1))
m = GPy.models.GP_regression(X,Y,K)
m = GPy.models.GPRegression(X,Y,K)
self.assertTrue(m.checkgrad())
def test_fixedkernel(self):
@ -23,7 +23,7 @@ class KernelTests(unittest.TestCase):
K = np.dot(X, X.T)
kernel = GPy.kern.fixed(4, K)
Y = np.ones((30,1))
m = GPy.models.GP_regression(X,Y,kernel=kernel)
m = GPy.models.GPRegression(X,Y,kernel=kernel)
self.assertTrue(m.checkgrad())
def test_coregionalisation(self):
@ -36,9 +36,9 @@ class KernelTests(unittest.TestCase):
Y = np.vstack((Y1,Y2))
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
k2 = GPy.kern.coregionalise(2,1)
k2 = GPy.kern.Coregionalise(2,1)
k = k1.prod(k2,tensor=True)
m = GPy.models.GP_regression(X,Y,kernel=k)
m = GPy.models.GPRegression(X,Y,kernel=k)
self.assertTrue(m.checkgrad())

View file

@ -20,7 +20,7 @@ class MRDTests(unittest.TestCase):
k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
K = k.K(X)
Ylist = [np.random.multivariate_normal(np.zeros(N), K, D).T for _ in range(num_m)]
Ylist = [np.random.multivariate_normal(np.zeros(N), K, input_dim).T for _ in range(num_m)]
likelihood_list = [GPy.likelihoods.Gaussian(Y) for Y in Ylist]
m = GPy.models.MRD(likelihood_list, input_dim=input_dim, kernels=k, M=M)

View file

@ -13,7 +13,7 @@ class PriorTests(unittest.TestCase):
y = b*X + C + 1*np.sin(X)
y += 0.05*np.random.randn(len(X))
X, y = X[:, None], y[:, None]
m = GPy.models.GP_regression(X, y)
m = GPy.models.GPRegression(X, y)
m.ensure_default_constraints()
lognormal = GPy.priors.LogGaussian(1, 2)
m.set_prior('rbf', lognormal)
@ -27,7 +27,7 @@ class PriorTests(unittest.TestCase):
y = b*X + C + 1*np.sin(X)
y += 0.05*np.random.randn(len(X))
X, y = X[:, None], y[:, None]
m = GPy.models.GP_regression(X, y)
m = GPy.models.GPRegression(X, y)
m.ensure_default_constraints()
Gamma = GPy.priors.Gamma(1, 1)
m.set_prior('rbf', Gamma)
@ -41,7 +41,7 @@ class PriorTests(unittest.TestCase):
y = b*X + C + 1*np.sin(X)
y += 0.05*np.random.randn(len(X))
X, y = X[:, None], y[:, None]
m = GPy.models.GP_regression(X, y)
m = GPy.models.GPRegression(X, y)
m.ensure_default_constraints()
gaussian = GPy.priors.Gaussian(1, 1)
success = False

View file

@ -21,36 +21,36 @@ def ard(p):
@testing.deepTest(__test__)
class Test(unittest.TestCase):
D = 9
M = 4
input_dim = 9
num_inducing = 4
N = 3
Nsamples = 6e6
def setUp(self):
self.kerns = (
# (GPy.kern.rbf(self.D, ARD=True) +
# GPy.kern.linear(self.D, ARD=True) +
# GPy.kern.bias(self.D) +
# GPy.kern.white(self.D)),
(GPy.kern.rbf(self.D, np.random.rand(), np.random.rand(self.D), ARD=True) +
GPy.kern.rbf(self.D, np.random.rand(), np.random.rand(self.D), ARD=True) +
GPy.kern.linear(self.D, np.random.rand(self.D), ARD=True) +
GPy.kern.bias(self.D) +
GPy.kern.white(self.D)),
# GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True),
# GPy.kern.linear(self.D, ARD=False), GPy.kern.linear(self.D, ARD=True),
# GPy.kern.linear(self.D) + GPy.kern.bias(self.D),
# GPy.kern.rbf(self.D) + GPy.kern.bias(self.D),
# GPy.kern.linear(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
# GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
# GPy.kern.bias(self.D), GPy.kern.white(self.D),
# (GPy.kern.rbf(self.input_dim, ARD=True) +
# GPy.kern.linear(self.input_dim, ARD=True) +
# GPy.kern.bias(self.input_dim) +
# GPy.kern.white(self.input_dim)),
(GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) +
GPy.kern.bias(self.input_dim) +
GPy.kern.white(self.input_dim)),
# GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True),
# GPy.kern.linear(self.input_dim, ARD=False), GPy.kern.linear(self.input_dim, ARD=True),
# GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim),
# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim),
# GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim),
# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim),
# GPy.kern.bias(self.input_dim), GPy.kern.white(self.input_dim),
)
self.q_x_mean = np.random.randn(self.D)
self.q_x_variance = np.exp(np.random.randn(self.D))
self.q_x_samples = np.random.randn(self.Nsamples, self.D) * np.sqrt(self.q_x_variance) + self.q_x_mean
self.Z = np.random.randn(self.M, self.D)
self.q_x_mean.shape = (1, self.D)
self.q_x_variance.shape = (1, self.D)
self.q_x_mean = np.random.randn(self.input_dim)
self.q_x_variance = np.exp(np.random.randn(self.input_dim))
self.q_x_samples = np.random.randn(self.Nsamples, self.input_dim) * np.sqrt(self.q_x_variance) + self.q_x_mean
self.Z = np.random.randn(self.num_inducing, self.input_dim)
self.q_x_mean.shape = (1, self.input_dim)
self.q_x_variance.shape = (1, self.input_dim)
def test_psi0(self):
for kern in self.kerns:
@ -63,7 +63,7 @@ class Test(unittest.TestCase):
for kern in self.kerns:
Nsamples = 100
psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance)
K_ = np.zeros((Nsamples, self.M))
K_ = np.zeros((Nsamples, self.num_inducing))
diffs = []
for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
K = kern.K(q_x_sample_stripe, self.Z)
@ -89,7 +89,7 @@ class Test(unittest.TestCase):
for kern in self.kerns:
Nsamples = 100
psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
K_ = np.zeros((self.M, self.M))
K_ = np.zeros((self.num_inducing, self.num_inducing))
diffs = []
for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
K = kern.K(q_x_sample_stripe, self.Z)

View file

@ -17,14 +17,14 @@ class PsiStatModel(model):
self.X_variance = X_variance
self.Z = Z
self.N, self.input_dim = X.shape
self.M, input_dim = Z.shape
self.num_inducing, input_dim = Z.shape
assert self.input_dim == input_dim, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape)
self.kern = kernel
super(PsiStatModel, self).__init__()
self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)
def _get_param_names(self):
Xnames = ["{}_{}_{}".format(what, i, j) for what, i, j in itertools.product(['X', 'X_variance'], range(self.N), range(self.input_dim))]
Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.M), range(self.input_dim))]
Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.num_inducing), range(self.input_dim))]
return Xnames + Znames + self.kern._get_param_names()
def _get_params(self):
return numpy.hstack([self.X.flatten(), self.X_variance.flatten(), self.Z.flatten(), self.kern._get_params()])
@ -34,7 +34,7 @@ class PsiStatModel(model):
start, end = end, end + self.X_variance.size
self.X_variance = x[start: end].reshape(self.N, self.input_dim)
start, end = end, end + self.Z.size
self.Z = x[start: end].reshape(self.M, self.input_dim)
self.Z = x[start: end].reshape(self.num_inducing, self.input_dim)
self.kern._set_params(x[end:])
def log_likelihood(self):
return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()
@ -43,19 +43,19 @@ class PsiStatModel(model):
try:
psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
except AttributeError:
psiZ = numpy.zeros(self.M * self.input_dim)
psiZ = numpy.zeros(self.num_inducing * self.input_dim)
thetagrad = self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance).flatten()
return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad))
class DPsiStatTest(unittest.TestCase):
input_dim = 5
N = 50
M = 10
D = 20
num_inducing = 10
input_dim = 20
X = numpy.random.randn(N, input_dim)
X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
Z = numpy.random.permutation(X)[:M]
Y = X.dot(numpy.random.randn(input_dim, D))
Z = numpy.random.permutation(X)[:num_inducing]
Y = X.dot(numpy.random.randn(input_dim, input_dim))
# kernels = [GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)), GPy.kern.rbf(input_dim, ARD=True), GPy.kern.bias(input_dim)]
kernels = [GPy.kern.linear(input_dim), GPy.kern.rbf(input_dim), GPy.kern.bias(input_dim),
@ -65,7 +65,7 @@ class DPsiStatTest(unittest.TestCase):
def testPsi0(self):
for k in self.kernels:
m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k)
M=self.num_inducing, kernel=k)
try:
assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts)))
except:
@ -80,27 +80,27 @@ class DPsiStatTest(unittest.TestCase):
def testPsi2_lin(self):
k = self.kernels[0]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k)
M=self.num_inducing, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_lin_bia(self):
k = self.kernels[3]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k)
M=self.num_inducing, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_rbf(self):
k = self.kernels[1]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k)
M=self.num_inducing, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_rbf_bia(self):
k = self.kernels[-1]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k)
M=self.num_inducing, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_bia(self):
k = self.kernels[2]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k)
M=self.num_inducing, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
@ -108,14 +108,14 @@ if __name__ == "__main__":
import sys
interactive = 'i' in sys.argv
if interactive:
# N, M, input_dim, D = 30, 5, 4, 30
# N, num_inducing, input_dim, input_dim = 30, 5, 4, 30
# X = numpy.random.rand(N, input_dim)
# k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
# K = k.K(X)
# Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T
# Y = numpy.random.multivariate_normal(numpy.zeros(N), K, input_dim).T
# Y -= Y.mean(axis=0)
# k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
# m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel=k, M=M)
# m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
# m.ensure_default_constraints()
# m.randomize()
# # self.assertTrue(m.checkgrad())
@ -136,22 +136,22 @@ if __name__ == "__main__":
# for k in kernels:
# m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=k)
# num_inducing=num_inducing, kernel=k)
# assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
#
# m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=GPy.kern.linear(input_dim))
# num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim))
# m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=kernel)
# num_inducing=num_inducing, kernel=kernel)
# m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=kernel)
# num_inducing=num_inducing, kernel=kernel)
# m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=GPy.kern.rbf(input_dim))
# num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim))
m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
M=M, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)))
m3.ensure_default_constraints()
# + GPy.kern.bias(input_dim))
# m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim))
# num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim))
else:
unittest.main()

View file

@ -4,6 +4,7 @@
import unittest
import numpy as np
import GPy
from GPy.models.sparse_gplvm import SparseGPLVM
class sparse_GPLVMTests(unittest.TestCase):
def test_bias_kern(self):
@ -11,9 +12,9 @@ class sparse_GPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.sparse_GPLVM(Y, input_dim, kernel = k, M=M)
m = SparseGPLVM(Y, input_dim, kernel=k, M=M)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
@ -24,9 +25,9 @@ class sparse_GPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.sparse_GPLVM(Y, input_dim, kernel = k, M=M)
m = SparseGPLVM(Y, input_dim, kernel=k, M=M)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
@ -36,9 +37,9 @@ class sparse_GPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.sparse_GPLVM(Y, input_dim, kernel = k, M=M)
m = SparseGPLVM(Y, input_dim, kernel=k, M=M)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())

View file

@ -5,33 +5,33 @@
import unittest
import numpy as np
import GPy
from GPy.likelihoods.likelihood_functions import Binomial
class GradientTests(unittest.TestCase):
def setUp(self):
######################################
## 1 dimensional example
# # 1 dimensional example
# sample inputs and outputs
self.X1D = np.random.uniform(-3.,3.,(20,1))
self.Y1D = np.sin(self.X1D)+np.random.randn(20,1)*0.05
self.X1D = np.random.uniform(-3., 3., (20, 1))
self.Y1D = np.sin(self.X1D) + np.random.randn(20, 1) * 0.05
######################################
## 2 dimensional example
# # 2 dimensional example
# sample inputs and outputs
self.X2D = np.random.uniform(-3.,3.,(40,2))
self.Y2D = np.sin(self.X2D[:,0:1]) * np.sin(self.X2D[:,1:2])+np.random.randn(40,1)*0.05
self.X2D = np.random.uniform(-3., 3., (40, 2))
self.Y2D = np.sin(self.X2D[:, 0:1]) * np.sin(self.X2D[:, 1:2]) + np.random.randn(40, 1) * 0.05
def check_model_with_white(self, kern, model_type='GP_regression', dimension=1):
#Get the correct gradients
def check_model_with_white(self, kern, model_type='GPRegression', dimension=1):
# Get the correct gradients
if dimension == 1:
X = self.X1D
Y = self.Y1D
else:
X = self.X2D
Y = self.Y2D
#Get model type (GP_regression, GP_sparse_regression, etc)
# Get model type (GPRegression, SparseGPRegression, etc)
model_fit = getattr(GPy.models, model_type)
noise = GPy.kern.white(dimension)
@ -42,114 +42,114 @@ class GradientTests(unittest.TestCase):
# contrain all parameters to be positive
self.assertTrue(m.checkgrad())
def test_gp_regression_rbf_1d(self):
def test_GPRegression_rbf_1d(self):
''' Testing the GP regression with rbf kernel with white kernel on 1d data '''
rbf = GPy.kern.rbf(1)
self.check_model_with_white(rbf, model_type='GP_regression', dimension=1)
self.check_model_with_white(rbf, model_type='GPRegression', dimension=1)
def test_GP_regression_rbf_2D(self):
def test_GPRegression_rbf_2D(self):
''' Testing the GP regression with rbf and white kernel on 2d data '''
rbf = GPy.kern.rbf(2)
self.check_model_with_white(rbf, model_type='GP_regression', dimension=2)
self.check_model_with_white(rbf, model_type='GPRegression', dimension=2)
def test_GP_regression_rbf_ARD_2D(self):
def test_GPRegression_rbf_ARD_2D(self):
''' Testing the GP regression with rbf and white kernel on 2d data '''
k = GPy.kern.rbf(2,ARD=True)
self.check_model_with_white(k, model_type='GP_regression', dimension=2)
k = GPy.kern.rbf(2, ARD=True)
self.check_model_with_white(k, model_type='GPRegression', dimension=2)
def test_GP_regression_matern52_1D(self):
def test_GPRegression_matern52_1D(self):
''' Testing the GP regression with matern52 kernel on 1d data '''
matern52 = GPy.kern.Matern52(1)
self.check_model_with_white(matern52, model_type='GP_regression', dimension=1)
self.check_model_with_white(matern52, model_type='GPRegression', dimension=1)
def test_GP_regression_matern52_2D(self):
def test_GPRegression_matern52_2D(self):
''' Testing the GP regression with matern52 kernel on 2d data '''
matern52 = GPy.kern.Matern52(2)
self.check_model_with_white(matern52, model_type='GP_regression', dimension=2)
self.check_model_with_white(matern52, model_type='GPRegression', dimension=2)
def test_GP_regression_matern52_ARD_2D(self):
def test_GPRegression_matern52_ARD_2D(self):
''' Testing the GP regression with matern52 kernel on 2d data '''
matern52 = GPy.kern.Matern52(2,ARD=True)
self.check_model_with_white(matern52, model_type='GP_regression', dimension=2)
matern52 = GPy.kern.Matern52(2, ARD=True)
self.check_model_with_white(matern52, model_type='GPRegression', dimension=2)
def test_GP_regression_matern32_1D(self):
def test_GPRegression_matern32_1D(self):
''' Testing the GP regression with matern32 kernel on 1d data '''
matern32 = GPy.kern.Matern32(1)
self.check_model_with_white(matern32, model_type='GP_regression', dimension=1)
self.check_model_with_white(matern32, model_type='GPRegression', dimension=1)
def test_GP_regression_matern32_2D(self):
def test_GPRegression_matern32_2D(self):
''' Testing the GP regression with matern32 kernel on 2d data '''
matern32 = GPy.kern.Matern32(2)
self.check_model_with_white(matern32, model_type='GP_regression', dimension=2)
self.check_model_with_white(matern32, model_type='GPRegression', dimension=2)
def test_GP_regression_matern32_ARD_2D(self):
def test_GPRegression_matern32_ARD_2D(self):
''' Testing the GP regression with matern32 kernel on 2d data '''
matern32 = GPy.kern.Matern32(2,ARD=True)
self.check_model_with_white(matern32, model_type='GP_regression', dimension=2)
matern32 = GPy.kern.Matern32(2, ARD=True)
self.check_model_with_white(matern32, model_type='GPRegression', dimension=2)
def test_GP_regression_exponential_1D(self):
def test_GPRegression_exponential_1D(self):
''' Testing the GP regression with exponential kernel on 1d data '''
exponential = GPy.kern.exponential(1)
self.check_model_with_white(exponential, model_type='GP_regression', dimension=1)
self.check_model_with_white(exponential, model_type='GPRegression', dimension=1)
def test_GP_regression_exponential_2D(self):
def test_GPRegression_exponential_2D(self):
''' Testing the GP regression with exponential kernel on 2d data '''
exponential = GPy.kern.exponential(2)
self.check_model_with_white(exponential, model_type='GP_regression', dimension=2)
self.check_model_with_white(exponential, model_type='GPRegression', dimension=2)
def test_GP_regression_exponential_ARD_2D(self):
def test_GPRegression_exponential_ARD_2D(self):
''' Testing the GP regression with exponential kernel on 2d data '''
exponential = GPy.kern.exponential(2,ARD=True)
self.check_model_with_white(exponential, model_type='GP_regression', dimension=2)
exponential = GPy.kern.exponential(2, ARD=True)
self.check_model_with_white(exponential, model_type='GPRegression', dimension=2)
def test_GP_regression_bias_kern_1D(self):
def test_GPRegression_bias_kern_1D(self):
''' Testing the GP regression with bias kernel on 1d data '''
bias = GPy.kern.bias(1)
self.check_model_with_white(bias, model_type='GP_regression', dimension=1)
self.check_model_with_white(bias, model_type='GPRegression', dimension=1)
def test_GP_regression_bias_kern_2D(self):
def test_GPRegression_bias_kern_2D(self):
''' Testing the GP regression with bias kernel on 2d data '''
bias = GPy.kern.bias(2)
self.check_model_with_white(bias, model_type='GP_regression', dimension=2)
self.check_model_with_white(bias, model_type='GPRegression', dimension=2)
def test_GP_regression_linear_kern_1D_ARD(self):
def test_GPRegression_linear_kern_1D_ARD(self):
''' Testing the GP regression with linear kernel on 1d data '''
linear = GPy.kern.linear(1,ARD=True)
self.check_model_with_white(linear, model_type='GP_regression', dimension=1)
linear = GPy.kern.linear(1, ARD=True)
self.check_model_with_white(linear, model_type='GPRegression', dimension=1)
def test_GP_regression_linear_kern_2D_ARD(self):
def test_GPRegression_linear_kern_2D_ARD(self):
''' Testing the GP regression with linear kernel on 2d data '''
linear = GPy.kern.linear(2,ARD=True)
self.check_model_with_white(linear, model_type='GP_regression', dimension=2)
linear = GPy.kern.linear(2, ARD=True)
self.check_model_with_white(linear, model_type='GPRegression', dimension=2)
def test_GP_regression_linear_kern_1D(self):
def test_GPRegression_linear_kern_1D(self):
''' Testing the GP regression with linear kernel on 1d data '''
linear = GPy.kern.linear(1)
self.check_model_with_white(linear, model_type='GP_regression', dimension=1)
self.check_model_with_white(linear, model_type='GPRegression', dimension=1)
def test_GP_regression_linear_kern_2D(self):
def test_GPRegression_linear_kern_2D(self):
''' Testing the GP regression with linear kernel on 2d data '''
linear = GPy.kern.linear(2)
self.check_model_with_white(linear, model_type='GP_regression', dimension=2)
self.check_model_with_white(linear, model_type='GPRegression', dimension=2)
def test_sparse_GP_regression_rbf_white_kern_1d(self):
def test_SparseGPRegression_rbf_white_kern_1d(self):
''' Testing the sparse GP regression with rbf kernel with white kernel on 1d data '''
rbf = GPy.kern.rbf(1)
self.check_model_with_white(rbf, model_type='sparse_GP_regression', dimension=1)
self.check_model_with_white(rbf, model_type='SparseGPRegression', dimension=1)
def test_sparse_GP_regression_rbf_white_kern_2D(self):
def test_SparseGPRegression_rbf_white_kern_2D(self):
''' Testing the sparse GP regression with rbf and white kernel on 2d data '''
rbf = GPy.kern.rbf(2)
self.check_model_with_white(rbf, model_type='sparse_GP_regression', dimension=2)
self.check_model_with_white(rbf, model_type='SparseGPRegression', dimension=2)
def test_GPLVM_rbf_bias_white_kern_2D(self):
""" Testing GPLVM with rbf + bias and white kernel """
N, input_dim, D = 50, 1, 2
X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim, 0.5, 0.9*np.ones((1,))) + GPy.kern.bias(input_dim, 0.1) + GPy.kern.white(input_dim, 0.05)
k = GPy.kern.rbf(input_dim, 0.5, 0.9 * np.ones((1,))) + GPy.kern.bias(input_dim, 0.1) + GPy.kern.white(input_dim, 0.05)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
m = GPy.models.GPLVM(Y, input_dim, kernel = k)
Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
m = GPy.models.GPLVM(Y, input_dim, kernel=k)
m.ensure_default_constraints()
self.assertTrue(m.checkgrad())
@ -159,44 +159,44 @@ class GradientTests(unittest.TestCase):
X = np.random.rand(N, input_dim)
k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim, 0.1) + GPy.kern.white(input_dim, 0.05)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
m = GPy.models.GPLVM(Y, input_dim, init = 'PCA', kernel = k)
Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
m = GPy.models.GPLVM(Y, input_dim, init='PCA', kernel=k)
m.ensure_default_constraints()
self.assertTrue(m.checkgrad())
def test_GP_EP_probit(self):
N = 20
X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None]
Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.binomial()
distribution = GPy.likelihoods.likelihood_functions.Binomial()
likelihood = GPy.likelihoods.EP(Y, distribution)
m = GPy.core.GP(X, likelihood, kernel)
m.ensure_default_constraints()
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
#self.assertTrue(m.EPEM)
# self.assertTrue(m.EPEM)
def test_sparse_EP_DTC_probit(self):
N = 20
X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None]
Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
Z = np.linspace(0,15,4)[:,None]
X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
Z = np.linspace(0, 15, 4)[:, None]
kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.binomial()
distribution = GPy.likelihoods.likelihood_functions.Binomial()
likelihood = GPy.likelihoods.EP(Y, distribution)
m = GPy.core.sparse_GP(X, likelihood, kernel,Z)
m = GPy.core.SparseGP(X, likelihood, kernel, Z)
m.ensure_default_constraints()
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
def test_generalized_FITC(self):
N = 20
X = np.hstack([np.random.rand(N/2)+1,np.random.rand(N/2)-1])[:,None]
X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
k = GPy.kern.rbf(1) + GPy.kern.white(1)
Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None]
likelihood = GPy.inference.likelihoods.binomial(Y)
m = GPy.models.generalized_FITC(X,likelihood,k,inducing=4)
Y = np.hstack([np.ones(N / 2), -np.ones(N / 2)])[:, None]
likelihood = Binomial(Y)
m = GPy.models.GeneralizedFITC(X, likelihood, k, inducing=4)
m.constrain_positive('(var|len)')
m.approximate_likelihood()
self.assertTrue(m.checkgrad())

View file

@ -13,7 +13,7 @@ default_seed = 10000
# Some general utilities.
def sample_class(f):
p = 1. / (1. + np.exp(-f))
c = np.random.binomial(1, p)
c = np.random.Binomial(1, p)
c = np.where(c, 1, -1)
return c

View file

@ -1,87 +1,80 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#tdot function courtesy of Ian Murray:
# tdot function courtesy of Ian Murray:
# Iain Murray, April 2013. iain contactable via iainmurray.net
# http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot.py
import numpy as np
from scipy import linalg, optimize, weave
import pylab as pb
import Tango
import sys
import re
import pdb
import cPickle
from scipy import linalg, weave
import types
import ctypes
from ctypes import byref, c_char, c_int, c_double # TODO
#import scipy.lib.lapack
import scipy as sp
# import scipy.lib.lapack
try:
_blaslib = ctypes.cdll.LoadLibrary(np.core._dotblas.__file__)
_blaslib = ctypes.cdll.LoadLibrary(np.core._dotblas.__file__) # @UndefinedVariable
_blas_available = True
except:
_blas_available = False
def trace_dot(a,b):
def trace_dot(a, b):
"""
efficiently compute the trace of the matrix product of a and b
"""
return np.sum(a*b)
return np.sum(a * b)
def mdot(*args):
"""Multiply all the arguments using matrix product rules.
The output is equivalent to multiplying the arguments one by one
from left to right using dot().
Precedence can be controlled by creating tuples of arguments,
for instance mdot(a,((b,c),d)) multiplies a (a*((b*c)*d)).
Note that this means the output of dot(a,b) and mdot(a,b) will differ if
a or b is a pure tuple of numbers.
"""
if len(args)==1:
return args[0]
elif len(args)==2:
return _mdot_r(args[0],args[1])
else:
return _mdot_r(args[:-1],args[-1])
"""Multiply all the arguments using matrix product rules.
The output is equivalent to multiplying the arguments one by one
from left to right using dot().
Precedence can be controlled by creating tuples of arguments,
for instance mdot(a,((b,c),d)) multiplies a (a*((b*c)*d)).
Note that this means the output of dot(a,b) and mdot(a,b) will differ if
a or b is a pure tuple of numbers.
"""
if len(args) == 1:
return args[0]
elif len(args) == 2:
return _mdot_r(args[0], args[1])
else:
return _mdot_r(args[:-1], args[-1])
def _mdot_r(a,b):
"""Recursive helper for mdot"""
if type(a)==types.TupleType:
if len(a)>1:
a = mdot(*a)
else:
a = a[0]
if type(b)==types.TupleType:
if len(b)>1:
b = mdot(*b)
else:
b = b[0]
return np.dot(a,b)
def _mdot_r(a, b):
"""Recursive helper for mdot"""
if type(a) == types.TupleType:
if len(a) > 1:
a = mdot(*a)
else:
a = a[0]
if type(b) == types.TupleType:
if len(b) > 1:
b = mdot(*b)
else:
b = b[0]
return np.dot(a, b)
def jitchol(A,maxtries=5):
def jitchol(A, maxtries=5):
A = np.asfortranarray(A)
L,info = linalg.lapack.flapack.dpotrf(A,lower=1)
if info ==0:
L, info = linalg.lapack.flapack.dpotrf(A, lower=1)
if info == 0:
return L
else:
diagA = np.diag(A)
if np.any(diagA<0.):
if np.any(diagA < 0.):
raise linalg.LinAlgError, "not pd: negative diagonal elements"
jitter= diagA.mean()*1e-6
for i in range(1,maxtries+1):
jitter = diagA.mean() * 1e-6
for i in range(1, maxtries + 1):
print 'Warning: adding jitter of {:.10e}'.format(jitter)
try:
return linalg.cholesky(A+np.eye(A.shape[0]).T*jitter, lower = True)
return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
except:
jitter *= 10
raise linalg.LinAlgError,"not positive definite, even with jitter."
raise linalg.LinAlgError, "not positive definite, even with jitter."
def jitchol_old(A,maxtries=5):
def jitchol_old(A, maxtries=5):
"""
:param A : An almost pd square matrix
@ -93,20 +86,20 @@ def jitchol_old(A,maxtries=5):
np.allclose(sp.linalg.cholesky(XXT, lower = True), np.triu(sp.linalg.cho_factor(XXT)[0]).T)
"""
try:
return linalg.cholesky(A, lower = True)
return linalg.cholesky(A, lower=True)
except linalg.LinAlgError:
diagA = np.diag(A)
if np.any(diagA<0.):
if np.any(diagA < 0.):
raise linalg.LinAlgError, "not pd: negative diagonal elements"
jitter= diagA.mean()*1e-6
for i in range(1,maxtries+1):
jitter = diagA.mean() * 1e-6
for i in range(1, maxtries + 1):
print '\rWarning: adding jitter of {:.10e} '.format(jitter),
try:
return linalg.cholesky(A+np.eye(A.shape[0]).T*jitter, lower = True)
return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
except:
jitter *= 10
raise linalg.LinAlgError,"not positive definite, even with jitter."
raise linalg.LinAlgError, "not positive definite, even with jitter."
def pdinv(A, *args):
"""
@ -125,7 +118,7 @@ def pdinv(A, *args):
logdet = 2.*np.sum(np.log(np.diag(L)))
Li = chol_inv(L)
Ai, _ = linalg.lapack.flapack.dpotri(L)
#Ai = np.tril(Ai) + np.tril(Ai,-1).T
# Ai = np.tril(Ai) + np.tril(Ai,-1).T
symmetrify(Ai)
return Ai, L, Li, logdet
@ -140,7 +133,7 @@ def chol_inv(L):
"""
return linalg.lapack.flapack.dtrtri(L, lower = True)[0]
return linalg.lapack.flapack.dtrtri(L, lower=True)[0]
def multiple_pdinv(A):
@ -155,11 +148,11 @@ def multiple_pdinv(A):
hld: 0.5* the log of the determinants of A
"""
N = A.shape[-1]
chols = [jitchol(A[:,:,i]) for i in range(N)]
chols = [jitchol(A[:, :, i]) for i in range(N)]
halflogdets = [np.sum(np.log(np.diag(L[0]))) for L in chols]
invs = [linalg.lapack.flapack.dpotri(L[0],True)[0] for L in chols]
invs = [np.triu(I)+np.triu(I,1).T for I in invs]
return np.dstack(invs),np.array(halflogdets)
invs = [linalg.lapack.flapack.dpotri(L[0], True)[0] for L in chols]
invs = [np.triu(I) + np.triu(I, 1).T for I in invs]
return np.dstack(invs), np.array(halflogdets)
def PCA(Y, input_dim):
@ -179,18 +172,18 @@ def PCA(Y, input_dim):
if not np.allclose(Y.mean(axis=0), 0.0):
print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)"
#Y -= Y.mean(axis=0)
# Y -= Y.mean(axis=0)
Z = linalg.svd(Y-Y.mean(axis=0), full_matrices = False)
[X, W] = [Z[0][:,0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:,0:input_dim]]
Z = linalg.svd(Y - Y.mean(axis=0), full_matrices=False)
[X, W] = [Z[0][:, 0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:, 0:input_dim]]
v = X.std(axis=0)
X /= v;
W *= v;
return X, W.T
def tdot_numpy(mat,out=None):
return np.dot(mat,mat.T,out)
def tdot_numpy(mat, out=None):
return np.dot(mat, mat.T, out)
def tdot_blas(mat, out=None):
"""returns np.dot(mat, mat.T), but faster for large 2D arrays of doubles."""
@ -198,16 +191,16 @@ def tdot_blas(mat, out=None):
return np.dot(mat, mat.T)
nn = mat.shape[0]
if out is None:
out = np.zeros((nn,nn))
out = np.zeros((nn, nn))
else:
assert(out.dtype == 'float64')
assert(out.shape == (nn,nn))
assert(out.shape == (nn, nn))
# FIXME: should allow non-contiguous out, and copy output into it:
assert(8 in out.strides)
# zeroing needed because of dumb way I copy across triangular answer
out[:] = 0.0
## Call to DSYRK from BLAS
# # Call to DSYRK from BLAS
# If already in Fortran order (rare), and has the right sorts of strides I
# could avoid the copy. I also thought swapping to cblas API would allow use
# of C order. However, I tried that and had errors with large matrices:
@ -226,17 +219,17 @@ def tdot_blas(mat, out=None):
_blaslib.dsyrk_(byref(UPLO), byref(TRANS), byref(N), byref(K),
byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC))
symmetrify(out,upper=True)
symmetrify(out, upper=True)
return out
def tdot(*args, **kwargs):
if _blas_available:
return tdot_blas(*args,**kwargs)
return tdot_blas(*args, **kwargs)
else:
return tdot_numpy(*args,**kwargs)
return tdot_numpy(*args, **kwargs)
def DSYR_blas(A,x,alpha=1.):
def DSYR_blas(A, x, alpha=1.):
"""
Performs a symmetric rank-1 update operation:
A <- A + alpha * np.dot(x,x.T)
@ -256,9 +249,9 @@ def DSYR_blas(A,x,alpha=1.):
INCX = c_int(1)
_blaslib.dsyr_(byref(UPLO), byref(N), byref(ALPHA),
x_, byref(INCX), A_, byref(LDA))
symmetrify(A,upper=True)
symmetrify(A, upper=True)
def DSYR_numpy(A,x,alpha=1.):
def DSYR_numpy(A, x, alpha=1.):
"""
Performs a symmetric rank-1 update operation:
A <- A + alpha * np.dot(x,x.T)
@ -269,23 +262,23 @@ def DSYR_numpy(A,x,alpha=1.):
:param x: Nx1 np.array
:param alpha: scalar
"""
A += alpha*np.dot(x[:,None],x[None,:])
A += alpha * np.dot(x[:, None], x[None, :])
def DSYR(*args, **kwargs):
if _blas_available:
return DSYR_blas(*args,**kwargs)
return DSYR_blas(*args, **kwargs)
else:
return DSYR_numpy(*args,**kwargs)
return DSYR_numpy(*args, **kwargs)
def symmetrify(A,upper=False):
def symmetrify(A, upper=False):
"""
Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper
works IN PLACE.
"""
N,M = A.shape
assert N==M
N, M = A.shape
assert N == M
c_contig_code = """
int iN;
for (int i=1; i<N; i++){
@ -305,13 +298,13 @@ def symmetrify(A,upper=False):
}
"""
if A.flags['C_CONTIGUOUS'] and upper:
weave.inline(f_contig_code,['A','N'], extra_compile_args=['-O3'])
weave.inline(f_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
elif A.flags['C_CONTIGUOUS'] and not upper:
weave.inline(c_contig_code,['A','N'], extra_compile_args=['-O3'])
weave.inline(c_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
elif A.flags['F_CONTIGUOUS'] and upper:
weave.inline(c_contig_code,['A','N'], extra_compile_args=['-O3'])
weave.inline(c_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
elif A.flags['F_CONTIGUOUS'] and not upper:
weave.inline(f_contig_code,['A','N'], extra_compile_args=['-O3'])
weave.inline(f_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
else:
if upper:
tmp = np.tril(A.T)
@ -319,15 +312,15 @@ def symmetrify(A,upper=False):
tmp = np.tril(A)
A[:] = 0.0
A += tmp
A += np.tril(tmp,-1).T
A += np.tril(tmp, -1).T
def symmetrify_murray(A):
A += A.T
nn = A.shape[0]
A[[range(nn),range(nn)]] /= 2.0
A[[range(nn), range(nn)]] /= 2.0
def cholupdate(L,x):
def cholupdate(L, x):
"""
update the LOWER cholesky factor of a pd matrix IN PLACE
@ -337,7 +330,7 @@ def cholupdate(L,x):
support_code = """
#include <math.h>
"""
code="""
code = """
double r,c,s;
int j,i;
for(j=0; j<N; j++){
@ -353,11 +346,11 @@ def cholupdate(L,x):
"""
x = x.copy()
N = x.size
weave.inline(code, support_code=support_code, arg_names=['N','L','x'], type_converters=weave.converters.blitz)
weave.inline(code, support_code=support_code, arg_names=['N', 'L', 'x'], type_converters=weave.converters.blitz)
def backsub_both_sides(L, X,transpose='left'):
def backsub_both_sides(L, X, transpose='left'):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
if transpose=='left':
if transpose == 'left':
tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
else:

View file

@ -221,7 +221,7 @@ class image_show(data_show):
if not self.palette == []: # Can just show the image (self.set_image() took care of setting the palette)
self.handle = self.axes.imshow(self.vals, interpolation='nearest')
else: # Use a boring gray map.
self.handle = self.axes.imshow(self.vals, cmap=plt.cm.gray, interpolation='nearest')
self.handle = self.axes.imshow(self.vals, cmap=plt.cm.gray, interpolation='nearest') # @UndefinedVariable
plt.show()
def modify(self, vals):