Merged and fixed conflicts, names still need changing accordingly

This commit is contained in:
Ricardo 2013-06-05 14:22:16 +01:00
commit b3eeacd956
55 changed files with 912 additions and 927 deletions

View file

@ -1,9 +1,9 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from GP import GP
from sparse_GP import sparse_GP
from FITC import FITC
from model import * from model import *
from parameterised import * from parameterised import *
import priors import priors
from GPy.core.gp import GP
from GPy.core.sparse_gp import SparseGP
from FITC import FITC

View file

@ -46,12 +46,12 @@ class GP(GPBase):
#alpha = np.dot(self.Ki, self.likelihood.Y) #alpha = np.dot(self.Ki, self.likelihood.Y)
alpha,_ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y,lower=1) alpha,_ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y,lower=1)
self.dL_dK = 0.5 * (tdot(alpha) - self.D * self.Ki) self.dL_dK = 0.5 * (tdot(alpha) - self.input_dim * self.Ki)
else: else:
#tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki) #tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1) tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1) tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
self.dL_dK = 0.5 * (tmp - self.D * self.Ki) self.dL_dK = 0.5 * (tmp - self.input_dim * self.Ki)
def _get_params(self): def _get_params(self):
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params())) return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
@ -89,7 +89,7 @@ class GP(GPBase):
model for a new variable Y* = v_tilde/tau_tilde, with a covariance model for a new variable Y* = v_tilde/tau_tilde, with a covariance
matrix K* = K + diag(1./tau_tilde) plus a normalization term. matrix K* = K + diag(1./tau_tilde) plus a normalization term.
""" """
return -0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z return -0.5 * self.input_dim * self.K_logdet + self._model_fit_term() + self.likelihood.Z
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
@ -117,7 +117,7 @@ class GP(GPBase):
var = Kxx - np.sum(np.multiply(KiKx, Kx), 0) var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
var = var[:, None] var = var[:, None]
if stop: if stop:
debug_this debug_this # @UndefinedVariable
return mu, var return mu, var
def predict(self, Xnew, which_parts='all', full_cov=False): def predict(self, Xnew, which_parts='all', full_cov=False):
@ -131,12 +131,12 @@ class GP(GPBase):
:type which_parts: ('all', list of bools) :type which_parts: ('all', list of bools)
:param full_cov: whether to return the folll covariance matrix, or just the diagonal :param full_cov: whether to return the folll covariance matrix, or just the diagonal
:type full_cov: bool :type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.D :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew. If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions. This is to allow for different normalizations of the output dimensions.
""" """

View file

@ -18,15 +18,15 @@ class GPBase(model.model):
self.kern = kernel self.kern = kernel
self.likelihood = likelihood self.likelihood = likelihood
assert self.X.shape[0] == self.likelihood.data.shape[0] assert self.X.shape[0] == self.likelihood.data.shape[0]
self.N, self.D = self.likelihood.data.shape self.N, self.output_dim = self.likelihood.data.shape
if normalize_X: if normalize_X:
self._Xmean = X.mean(0)[None, :] self._Xmean = X.mean(0)[None, :]
self._Xstd = X.std(0)[None, :] self._Xstd = X.std(0)[None, :]
self.X = (X.copy() - self._Xmean) / self._Xstd self.X = (X.copy() - self._Xmean) / self._Xstd
else: else:
self._Xmean = np.zeros((1,self.input_dim)) self._Xmean = np.zeros((1, self.input_dim))
self._Xstd = np.ones((1,self.input_dim)) self._Xstd = np.ones((1, self.input_dim))
model.model.__init__(self) model.model.__init__(self)
@ -70,7 +70,7 @@ class GPBase(model.model):
else: else:
m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True) m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
Ysim = np.random.multivariate_normal(m.flatten(), v, samples) Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None,], axes=ax) gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
for i in range(samples): for i in range(samples):
ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25) ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5) ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
@ -108,19 +108,19 @@ class GPBase(model.model):
if self.X.shape[1] == 1: if self.X.shape[1] == 1:
Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits) Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
m, var, lower, upper = self.predict(Xnew, which_parts=which_parts) m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
for d in range(m.shape[1]): for d in range(m.shape[1]):
gpplot(Xnew, m[:,d], lower[:,d], upper[:,d],axes=ax) gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
ax.plot(Xu[which_data], self.likelihood.data[which_data,d], 'kx', mew=1.5) ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5)
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper)) ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax) ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax) ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 2: # FIXME elif self.X.shape[1] == 2: # FIXME
resolution = resolution or 50 resolution = resolution or 50
Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution) Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution) x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)

View file

@ -23,7 +23,7 @@ class model(parameterised):
self.priors = None self.priors = None
self.optimization_runs = [] self.optimization_runs = []
self.sampling_runs = [] self.sampling_runs = []
self.preferred_optimizer = 'tnc' self.preferred_optimizer = 'scg'
#self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes #self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes
def _get_params(self): def _get_params(self):
raise NotImplementedError, "this needs to be implemented to use the model class" raise NotImplementedError, "this needs to be implemented to use the model class"

View file

@ -85,7 +85,7 @@ class parameterised(object):
else: else:
return self._get_params()[matches] return self._get_params()[matches]
else: else:
raise AttributeError, "no parameter matches %s" % name raise AttributeError, "no parameter matches %s" % regexp
def __setitem__(self, name, val): def __setitem__(self, name, val):
""" """

View file

@ -99,9 +99,9 @@ class MultivariateGaussian:
assert len(self.var.shape) == 2 assert len(self.var.shape) == 2
assert self.var.shape[0] == self.var.shape[1] assert self.var.shape[0] == self.var.shape[1]
assert self.var.shape[0] == self.mu.size assert self.var.shape[0] == self.mu.size
self.D = self.mu.size self.input_dim = self.mu.size
self.inv, self.hld = pdinv(self.var) self.inv, self.hld = pdinv(self.var)
self.constant = -0.5 * self.D * np.log(2 * np.pi) - self.hld self.constant = -0.5 * self.input_dim * np.log(2 * np.pi) - self.hld
def summary(self): def summary(self):
raise NotImplementedError raise NotImplementedError
@ -121,7 +121,7 @@ class MultivariateGaussian:
return np.random.multivariate_normal(self.mu, self.var, n) return np.random.multivariate_normal(self.mu, self.var, n)
def plot(self): def plot(self):
if self.D == 2: if self.input_dim == 2:
rvs = self.rvs(200) rvs = self.rvs(200)
pb.plot(rvs[:, 0], rvs[:, 1], 'kx', mew=1.5) pb.plot(rvs[:, 0], rvs[:, 1], 'kx', mew=1.5)
xmin, xmax = pb.xlim() xmin, xmax = pb.xlim()

View file

@ -8,7 +8,7 @@ from scipy import linalg
from ..likelihoods import Gaussian from ..likelihoods import Gaussian
from gp_base import GPBase from gp_base import GPBase
class sparse_GP(GPBase): class SparseGP(GPBase):
""" """
Variational sparse GP model Variational sparse GP model
@ -21,9 +21,9 @@ class sparse_GP(GPBase):
:param X_variance: The uncertainty in the measurements of X (Gaussian variance) :param X_variance: The uncertainty in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (N x input_dim) | None :type X_variance: np.ndarray (N x input_dim) | None
:param Z: inducing inputs (optional, see note) :param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x input_dim) | None :type Z: np.ndarray (num_inducing x input_dim) | None
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None) :param num_inducing : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int :type num_inducing: int
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool :type normalize_(X|Y): bool
""" """
@ -32,7 +32,7 @@ class sparse_GP(GPBase):
GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self.Z = Z self.Z = Z
self.M = Z.shape[0] self.num_inducing = Z.shape[0]
self.likelihood = likelihood self.likelihood = likelihood
if X_variance is None: if X_variance is None:
@ -85,7 +85,7 @@ class sparse_GP(GPBase):
# factor B # factor B
self.B = np.eye(self.M) + self.A self.B = np.eye(self.num_inducing) + self.A
self.LB = jitchol(self.B) self.LB = jitchol(self.B)
# TODO: make a switch for either first compute psi1V, or VV.T # TODO: make a switch for either first compute psi1V, or VV.T
@ -99,16 +99,16 @@ class sparse_GP(GPBase):
# Compute dL_dKmm # Compute dL_dKmm
tmp = tdot(self._LBi_Lmi_psi1V) tmp = tdot(self._LBi_Lmi_psi1V)
self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.D * np.eye(self.M) + tmp) self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.input_dim * np.eye(self.num_inducing) + tmp)
tmp = -0.5 * self.DBi_plus_BiPBi tmp = -0.5 * self.DBi_plus_BiPBi
tmp += -0.5 * self.B * self.D tmp += -0.5 * self.B * self.input_dim
tmp += self.D * np.eye(self.M) tmp += self.input_dim * np.eye(self.num_inducing)
self.dL_dKmm = backsub_both_sides(self.Lm, tmp) self.dL_dKmm = backsub_both_sides(self.Lm, tmp)
# Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
self.dL_dpsi0 = -0.5 * self.D * (self.likelihood.precision * np.ones([self.N, 1])).flatten() self.dL_dpsi0 = -0.5 * self.input_dim * (self.likelihood.precision * np.ones([self.N, 1])).flatten()
self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T) self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T)
dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.D * np.eye(self.M) - self.DBi_plus_BiPBi) dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.input_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
@ -135,24 +135,24 @@ class sparse_GP(GPBase):
raise NotImplementedError, "heteroscedatic derivates not implemented" raise NotImplementedError, "heteroscedatic derivates not implemented"
else: else:
# likelihood is not heterscedatic # likelihood is not heterscedatic
self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2 self.partial_for_likelihood = -0.5 * self.N * self.input_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision) self.partial_for_likelihood += 0.5 * self.input_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V))) self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
def log_likelihood(self): def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """ """ Compute the (lower bound on the) log marginal likelihood """
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y) A = -0.5 * self.N * self.input_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else: else:
A = -0.5 * self.N * self.D * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT A = -0.5 * self.N * self.input_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2)) C = -self.input_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + B + C + D + self.likelihood.Z return A + B + C + D + self.likelihood.Z
def _set_params(self, p): def _set_params(self, p):
self.Z = p[:self.M * self.input_dim].reshape(self.M, self.input_dim) self.Z = p[:self.num_inducing * self.input_dim].reshape(self.num_inducing, self.input_dim)
self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam]) self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:]) self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:])
self._compute_kernel_matrices() self._compute_kernel_matrices()
@ -221,7 +221,7 @@ class sparse_GP(GPBase):
Bi, _ = linalg.lapack.flapack.dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work! Bi, _ = linalg.lapack.flapack.dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work!
symmetrify(Bi) symmetrify(Bi)
Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.M) - Bi) Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.num_inducing) - Bi)
if X_variance_new is None: if X_variance_new is None:
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts) Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
@ -259,12 +259,12 @@ class sparse_GP(GPBase):
:type which_parts: ('all', list of bools) :type which_parts: ('all', list of bools)
:param full_cov: whether to return the folll covariance matrix, or just the diagonal :param full_cov: whether to return the folll covariance matrix, or just the diagonal
:type full_cov: bool :type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.D :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew. If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions. This is to allow for different normalizations of the output dimensions.
""" """

View file

@ -5,9 +5,9 @@ import numpy as np
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
import GPy import GPy
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
from GPy.util.datasets import swiss_roll_generated from GPy.util.datasets import swiss_roll_generated
from GPy.core.transformations import logexp from GPy.core.transformations import logexp
from GPy.models.bayesian_gplvm import BayesianGPLVM
default_seed = np.random.seed(123344) default_seed = np.random.seed(123344)
@ -20,14 +20,14 @@ def BGPLVM(seed=default_seed):
X = np.random.rand(N, Q) X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N), K, D).T Y = np.random.multivariate_normal(np.zeros(N), K, Q).T
k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.linear(Q, ARD=True) + GPy.kern.rbf(Q, ARD=True) + GPy.kern.white(Q) k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.linear(Q, ARD=True) + GPy.kern.rbf(Q, ARD=True) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q) # k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
# k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001) # k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M) m = GPy.models.BayesianGPLVM(Y, Q, kernel=k, M=M)
m.constrain_positive('(rbf|bias|noise|white|S)') m.constrain_positive('(rbf|bias|noise|white|S)')
# m.constrain_fixed('S', 1) # m.constrain_fixed('S', 1)
@ -105,7 +105,7 @@ def swiss_roll(optimize=True, N=1000, M=15, Q=4, sigma=.2, plot=False):
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2)) kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
m = Bayesian_GPLVM(Y, Q, X=X, X_variance=S, M=M, Z=Z, kernel=kernel) m = BayesianGPLVM(Y, Q, X=X, X_variance=S, M=M, Z=Z, kernel=kernel)
m.data_colors = c m.data_colors = c
m.data_t = t m.data_t = t
@ -129,7 +129,7 @@ def BGPLVM_oil(optimize=True, N=100, Q=5, M=25, max_f_eval=4e3, plot=False, **k)
Yn = Y - Y.mean(0) Yn = Y - Y.mean(0)
Yn /= Yn.std(0) Yn /= Yn.std(0)
m = GPy.models.Bayesian_GPLVM(Yn, Q, kernel=kernel, M=M, **k) m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, M=M, **k)
m.data_labels = data['Y'][:N].argmax(axis=1) m.data_labels = data['Y'][:N].argmax(axis=1)
# m.constrain('variance|leng', logexp_clipped()) # m.constrain('variance|leng', logexp_clipped())
@ -234,7 +234,7 @@ def bgplvm_simulation_matlab_compare():
from GPy import kern from GPy import kern
reload(mrd); reload(kern) reload(mrd); reload(kern)
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, m = BayesianGPLVM(Y, Q, init="PCA", M=M, kernel=k,
# X=mu, # X=mu,
# X_variance=S, # X_variance=S,
_debug=False) _debug=False)
@ -259,7 +259,7 @@ def bgplvm_simulation(optimize='scg',
Y = Ylist[0] Y = Ylist[0]
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q) k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q)
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True) m = BayesianGPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
# m.constrain('variance|noise', logexp_clipped()) # m.constrain('variance|noise', logexp_clipped())
m.ensure_default_constraints() m.ensure_default_constraints()
m['noise'] = Y.var() / 100. m['noise'] = Y.var() / 100.
@ -285,7 +285,7 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw):
reload(mrd); reload(kern) reload(mrd); reload(kern)
k = kern.linear(Q, [.05] * Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) k = kern.linear(Q, [.05] * Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
m = mrd.MRD(Ylist, Q=Q, M=M, kernels=k, initx="", initz='permute', **kw) m = mrd.MRD(Ylist, input_dim=Q, M=M, kernels=k, initx="", initz='permute', **kw)
for i, Y in enumerate(Ylist): for i, Y in enumerate(Ylist):
m['{}_noise'.format(i + 1)] = Y.var() / 100. m['{}_noise'.format(i + 1)] = Y.var() / 100.
@ -297,7 +297,7 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw):
if optimize: if optimize:
print "Optimizing Model:" print "Optimizing Model:"
m.optimize('scg', messages=1, max_iters=5e4, max_f_eval=5e4) m.optimize('scg', messages=1, max_iters=5e4, max_f_eval=5e4, gtol=.05)
if plot: if plot:
m.plot_X_1d("MRD Latent Space 1D") m.plot_X_1d("MRD Latent Space 1D")
m.plot_scales("MRD Scales") m.plot_scales("MRD Scales")
@ -313,7 +313,7 @@ def brendan_faces():
Yn /= Yn.std() Yn /= Yn.std()
m = GPy.models.GPLVM(Yn, Q) m = GPy.models.GPLVM(Yn, Q)
# m = GPy.models.Bayesian_GPLVM(Yn, Q, M=100) # m = GPy.models.BayesianGPLVM(Yn, Q, M=100)
# optimize # optimize
m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped()) m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
@ -380,7 +380,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True):
# M = 30 # M = 30
# #
# kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q) # kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
# m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M) # m = GPy.models.BayesianGPLVM(X, Q, kernel=kernel, M=M)
# # m.scale_factor = 100.0 # # m.scale_factor = 100.0
# m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)') # m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)')
# from sklearn import cluster # from sklearn import cluster

View file

@ -10,16 +10,16 @@ import numpy as np
import GPy import GPy
def toy_rbf_1d(optim_iters=100): def toy_rbf_1d(max_nb_eval_optim=100):
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
data = GPy.util.datasets.toy_rbf_1d() data = GPy.util.datasets.toy_rbf_1d()
# create simple GP model # create simple GP model
m = GPy.models.GP_regression(data['X'],data['Y']) m = GPy.models.GPRegression(data['X'],data['Y'])
# optimize # optimize
m.ensure_default_constraints() m.ensure_default_constraints()
m.optimize(max_f_eval=optim_iters) m.optimize(max_f_eval=max_nb_eval_optim)
# plot # plot
m.plot() m.plot()
print(m) print(m)
@ -30,7 +30,7 @@ def rogers_girolami_olympics(optim_iters=100):
data = GPy.util.datasets.rogers_girolami_olympics() data = GPy.util.datasets.rogers_girolami_olympics()
# create simple GP model # create simple GP model
m = GPy.models.GP_regression(data['X'],data['Y']) m = GPy.models.GPRegression(data['X'],data['Y'])
#set the lengthscale to be something sensible (defaults to 1) #set the lengthscale to be something sensible (defaults to 1)
m['rbf_lengthscale'] = 10 m['rbf_lengthscale'] = 10
@ -49,7 +49,7 @@ def toy_rbf_1d_50(optim_iters=100):
data = GPy.util.datasets.toy_rbf_1d_50() data = GPy.util.datasets.toy_rbf_1d_50()
# create simple GP model # create simple GP model
m = GPy.models.GP_regression(data['X'],data['Y']) m = GPy.models.GPRegression(data['X'],data['Y'])
# optimize # optimize
m.ensure_default_constraints() m.ensure_default_constraints()
@ -65,7 +65,7 @@ def silhouette(optim_iters=100):
data = GPy.util.datasets.silhouette() data = GPy.util.datasets.silhouette()
# create simple GP model # create simple GP model
m = GPy.models.GP_regression(data['X'],data['Y']) m = GPy.models.GPRegression(data['X'],data['Y'])
# optimize # optimize
m.ensure_default_constraints() m.ensure_default_constraints()
@ -87,9 +87,9 @@ def coregionalisation_toy2(optim_iters=100):
Y = np.vstack((Y1,Y2)) Y = np.vstack((Y1,Y2))
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1) k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
k2 = GPy.kern.coregionalise(2,1) k2 = GPy.kern.Coregionalise(2,1)
k = k1.prod(k2,tensor=True) k = k1.prod(k2,tensor=True)
m = GPy.models.GP_regression(X,Y,kernel=k) m = GPy.models.GPRegression(X,Y,kernel=k)
m.constrain_fixed('.*rbf_var',1.) m.constrain_fixed('.*rbf_var',1.)
#m.constrain_positive('.*kappa') #m.constrain_positive('.*kappa')
m.ensure_default_constraints() m.ensure_default_constraints()
@ -119,9 +119,9 @@ def coregionalisation_toy(optim_iters=100):
Y = np.vstack((Y1,Y2)) Y = np.vstack((Y1,Y2))
k1 = GPy.kern.rbf(1) k1 = GPy.kern.rbf(1)
k2 = GPy.kern.coregionalise(2,2) k2 = GPy.kern.Coregionalise(2,2)
k = k1.prod(k2,tensor=True) k = k1.prod(k2,tensor=True)
m = GPy.models.GP_regression(X,Y,kernel=k) m = GPy.models.GPRegression(X,Y,kernel=k)
m.constrain_fixed('.*rbf_var',1.) m.constrain_fixed('.*rbf_var',1.)
#m.constrain_positive('kappa') #m.constrain_positive('kappa')
m.ensure_default_constraints() m.ensure_default_constraints()
@ -155,10 +155,10 @@ def coregionalisation_sparse(optim_iters=100):
Z = np.hstack((np.random.rand(M,1)*8,np.random.randint(0,2,M)[:,None])) Z = np.hstack((np.random.rand(M,1)*8,np.random.randint(0,2,M)[:,None]))
k1 = GPy.kern.rbf(1) k1 = GPy.kern.rbf(1)
k2 = GPy.kern.coregionalise(2,2) k2 = GPy.kern.Coregionalise(2,2)
k = k1.prod(k2,tensor=True) + GPy.kern.white(2,0.001) k = k1.prod(k2,tensor=True) + GPy.kern.white(2,0.001)
m = GPy.models.sparse_GP_regression(X,Y,kernel=k,Z=Z) m = GPy.models.SparseGPRegression(X,Y,kernel=k,Z=Z)
m.scale_factor = 10000. m.scale_factor = 10000.
m.constrain_fixed('.*rbf_var',1.) m.constrain_fixed('.*rbf_var',1.)
#m.constrain_positive('kappa') #m.constrain_positive('kappa')
@ -181,7 +181,7 @@ def coregionalisation_sparse(optim_iters=100):
return m return m
def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000, optim_iters=100): def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000, optim_iters=300):
"""Show an example of a multimodal error surface for Gaussian process regression. Gene 939 has bimodal behaviour where the noisey mode is higher.""" """Show an example of a multimodal error surface for Gaussian process regression. Gene 939 has bimodal behaviour where the noisey mode is higher."""
# Contour over a range of length scales and signal/noise ratios. # Contour over a range of length scales and signal/noise ratios.
@ -197,7 +197,7 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
data['Y'] = data['Y'] - np.mean(data['Y']) data['Y'] = data['Y'] - np.mean(data['Y'])
lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf) lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
pb.contour(length_scales, log_SNRs, np.exp(lls), 20) pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet)
ax = pb.gca() ax = pb.gca()
pb.xlabel('length scale') pb.xlabel('length scale')
pb.ylabel('log_10 SNR') pb.ylabel('log_10 SNR')
@ -211,18 +211,20 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
optim_point_y = np.empty(2) optim_point_y = np.empty(2)
np.random.seed(seed=seed) np.random.seed(seed=seed)
for i in range(0, model_restarts): for i in range(0, model_restarts):
kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + GPy.kern.white(1,variance=np.random.exponential(1.)) #kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.))
kern = GPy.kern.rbf(1, variance=np.random.uniform(1e-3,1), lengthscale=np.random.uniform(5,50))
m = GPy.models.GP_regression(data['X'],data['Y'], kernel=kern) m = GPy.models.GPRegression(data['X'],data['Y'], kernel=kern)
optim_point_x[0] = m.get('rbf_lengthscale') m['noise_variance'] = np.random.uniform(1e-3,1)
optim_point_y[0] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance')); optim_point_x[0] = m['rbf_lengthscale']
optim_point_y[0] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']);
# optimize # optimize
m.ensure_default_constraints() m.ensure_default_constraints()
m.optimize(xtol=1e-6, ftol=1e-6, max_f_eval=optim_iters) m.optimize('scg', xtol=1e-6, ftol=1e-6, max_f_eval=optim_iters)
optim_point_x[1] = m.get('rbf_lengthscale') optim_point_x[1] = m['rbf_lengthscale']
optim_point_y[1] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance')); optim_point_y[1] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']);
pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1]-optim_point_x[0], optim_point_y[1]-optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k') pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1]-optim_point_x[0], optim_point_y[1]-optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k')
models.append(m) models.append(m)
@ -231,39 +233,32 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
ax.set_ylim(ylim) ax.set_ylim(ylim)
return (models, lls) return (models, lls)
def _contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf): def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf):
"""Evaluate the GP objective function for a given data set for a range of signal to noise ratios and a range of lengthscales. """Evaluate the GP objective function for a given data set for a range of signal to noise ratios and a range of lengthscales.
:data_set: A data set from the utils.datasets director. :data_set: A data set from the utils.datasets director.
:length_scales: a list of length scales to explore for the contour plot. :length_scales: a list of length scales to explore for the contour plot.
:log_SNRs: a list of base 10 logarithm signal to noise ratios to explore for the contour plot. :log_SNRs: a list of base 10 logarithm signal to noise ratios to explore for the contour plot.
:signal_kernel: a kernel to use for the 'signal' portion of the data.""" :kernel: a kernel to use for the 'signal' portion of the data."""
lls = [] lls = []
total_var = np.var(data['Y']) total_var = np.var(data['Y'])
kernel = kernel_call(1, variance=1., lengthscale=1.)
model = GPy.models.GPRegression(data['X'], data['Y'], kernel=kernel)
for log_SNR in log_SNRs: for log_SNR in log_SNRs:
SNR = 10**log_SNR SNR = 10.**log_SNR
noise_var = total_var/(1.+SNR)
signal_var = total_var - noise_var
model.kern['.*variance'] = signal_var
model['noise_variance'] = noise_var
length_scale_lls = [] length_scale_lls = []
for length_scale in length_scales: for length_scale in length_scales:
noise_var = 1. model['.*lengthscale'] = length_scale
signal_var = SNR
noise_var = noise_var/(noise_var + signal_var)*total_var
signal_var = signal_var/(noise_var + signal_var)*total_var
signal_kernel = signal_kernel_call(1, variance=signal_var, lengthscale=length_scale)
noise_kernel = GPy.kern.white(1, variance=noise_var)
kernel = signal_kernel + noise_kernel
K = kernel.K(data['X'])
total_var = (np.dot(np.dot(data['Y'].T,GPy.util.linalg.pdinv(K)[0]), data['Y'])/data['Y'].shape[0])[0,0]
noise_var *= total_var
signal_var *= total_var
kernel = signal_kernel_call(1, variance=signal_var, lengthscale=length_scale) + GPy.kern.white(1, variance=noise_var)
model = GPy.models.GP_regression(data['X'], data['Y'], kernel=kernel)
model.constrain_positive('')
length_scale_lls.append(model.log_likelihood()) length_scale_lls.append(model.log_likelihood())
lls.append(length_scale_lls) lls.append(length_scale_lls)
return np.array(lls) return np.array(lls)
def sparse_GP_regression_1D(N = 400, M = 5, optim_iters=100): def sparse_GP_regression_1D(N = 400, M = 5, optim_iters=100):
@ -276,7 +271,7 @@ def sparse_GP_regression_1D(N = 400, M = 5, optim_iters=100):
noise = GPy.kern.white(1) noise = GPy.kern.white(1)
kernel = rbf + noise kernel = rbf + noise
# create simple GP model # create simple GP model
m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M) m = GPy.models.SparseGPRegression(X, Y, kernel, M=M)
m.ensure_default_constraints() m.ensure_default_constraints()
@ -296,7 +291,7 @@ def sparse_GP_regression_2D(N = 400, M = 50, optim_iters=100):
kernel = rbf + noise kernel = rbf + noise
# create simple GP model # create simple GP model
m = GPy.models.sparse_GP_regression(X,Y,kernel, M = M) m = GPy.models.SparseGPRegression(X,Y,kernel, M = M)
# contrain all parameters to be positive (but not inducing inputs) # contrain all parameters to be positive (but not inducing inputs)
m.ensure_default_constraints() m.ensure_default_constraints()
@ -325,7 +320,7 @@ def uncertain_inputs_sparse_regression(optim_iters=100):
k = GPy.kern.rbf(1) + GPy.kern.white(1) k = GPy.kern.rbf(1) + GPy.kern.white(1)
# create simple GP model - no input uncertainty on this one # create simple GP model - no input uncertainty on this one
m = GPy.models.sparse_GP_regression(X, Y, kernel=k, Z=Z) m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
m.ensure_default_constraints() m.ensure_default_constraints()
m.optimize('scg', messages=1, max_f_eval=optim_iters) m.optimize('scg', messages=1, max_f_eval=optim_iters)
m.plot(ax=axes[0]) m.plot(ax=axes[0])
@ -333,7 +328,7 @@ def uncertain_inputs_sparse_regression(optim_iters=100):
#the same model with uncertainty #the same model with uncertainty
m = GPy.models.sparse_GP_regression(X, Y, kernel=k, Z=Z, X_variance=S) m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z, X_variance=S)
m.ensure_default_constraints() m.ensure_default_constraints()
m.optimize('scg', messages=1, max_f_eval=optim_iters) m.optimize('scg', messages=1, max_f_eval=optim_iters)
m.plot(ax=axes[1]) m.plot(ax=axes[1])

View file

@ -19,7 +19,7 @@ def tuto_GP_regression():
kernel = GPy.kern.rbf(D=1, variance=1., lengthscale=1.) kernel = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
m = GPy.models.GP_regression(X,Y,kernel) m = GPy.models.GPRegression(X, Y, kernel)
print m print m
m.plot() m.plot()
@ -47,7 +47,7 @@ def tuto_GP_regression():
ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2) ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2)
# create simple GP model # create simple GP model
m = GPy.models.GP_regression(X,Y,ker) m = GPy.models.GPRegression(X, Y, ker)
# contrain all parameters to be positive # contrain all parameters to be positive
m.constrain_positive('') m.constrain_positive('')
@ -145,7 +145,7 @@ def tuto_kernel_overview():
Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:]) Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:])
# Create GP regression model # Create GP regression model
m = GPy.models.GP_regression(X,Y,Kanova) m = GPy.models.GPRegression(X, Y, Kanova)
pb.figure(figsize=(5,5)) pb.figure(figsize=(5,5))
m.plot() m.plot()
@ -196,5 +196,5 @@ def model_interaction():
X = np.random.randn(20,1) X = np.random.randn(20,1)
Y = np.sin(X) + np.random.randn(*X.shape)*0.01 + 5. Y = np.sin(X) + np.random.randn(*X.shape)*0.01 + 5.
k = GPy.kern.rbf(1) + GPy.kern.bias(1) k = GPy.kern.rbf(1) + GPy.kern.bias(1)
return GPy.models.GP_regression(X,Y,kernel=k) return GPy.models.GPRegression(X, Y, kernel=k)

View file

@ -1,18 +1,16 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import pdb
import pylab as pb import pylab as pb
import datetime as dt import datetime as dt
from scipy import optimize from scipy import optimize
import numpy as np
try: try:
import rasmussens_minimize as rasm import rasmussens_minimize as rasm
rasm_available = True rasm_available = True
except ImportError: except ImportError:
rasm_available = False rasm_available = False
from SCG import SCG from scg import SCG
class Optimizer(): class Optimizer():
""" """
@ -51,9 +49,9 @@ class Optimizer():
start = dt.datetime.now() start = dt.datetime.now()
self.opt(**kwargs) self.opt(**kwargs)
end = dt.datetime.now() end = dt.datetime.now()
self.time = str(end-start) self.time = str(end - start)
def opt(self, f_fp = None, f = None, fp = None): def opt(self, f_fp=None, f=None, fp=None):
raise NotImplementedError, "this needs to be implemented to use the optimizer class" raise NotImplementedError, "this needs to be implemented to use the optimizer class"
def plot(self): def plot(self):
@ -78,7 +76,7 @@ class opt_tnc(Optimizer):
Optimizer.__init__(self, *args, **kwargs) Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "TNC (Scipy implementation)" self.opt_name = "TNC (Scipy implementation)"
def opt(self, f_fp = None, f = None, fp = None): def opt(self, f_fp=None, f=None, fp=None):
""" """
Run the TNC optimizer Run the TNC optimizer
@ -96,8 +94,8 @@ class opt_tnc(Optimizer):
if self.gtol is not None: if self.gtol is not None:
opt_dict['pgtol'] = self.gtol opt_dict['pgtol'] = self.gtol
opt_result = optimize.fmin_tnc(f_fp, self.x_init, messages = self.messages, opt_result = optimize.fmin_tnc(f_fp, self.x_init, messages=self.messages,
maxfun = self.max_f_eval, **opt_dict) maxfun=self.max_f_eval, **opt_dict)
self.x_opt = opt_result[0] self.x_opt = opt_result[0]
self.f_opt = f_fp(self.x_opt)[0] self.f_opt = f_fp(self.x_opt)[0]
self.funct_eval = opt_result[1] self.funct_eval = opt_result[1]
@ -108,7 +106,7 @@ class opt_lbfgsb(Optimizer):
Optimizer.__init__(self, *args, **kwargs) Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "L-BFGS-B (Scipy implementation)" self.opt_name = "L-BFGS-B (Scipy implementation)"
def opt(self, f_fp = None, f = None, fp = None): def opt(self, f_fp=None, f=None, fp=None):
""" """
Run the optimizer Run the optimizer
@ -130,8 +128,8 @@ class opt_lbfgsb(Optimizer):
if self.gtol is not None: if self.gtol is not None:
opt_dict['pgtol'] = self.gtol opt_dict['pgtol'] = self.gtol
opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint = iprint, opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint=iprint,
maxfun = self.max_f_eval, **opt_dict) maxfun=self.max_f_eval, **opt_dict)
self.x_opt = opt_result[0] self.x_opt = opt_result[0]
self.f_opt = f_fp(self.x_opt)[0] self.f_opt = f_fp(self.x_opt)[0]
self.funct_eval = opt_result[2]['funcalls'] self.funct_eval = opt_result[2]['funcalls']
@ -142,12 +140,12 @@ class opt_simplex(Optimizer):
Optimizer.__init__(self, *args, **kwargs) Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "Nelder-Mead simplex routine (via Scipy)" self.opt_name = "Nelder-Mead simplex routine (via Scipy)"
def opt(self, f_fp = None, f = None, fp = None): def opt(self, f_fp=None, f=None, fp=None):
""" """
The simplex optimizer does not require gradients. The simplex optimizer does not require gradients.
""" """
statuses = ['Converged', 'Maximum number of function evaluations made','Maximum number of iterations reached'] statuses = ['Converged', 'Maximum number of function evaluations made', 'Maximum number of iterations reached']
opt_dict = {} opt_dict = {}
if self.xtol is not None: if self.xtol is not None:
@ -157,8 +155,8 @@ class opt_simplex(Optimizer):
if self.gtol is not None: if self.gtol is not None:
print "WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it" print "WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it"
opt_result = optimize.fmin(f, self.x_init, (), disp = self.messages, opt_result = optimize.fmin(f, self.x_init, (), disp=self.messages,
maxfun = self.max_f_eval, full_output=True, **opt_dict) maxfun=self.max_f_eval, full_output=True, **opt_dict)
self.x_opt = opt_result[0] self.x_opt = opt_result[0]
self.f_opt = opt_result[1] self.f_opt = opt_result[1]
@ -172,7 +170,7 @@ class opt_rasm(Optimizer):
Optimizer.__init__(self, *args, **kwargs) Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "Rasmussen's Conjugate Gradient" self.opt_name = "Rasmussen's Conjugate Gradient"
def opt(self, f_fp = None, f = None, fp = None): def opt(self, f_fp=None, f=None, fp=None):
""" """
Run Rasmussen's Conjugate Gradient optimizer Run Rasmussen's Conjugate Gradient optimizer
""" """
@ -189,8 +187,8 @@ class opt_rasm(Optimizer):
if self.gtol is not None: if self.gtol is not None:
print "WARNING: minimize doesn't have an gtol arg, so I'm going to ignore it" print "WARNING: minimize doesn't have an gtol arg, so I'm going to ignore it"
opt_result = rasm.minimize(self.x_init, f_fp, (), messages = self.messages, opt_result = rasm.minimize(self.x_init, f_fp, (), messages=self.messages,
maxnumfuneval = self.max_f_eval) maxnumfuneval=self.max_f_eval)
self.x_opt = opt_result[0] self.x_opt = opt_result[0]
self.f_opt = opt_result[1][-1] self.f_opt = opt_result[1][-1]
self.funct_eval = opt_result[2] self.funct_eval = opt_result[2]
@ -203,7 +201,7 @@ class opt_SCG(Optimizer):
Optimizer.__init__(self, *args, **kwargs) Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "Scaled Conjugate Gradients" self.opt_name = "Scaled Conjugate Gradients"
def opt(self, f_fp = None, f = None, fp = None): def opt(self, f_fp=None, f=None, fp=None):
assert not f is None assert not f is None
assert not fp is None assert not fp is None
opt_result = SCG(f, fp, self.x_init, display=self.messages, opt_result = SCG(f, fp, self.x_init, display=self.messages,
@ -218,7 +216,7 @@ class opt_SCG(Optimizer):
self.status = opt_result[3] self.status = opt_result[3]
def get_optimizer(f_min): def get_optimizer(f_min):
from SGD import opt_SGD from sgd import opt_SGD
optimizers = {'fmin_tnc': opt_tnc, optimizers = {'fmin_tnc': opt_tnc,
'simplex': opt_simplex, 'simplex': opt_simplex,

View file

@ -292,8 +292,8 @@ class opt_SGD(Optimizer):
NLL = [] NLL = []
import pylab as plt import pylab as plt
for count, j in enumerate(features): for count, j in enumerate(features):
self.model.D = len(j) self.model.input_dim = len(j)
self.model.likelihood.D = len(j) self.model.likelihood.input_dim = len(j)
self.model.likelihood.set_data(Y[:, j]) self.model.likelihood.set_data(Y[:, j])
# self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision # self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision
@ -334,9 +334,9 @@ class opt_SGD(Optimizer):
self.f_opt = np.mean(NLL) self.f_opt = np.mean(NLL)
self.model.N = N self.model.N = N
self.model.X = X self.model.X = X
self.model.D = D self.model.input_dim = D
self.model.likelihood.N = N self.model.likelihood.N = N
self.model.likelihood.D = D self.model.likelihood.input_dim = D
self.model.likelihood.Y = Y self.model.likelihood.Y = Y
sigma = self.model.likelihood._variance sigma = self.model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache self.model.likelihood._variance = None # invalidate cache

View file

@ -13,14 +13,14 @@ class Brownian(kernpart):
""" """
Brownian Motion kernel. Brownian Motion kernel.
:param D: the number of input dimensions :param input_dim: the number of input dimensions
:type D: int :type input_dim: int
:param variance: :param variance:
:type variance: float :type variance: float
""" """
def __init__(self,D,variance=1.): def __init__(self,input_dim,variance=1.):
self.D = D self.input_dim = input_dim
assert self.D==1, "Brownian motion in 1D only" assert self.input_dim==1, "Brownian motion in 1D only"
self.Nparam = 1. self.Nparam = 1.
self.name = 'Brownian' self.name = 'Brownian'
self._set_params(np.array([variance]).flatten()) self._set_params(np.array([variance]).flatten())

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, Coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
try: try:
from constructors import rbf_sympy, sympykern # these depend on sympy from constructors import rbf_sympy, sympykern # these depend on sympy
except: except:

View file

@ -7,14 +7,14 @@ import numpy as np
import hashlib import hashlib
class bias(kernpart): class bias(kernpart):
def __init__(self,D,variance=1.): def __init__(self,input_dim,variance=1.):
""" """
:param D: the number of input dimensions :param input_dim: the number of input dimensions
:type D: int :type input_dim: int
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
""" """
self.D = D self.input_dim = input_dim
self.Nparam = 1 self.Nparam = 1
self.name = 'bias' self.name = 'bias'
self._set_params(np.array([variance]).flatten()) self._set_params(np.array([variance]).flatten())

View file

@ -21,7 +21,7 @@ from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part
from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part
from prod import prod as prodpart from prod import prod as prodpart
from symmetric import symmetric as symmetric_part from symmetric import symmetric as symmetric_part
from coregionalise import coregionalise as coregionalise_part from coregionalise import Coregionalise as coregionalise_part
from rational_quadratic import rational_quadratic as rational_quadraticpart from rational_quadratic import rational_quadratic as rational_quadraticpart
from rbfcos import rbfcos as rbfcospart from rbfcos import rbfcos as rbfcospart
from independent_outputs import independent_outputs as independent_output_part from independent_outputs import independent_outputs as independent_output_part
@ -33,8 +33,8 @@ def rbf(D,variance=1., lengthscale=None,ARD=False):
""" """
Construct an RBF kernel Construct an RBF kernel
:param D: dimensionality of the kernel, obligatory :param input_dim: dimensionality of the kernel, obligatory
:type D: int :type input_dim: int
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
:param lengthscale: the lengthscale of the kernel :param lengthscale: the lengthscale of the kernel
@ -51,7 +51,7 @@ def linear(D,variances=None,ARD=False):
Arguments Arguments
--------- ---------
D (int), obligatory input_dimD (int), obligatory
variances (np.ndarray) variances (np.ndarray)
ARD (boolean) ARD (boolean)
""" """
@ -64,7 +64,7 @@ def white(D,variance=1.):
Arguments Arguments
--------- ---------
D (int), obligatory input_dimD (int), obligatory
variance (float) variance (float)
""" """
part = whitepart(D,variance) part = whitepart(D,variance)
@ -74,8 +74,8 @@ def exponential(D,variance=1., lengthscale=None, ARD=False):
""" """
Construct an exponential kernel Construct an exponential kernel
:param D: dimensionality of the kernel, obligatory :param input_dim: dimensionality of the kernel, obligatory
:type D: int :type input_dim: int
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
:param lengthscale: the lengthscale of the kernel :param lengthscale: the lengthscale of the kernel
@ -90,8 +90,8 @@ def Matern32(D,variance=1., lengthscale=None, ARD=False):
""" """
Construct a Matern 3/2 kernel. Construct a Matern 3/2 kernel.
:param D: dimensionality of the kernel, obligatory :param input_dim: dimensionality of the kernel, obligatory
:type D: int :type input_dim: int
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
:param lengthscale: the lengthscale of the kernel :param lengthscale: the lengthscale of the kernel
@ -106,8 +106,8 @@ def Matern52(D,variance=1., lengthscale=None, ARD=False):
""" """
Construct a Matern 5/2 kernel. Construct a Matern 5/2 kernel.
:param D: dimensionality of the kernel, obligatory :param input_dim: dimensionality of the kernel, obligatory
:type D: int :type input_dim: int
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
:param lengthscale: the lengthscale of the kernel :param lengthscale: the lengthscale of the kernel
@ -124,7 +124,7 @@ def bias(D,variance=1.):
Arguments Arguments
--------- ---------
D (int), obligatory input_dim (int), obligatory
variance (float) variance (float)
""" """
part = biaspart(D,variance) part = biaspart(D,variance)
@ -133,7 +133,7 @@ def bias(D,variance=1.):
def finite_dimensional(D,F,G,variances=1.,weights=None): def finite_dimensional(D,F,G,variances=1.,weights=None):
""" """
Construct a finite dimensional kernel. Construct a finite dimensional kernel.
D: int - the number of input dimensions input_dim: int - the number of input dimensions
F: np.array of functions with shape (n,) - the n basis functions F: np.array of functions with shape (n,) - the n basis functions
G: np.array with shape (n,n) - the Gram matrix associated to F G: np.array with shape (n,n) - the Gram matrix associated to F
variances : np.ndarray with shape (n,) variances : np.ndarray with shape (n,)
@ -145,8 +145,8 @@ def spline(D,variance=1.):
""" """
Construct a spline kernel. Construct a spline kernel.
:param D: Dimensionality of the kernel :param input_dim: Dimensionality of the kernel
:type D: int :type input_dim: int
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
""" """
@ -157,8 +157,8 @@ def Brownian(D,variance=1.):
""" """
Construct a Brownian motion kernel. Construct a Brownian motion kernel.
:param D: Dimensionality of the kernel :param input_dim: Dimensionality of the kernel
:type D: int :type input_dim: int
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
""" """
@ -204,8 +204,8 @@ def periodic_exponential(D=1,variance=1., lengthscale=None, period=2*np.pi,n_fre
""" """
Construct an periodic exponential kernel Construct an periodic exponential kernel
:param D: dimensionality, only defined for D=1 :param input_dim: dimensionality, only defined for input_dim=1
:type D: int :type input_dim: int
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
:param lengthscale: the lengthscale of the kernel :param lengthscale: the lengthscale of the kernel
@ -222,8 +222,8 @@ def periodic_Matern32(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,
""" """
Construct a periodic Matern 3/2 kernel. Construct a periodic Matern 3/2 kernel.
:param D: dimensionality, only defined for D=1 :param input_dim: dimensionality, only defined for input_dim=1
:type D: int :type input_dim: int
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
:param lengthscale: the lengthscale of the kernel :param lengthscale: the lengthscale of the kernel
@ -240,8 +240,8 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,
""" """
Construct a periodic Matern 5/2 kernel. Construct a periodic Matern 5/2 kernel.
:param D: dimensionality, only defined for D=1 :param input_dim: dimensionality, only defined for input_dim=1
:type D: int :type input_dim: int
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
:param lengthscale: the lengthscale of the kernel :param lengthscale: the lengthscale of the kernel
@ -256,14 +256,14 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,
def prod(k1,k2,tensor=False): def prod(k1,k2,tensor=False):
""" """
Construct a product kernel over D from two kernels over D Construct a product kernel over input_dim from two kernels over input_dim
:param k1, k2: the kernels to multiply :param k1, k2: the kernels to multiply
:type k1, k2: kernpart :type k1, k2: kernpart
:rtype: kernel object :rtype: kernel object
""" """
part = prodpart(k1,k2,tensor) part = prodpart(k1,k2,tensor)
return kern(part.D, [part]) return kern(part.input_dim, [part])
def symmetric(k): def symmetric(k):
""" """
@ -273,7 +273,7 @@ def symmetric(k):
k_.parts = [symmetric_part(p) for p in k.parts] k_.parts = [symmetric_part(p) for p in k.parts]
return k_ return k_
def coregionalise(Nout,R=1, W=None, kappa=None): def Coregionalise(Nout,R=1, W=None, kappa=None):
p = coregionalise_part(Nout,R,W,kappa) p = coregionalise_part(Nout,R,W,kappa)
return kern(1,[p]) return kern(1,[p])
@ -282,8 +282,8 @@ def rational_quadratic(D,variance=1., lengthscale=1., power=1.):
""" """
Construct rational quadratic kernel. Construct rational quadratic kernel.
:param D: the number of input dimensions :param input_dim: the number of input dimensions
:type D: int (D=1 is the only value currently supported) :type input_dim: int (input_dim=1 is the only value currently supported)
:param variance: the variance :math:`\sigma^2` :param variance: the variance :math:`\sigma^2`
:type variance: float :type variance: float
:param lengthscale: the lengthscale :math:`\ell` :param lengthscale: the lengthscale :math:`\ell`
@ -300,7 +300,7 @@ def fixed(D, K, variance=1.):
Arguments Arguments
--------- ---------
D (int), obligatory input_dim (int), obligatory
K (np.array), obligatory K (np.array), obligatory
variance (float) variance (float)
""" """
@ -321,6 +321,6 @@ def independent_outputs(k):
for sl in k.input_slices: for sl in k.input_slices:
assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)" assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
parts = [independent_output_part(p) for p in k.parts] parts = [independent_output_part(p) for p in k.parts]
return kern(k.D+1,parts) return kern(k.input_dim+1,parts)

View file

@ -7,12 +7,12 @@ from GPy.util.linalg import mdot, pdinv
import pdb import pdb
from scipy import weave from scipy import weave
class coregionalise(kernpart): class Coregionalise(kernpart):
""" """
Kernel for Intrinsic Corregionalization Models Kernel for Intrinsic Corregionalization Models
""" """
def __init__(self,Nout,R=1, W=None, kappa=None): def __init__(self,Nout,R=1, W=None, kappa=None):
self.D = 1 self.input_dim = 1
self.name = 'coregion' self.name = 'coregion'
self.Nout = Nout self.Nout = Nout
self.R = R self.R = R

View file

@ -11,14 +11,14 @@ from prod import prod
from ..util.linalg import symmetrify from ..util.linalg import symmetrify
class kern(parameterised): class kern(parameterised):
def __init__(self, D, parts=[], input_slices=None): def __init__(self, input_dim, parts=[], input_slices=None):
""" """
This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where. This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where.
The technical code for kernels is divided into _parts_ (see e.g. rbf.py). This obnject contains a list of parts, which are computed additively. For multiplication, special _prod_ parts are used. The technical code for kernels is divided into _parts_ (see e.g. rbf.py). This obnject contains a list of parts, which are computed additively. For multiplication, special _prod_ parts are used.
:param D: The dimensioality of the kernel's input space :param input_dim: The dimensioality of the kernel's input space
:type D: int :type input_dim: int
:param parts: the 'parts' (PD functions) of the kernel :param parts: the 'parts' (PD functions) of the kernel
:type parts: list of kernpart objects :type parts: list of kernpart objects
:param input_slices: the slices on the inputs which apply to each kernel :param input_slices: the slices on the inputs which apply to each kernel
@ -29,7 +29,7 @@ class kern(parameterised):
self.Nparts = len(parts) self.Nparts = len(parts)
self.Nparam = sum([p.Nparam for p in self.parts]) self.Nparam = sum([p.Nparam for p in self.parts])
self.D = D self.input_dim = input_dim
# deal with input_slices # deal with input_slices
if input_slices is None: if input_slices is None:
@ -96,10 +96,10 @@ class kern(parameterised):
:type other: GPy.kern :type other: GPy.kern
""" """
if tensor: if tensor:
D = self.D + other.D D = self.input_dim + other.input_dim
self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices] self_input_slices = [slice(*sl.indices(self.input_dim)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.D) for sl in other.input_slices] other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices] other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices]
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices) newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
@ -111,8 +111,8 @@ class kern(parameterised):
newkern.constraints = self.constraints + other.constraints newkern.constraints = self.constraints + other.constraints
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices] newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
else: else:
assert self.D == other.D assert self.input_dim == other.input_dim
newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices) newkern = kern(self.input_dim, self.parts + other.parts, self.input_slices + other.input_slices)
# transfer constraints: # transfer constraints:
newkern.constrained_indices = self.constrained_indices + [i + self.Nparam for i in other.constrained_indices] newkern.constrained_indices = self.constrained_indices + [i + self.Nparam for i in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints newkern.constraints = self.constraints + other.constraints
@ -138,16 +138,16 @@ class kern(parameterised):
slices = [] slices = []
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices): for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
s1, s2 = [False] * K1.D, [False] * K2.D s1, s2 = [False] * K1.input_dim, [False] * K2.input_dim
s1[sl1], s2[sl2] = [True], [True] s1[sl1], s2[sl2] = [True], [True]
slices += [s1 + s2] slices += [s1 + s2]
newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)] newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)]
if tensor: if tensor:
newkern = kern(K1.D + K2.D, newkernparts, slices) newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices)
else: else:
newkern = kern(K1.D, newkernparts, slices) newkern = kern(K1.input_dim, newkernparts, slices)
newkern._follow_constrains(K1, K2) newkern._follow_constrains(K1, K2)
return newkern return newkern
@ -211,7 +211,7 @@ class kern(parameterised):
def K(self, X, X2=None, which_parts='all'): def K(self, X, X2=None, which_parts='all'):
if which_parts == 'all': if which_parts == 'all':
which_parts = [True] * self.Nparts which_parts = [True] * self.Nparts
assert X.shape[1] == self.D assert X.shape[1] == self.input_dim
if X2 is None: if X2 is None:
target = np.zeros((X.shape[0], X.shape[0])) target = np.zeros((X.shape[0], X.shape[0]))
[p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used] [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
@ -225,11 +225,11 @@ class kern(parameterised):
:param dL_dK: An array of dL_dK derivaties, dL_dK :param dL_dK: An array of dL_dK derivaties, dL_dK
:type dL_dK: Np.ndarray (N x M) :type dL_dK: Np.ndarray (N x M)
:param X: Observed data inputs :param X: Observed data inputs
:type X: np.ndarray (N x D) :type X: np.ndarray (N x input_dim)
:param X2: Observed dara inputs (optional, defaults to X) :param X2: Observed dara inputs (optional, defaults to X)
:type X2: np.ndarray (M x D) :type X2: np.ndarray (M x input_dim)
""" """
assert X.shape[1] == self.D assert X.shape[1] == self.input_dim
target = np.zeros(self.Nparam) target = np.zeros(self.Nparam)
if X2 is None: if X2 is None:
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)] [p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
@ -251,20 +251,20 @@ class kern(parameterised):
def Kdiag(self, X, which_parts='all'): def Kdiag(self, X, which_parts='all'):
if which_parts == 'all': if which_parts == 'all':
which_parts = [True] * self.Nparts which_parts = [True] * self.Nparts
assert X.shape[1] == self.D assert X.shape[1] == self.input_dim
target = np.zeros(X.shape[0]) target = np.zeros(X.shape[0])
[p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on] [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on]
return target return target
def dKdiag_dtheta(self, dL_dKdiag, X): def dKdiag_dtheta(self, dL_dKdiag, X):
assert X.shape[1] == self.D assert X.shape[1] == self.input_dim
assert dL_dKdiag.size == X.shape[0] assert dL_dKdiag.size == X.shape[0]
target = np.zeros(self.Nparam) target = np.zeros(self.Nparam)
[p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)] [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
return self._transform_gradients(target) return self._transform_gradients(target)
def dKdiag_dX(self, dL_dKdiag, X): def dKdiag_dX(self, dL_dKdiag, X):
assert X.shape[1] == self.D assert X.shape[1] == self.input_dim
target = np.zeros_like(X) target = np.zeros_like(X)
[p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target return target
@ -386,7 +386,7 @@ class kern(parameterised):
def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs): def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs):
if which_parts == 'all': if which_parts == 'all':
which_parts = [True] * self.Nparts which_parts = [True] * self.Nparts
if self.D == 1: if self.input_dim == 1:
if x is None: if x is None:
x = np.zeros((1, 1)) x = np.zeros((1, 1))
else: else:
@ -408,7 +408,7 @@ class kern(parameterised):
pb.xlabel("x") pb.xlabel("x")
pb.ylabel("k(x,%0.1f)" % x) pb.ylabel("k(x,%0.1f)" % x)
elif self.D == 2: elif self.input_dim == 2:
if x is None: if x is None:
x = np.zeros((1, 2)) x = np.zeros((1, 2))
else: else:

View file

@ -3,16 +3,16 @@
class kernpart(object): class kernpart(object):
def __init__(self,D): def __init__(self,input_dim):
""" """
The base class for a kernpart: a positive definite function which forms part of a kernel The base class for a kernpart: a positive definite function which forms part of a kernel
:param D: the number of input dimensions to the function :param input_dim: the number of input dimensions to the function
:type D: int :type input_dim: int
Do not instantiate. Do not instantiate.
""" """
self.D = D self.input_dim = input_dim
self.Nparam = 1 self.Nparam = 1
self.name = 'unnamed' self.name = 'unnamed'

View file

@ -13,10 +13,10 @@ class linear(kernpart):
.. math:: .. math::
k(x,y) = \sum_{i=1}^D \sigma^2_i x_iy_i k(x,y) = \sum_{i=1}^input_dim \sigma^2_i x_iy_i
:param D: the number of input dimensions :param input_dim: the number of input dimensions
:type D: int :type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i` :param variances: the vector of variances :math:`\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there is only one variance parameter) :type variances: array or list of the appropriate size (or float if there is only one variance parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension. :param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension.
@ -24,8 +24,8 @@ class linear(kernpart):
:rtype: kernel object :rtype: kernel object
""" """
def __init__(self, D, variances=None, ARD=False): def __init__(self, input_dim, variances=None, ARD=False):
self.D = D self.input_dim = input_dim
self.ARD = ARD self.ARD = ARD
if ARD == False: if ARD == False:
self.Nparam = 1 self.Nparam = 1
@ -37,13 +37,13 @@ class linear(kernpart):
variances = np.ones(1) variances = np.ones(1)
self._Xcache, self._X2cache = np.empty(shape=(2,)) self._Xcache, self._X2cache = np.empty(shape=(2,))
else: else:
self.Nparam = self.D self.Nparam = self.input_dim
self.name = 'linear' self.name = 'linear'
if variances is not None: if variances is not None:
variances = np.asarray(variances) variances = np.asarray(variances)
assert variances.size == self.D, "bad number of lengthscales" assert variances.size == self.input_dim, "bad number of lengthscales"
else: else:
variances = np.ones(self.D) variances = np.ones(self.input_dim)
self._set_params(variances.flatten()) self._set_params(variances.flatten())
# initialize cache # initialize cache
@ -82,7 +82,7 @@ class linear(kernpart):
def dK_dtheta(self, dL_dK, X, X2, target): def dK_dtheta(self, dL_dK, X, X2, target):
if self.ARD: if self.ARD:
if X2 is None: if X2 is None:
[np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.D)] [np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)]
else: else:
product = X[:, None, :] * X2[None, :, :] product = X[:, None, :] * X2[None, :, :]
target += (dL_dK[:, :, None] * product).sum(0).sum(0) target += (dL_dK[:, :, None] * product).sum(0).sum(0)
@ -153,7 +153,7 @@ class linear(kernpart):
# psi2_real[n, m, m_prime] = np.dot(tmp, ( # psi2_real[n, m, m_prime] = np.dot(tmp, (
# self._Z[m_prime:m_prime + 1] * self.variances).T) # self._Z[m_prime:m_prime + 1] * self.variances).T)
# mu2_S = (self._mu[:, None, :] * self._mu[:, :, None]) # mu2_S = (self._mu[:, None, :] * self._mu[:, :, None])
# mu2_S[:, np.arange(self.D), np.arange(self.D)] += self._S # mu2_S[:, np.arange(self.input_dim), np.arange(self.input_dim)] += self._S
# psi2 = (self.ZA[None, :, None, :] * mu2_S[:, None]).sum(-1) # psi2 = (self.ZA[None, :, None, :] * mu2_S[:, None]).sum(-1)
# psi2 = (psi2[:, :, None] * self.ZA[None, None]).sum(-1) # psi2 = (psi2[:, :, None] * self.ZA[None, None]).sum(-1)
# psi2_tensor = np.tensordot(self.ZZ[None, :, :, :] * np.square(self.variances), self.mu2_S[:, None, None, :], ((3), (3))).squeeze().T # psi2_tensor = np.tensordot(self.ZZ[None, :, :, :] * np.square(self.variances), self.mu2_S[:, None, None, :], ((3), (3))).squeeze().T

View file

@ -22,14 +22,14 @@ class prod(kernpart):
self.k1 = k1 self.k1 = k1
self.k2 = k2 self.k2 = k2
if tensor: if tensor:
self.D = k1.D + k2.D self.input_dim = k1.input_dim + k2.input_dim
self.slice1 = slice(0,self.k1.D) self.slice1 = slice(0,self.k1.input_dim)
self.slice2 = slice(self.k1.D,self.k1.D+self.k2.D) self.slice2 = slice(self.k1.input_dim,self.k1.input_dim+self.k2.input_dim)
else: else:
assert k1.D == k2.D, "Error: The input spaces of the kernels to sum don't have the same dimension." assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to sum don't have the same dimension."
self.D = k1.D self.input_dim = k1.input_dim
self.slice1 = slice(0,self.D) self.slice1 = slice(0,self.input_dim)
self.slice2 = slice(0,self.D) self.slice2 = slice(0,self.input_dim)
self._X, self._X2, self._params = np.empty(shape=(3,1)) self._X, self._X2, self._params = np.empty(shape=(3,1))
self._set_params(np.hstack((k1._get_params(),k2._get_params()))) self._set_params(np.hstack((k1._get_params(),k2._get_params())))

View file

@ -18,8 +18,8 @@ class rbf(kernpart):
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input. where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
:param D: the number of input dimensions :param input_dim: the number of input dimensions
:type D: int :type input_dim: int
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
:param lengthscale: the vector of lengthscale of the kernel :param lengthscale: the vector of lengthscale of the kernel
@ -31,8 +31,8 @@ class rbf(kernpart):
.. Note: this object implements both the ARD and 'spherical' version of the function .. Note: this object implements both the ARD and 'spherical' version of the function
""" """
def __init__(self,D,variance=1.,lengthscale=None,ARD=False): def __init__(self,input_dim,variance=1.,lengthscale=None,ARD=False):
self.D = D self.input_dim = input_dim
self.name = 'rbf' self.name = 'rbf'
self.ARD = ARD self.ARD = ARD
if not ARD: if not ARD:
@ -43,12 +43,12 @@ class rbf(kernpart):
else: else:
lengthscale = np.ones(1) lengthscale = np.ones(1)
else: else:
self.Nparam = self.D + 1 self.Nparam = self.input_dim + 1
if lengthscale is not None: if lengthscale is not None:
lengthscale = np.asarray(lengthscale) lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.D, "bad number of lengthscales" assert lengthscale.size == self.input_dim, "bad number of lengthscales"
else: else:
lengthscale = np.ones(self.D) lengthscale = np.ones(self.input_dim)
self._set_params(np.hstack((variance,lengthscale.flatten()))) self._set_params(np.hstack((variance,lengthscale.flatten())))
@ -100,7 +100,7 @@ class rbf(kernpart):
code = """ code = """
int q,i,j; int q,i,j;
double tmp; double tmp;
for(q=0; q<D; q++){ for(q=0; q<input_dim; q++){
tmp = 0; tmp = 0;
for(i=0; i<N; i++){ for(i=0; i<N; i++){
for(j=0; j<i; j++){ for(j=0; j<i; j++){
@ -110,12 +110,12 @@ class rbf(kernpart):
target(q+1) += var_len3(q)*tmp; target(q+1) += var_len3(q)*tmp;
} }
""" """
N,M,D = X.shape[0], X.shape[0], self.D N, M, input_dim = X.shape[0], X.shape[0], self.input_dim
else: else:
code = """ code = """
int q,i,j; int q,i,j;
double tmp; double tmp;
for(q=0; q<D; q++){ for(q=0; q<input_dim; q++){
tmp = 0; tmp = 0;
for(i=0; i<N; i++){ for(i=0; i<N; i++){
for(j=0; j<M; j++){ for(j=0; j<M; j++){
@ -125,9 +125,9 @@ class rbf(kernpart):
target(q+1) += var_len3(q)*tmp; target(q+1) += var_len3(q)*tmp;
} }
""" """
N,M,D = X.shape[0], X2.shape[0], self.D N, M, input_dim = X.shape[0], X2.shape[0], self.input_dim
#[np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.D)] #[np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
weave.inline(code, arg_names=['N','M','D','X','X2','target','dvardLdK','var_len3'], weave.inline(code, arg_names=['N','M','input_dim','X','X2','target','dvardLdK','var_len3'],
type_converters=weave.converters.blitz,**self.weave_options) type_converters=weave.converters.blitz,**self.weave_options)
else: else:
target[1] += (self.variance/self.lengthscale)*np.sum(self._K_dvar*self._K_dist2*dL_dK) target[1] += (self.variance/self.lengthscale)*np.sum(self._K_dvar*self._K_dist2*dL_dK)
@ -278,8 +278,8 @@ class rbf(kernpart):
psi2 = np.empty((N,M,M)) psi2 = np.empty((N,M,M))
psi2_Zdist_sq = self._psi2_Zdist_sq psi2_Zdist_sq = self._psi2_Zdist_sq
_psi2_denom = self._psi2_denom.squeeze().reshape(N,self.D) _psi2_denom = self._psi2_denom.squeeze().reshape(N,self.input_dim)
half_log_psi2_denom = 0.5*np.log(self._psi2_denom).squeeze().reshape(N,self.D) half_log_psi2_denom = 0.5*np.log(self._psi2_denom).squeeze().reshape(N,self.input_dim)
variance_sq = float(np.square(self.variance)) variance_sq = float(np.square(self.variance))
if self.ARD: if self.ARD:
lengthscale2 = self.lengthscale2 lengthscale2 = self.lengthscale2

View file

@ -8,13 +8,13 @@ class white(kernpart):
""" """
White noise kernel. White noise kernel.
:param D: the number of input dimensions :param input_dim: the number of input dimensions
:type D: int :type input_dim: int
:param variance: :param variance:
:type variance: float :type variance: float
""" """
def __init__(self,D,variance=1.): def __init__(self,input_dim,variance=1.):
self.D = D self.input_dim = input_dim
self.Nparam = 1 self.Nparam = 1
self.name = 'white' self.name = 'white'
self._set_params(np.array([variance]).flatten()) self._set_params(np.array([variance]).flatten())

View file

@ -1,4 +1,4 @@
from EP import EP from ep import EP
from Gaussian import Gaussian from gaussian import Gaussian
# TODO: from Laplace import Laplace # TODO: from Laplace import Laplace
import likelihood_functions as functions import likelihood_functions as functions

View file

@ -4,23 +4,23 @@ from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot
from likelihood import likelihood from likelihood import likelihood
class EP(likelihood): class EP(likelihood):
def __init__(self,data,likelihood_function,epsilon=1e-3,power_ep=[1.,1.]): def __init__(self,data,LikelihoodFunction,epsilon=1e-3,power_ep=[1.,1.]):
""" """
Expectation Propagation Expectation Propagation
Arguments Arguments
--------- ---------
epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
likelihood_function : a likelihood function (see likelihood_functions.py) LikelihoodFunction : a likelihood function (see likelihood_functions.py)
""" """
self.likelihood_function = likelihood_function self.LikelihoodFunction = LikelihoodFunction
self.epsilon = epsilon self.epsilon = epsilon
self.eta, self.delta = power_ep self.eta, self.delta = power_ep
self.data = data self.data = data
self.N, self.D = self.data.shape self.N, self.input_dim = self.data.shape
self.is_heteroscedastic = True self.is_heteroscedastic = True
self.Nparams = 0 self.Nparams = 0
self._transf_data = self.likelihood_function._preprocess_values(data) self._transf_data = self.LikelihoodFunction._preprocess_values(data)
#Initial values - Likelihood approximation parameters: #Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde) #p(y|f) = t(f|tau_tilde,v_tilde)
@ -48,7 +48,7 @@ class EP(likelihood):
def predictive_values(self,mu,var,full_cov): def predictive_values(self,mu,var,full_cov):
if full_cov: if full_cov:
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood" raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
return self.likelihood_function.predictive_values(mu,var) return self.LikelihoodFunction.predictive_values(mu,var)
def _get_params(self): def _get_params(self):
return np.zeros(0) return np.zeros(0)
@ -110,7 +110,7 @@ class EP(likelihood):
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i] self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i] self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
#Marginal moments #Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update #Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
@ -200,7 +200,7 @@ class EP(likelihood):
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments #Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update #Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
@ -295,7 +295,7 @@ class EP(likelihood):
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments #Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update #Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])

View file

@ -15,7 +15,7 @@ class Gaussian(likelihood):
self.is_heteroscedastic = False self.is_heteroscedastic = False
self.Nparams = 1 self.Nparams = 1
self.Z = 0. # a correction factor which accounts for the approximation made self.Z = 0. # a correction factor which accounts for the approximation made
N, self.D = data.shape N, self.input_dim = data.shape
# normalization # normalization
if normalize: if normalize:
@ -24,8 +24,8 @@ class Gaussian(likelihood):
# Don't scale outputs which have zero variance to zero. # Don't scale outputs which have zero variance to zero.
self._scale[np.nonzero(self._scale == 0.)] = 1.0e-3 self._scale[np.nonzero(self._scale == 0.)] = 1.0e-3
else: else:
self._offset = np.zeros((1, self.D)) self._offset = np.zeros((1, self.input_dim))
self._scale = np.ones((1, self.D)) self._scale = np.ones((1, self.input_dim))
self.set_data(data) self.set_data(data)
@ -35,7 +35,7 @@ class Gaussian(likelihood):
def set_data(self, data): def set_data(self, data):
self.data = data self.data = data
self.N, D = data.shape self.N, D = data.shape
assert D == self.D assert D == self.input_dim
self.Y = (self.data - self._offset) / self._scale self.Y = (self.data - self._offset) / self._scale
if D > self.N: if D > self.N:
self.YYT = np.dot(self.Y, self.Y.T) self.YYT = np.dot(self.Y, self.Y.T)
@ -52,7 +52,7 @@ class Gaussian(likelihood):
def _set_params(self, x): def _set_params(self, x):
x = np.float64(x) x = np.float64(x)
if self._variance != x: if np.all(self._variance != x):
if x == 0.: if x == 0.:
self.precision = None self.precision = None
self.V = None self.V = None
@ -68,9 +68,9 @@ class Gaussian(likelihood):
""" """
mean = mu * self._scale + self._offset mean = mu * self._scale + self._offset
if full_cov: if full_cov:
if self.D > 1: if self.input_dim > 1:
raise NotImplementedError, "TODO" raise NotImplementedError, "TODO"
# Note. for D>1, we need to re-normalise all the outputs independently. # Note. for input_dim>1, we need to re-normalise all the outputs independently.
# This will mess up computations of diag(true_var), below. # This will mess up computations of diag(true_var), below.
# note that the upper, lower quantiles should be the same shape as mean # note that the upper, lower quantiles should be the same shape as mean
# Augment the output variance with the likelihood variance and rescale. # Augment the output variance with the likelihood variance and rescale.

View file

@ -10,12 +10,12 @@ from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import link_functions import link_functions
class likelihood_function(object): class LikelihoodFunction(object):
""" """
Likelihood class for doing Expectation propagation Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.darray) :param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the likelihood_function used ..Note:: Y values allowed depend on the LikelihoodFunction used
""" """
def __init__(self,link): def __init__(self,link):
if link == self._analytical: if link == self._analytical:
@ -69,7 +69,7 @@ class likelihood_function(object):
sigma2_hat = m2 - mu_hat**2 # Second central moment sigma2_hat = m2 - mu_hat**2 # Second central moment
return float(Z_hat), float(mu_hat), float(sigma2_hat) return float(Z_hat), float(mu_hat), float(sigma2_hat)
class binomial(likelihood_function): class Binomial(LikelihoodFunction):
""" """
Probit likelihood Probit likelihood
Y is expected to take values in {-1,1} Y is expected to take values in {-1,1}
@ -82,7 +82,7 @@ class binomial(likelihood_function):
self._analytical = link_functions.probit self._analytical = link_functions.probit
if not link: if not link:
link = self._analytical link = self._analytical
super(binomial, self).__init__(link) super(Binomial, self).__init__(link)
def _distribution(self,gp,obs): def _distribution(self,gp,obs):
pass pass
@ -134,7 +134,7 @@ class binomial(likelihood_function):
p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var)) p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var))
return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var
class Poisson(likelihood_function): class Poisson(LikelihoodFunction):
""" """
Poisson likelihood Poisson likelihood
Y is expected to take values in {0,1,2,...} Y is expected to take values in {0,1,2,...}

View file

@ -1,252 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, tdot, symmetrify,pdinv
from ..util.plot import gpplot
from .. import kern
from scipy import stats, linalg
from ..core import sparse_GP
def backsub_both_sides(L,X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1)
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
class FITC(sparse_GP):
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
super(FITC, self).__init__(X, likelihood, kernel, normalize_X=normalize_X)
def update_likelihood_approximation(self):
"""
Approximates a non-gaussian likelihood using Expectation Propagation
For a Gaussian (or direct: TODO) likelihood, no iteration is required:
this function does nothing
Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in sparse_GP.
The true precison is now 'true_precision' not 'precision'.
"""
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
def _computations(self):
#factor Kmm
self.Lm = jitchol(self.Kmm)
self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy()
self.Diag0 = self.psi0 - np.diag(self.Qnn)
self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #Includes Diag0 in the precision
self.V_star = self.beta_star * self.likelihood.Y
# The rather complex computations of self.A
if self.has_uncertain_inputs:
raise NotImplementedError
else:
if self.likelihood.is_heteroscedastic:
assert self.likelihood.D == 1
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N)))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
# factor B
self.B = np.eye(self.M) + self.A
self.LB = jitchol(self.B)
self.LBi = chol_inv(self.LB)
self.psi1V = np.dot(self.psi1, self.V_star)
Lmi_psi1V, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0)
Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
b_psi1_Ki = self.beta_star * Kmmipsi1.T
Ki_pbp_Ki = np.dot(Kmmipsi1,b_psi1_Ki)
Kmmi = np.dot(self.Lmi.T,self.Lmi)
LBiLmi = np.dot(self.LBi,self.Lmi)
LBL_inv = np.dot(LBiLmi.T,LBiLmi)
VVT = np.outer(self.V_star,self.V_star)
VV_p_Ki = np.dot(VVT,Kmmipsi1.T)
Ki_pVVp_Ki = np.dot(Kmmipsi1,VV_p_Ki)
psi1beta = self.psi1*self.beta_star.T
H = self.Kmm + mdot(self.psi1,psi1beta.T)
LH = jitchol(H)
LHi = chol_inv(LH)
Hi = np.dot(LHi.T,LHi)
betapsi1TLmiLBi = np.dot(psi1beta.T,LBiLmi.T)
alpha = np.array([np.dot(a.T,a) for a in betapsi1TLmiLBi])[:,None]
gamma_1 = mdot(VVT,self.psi1.T,Hi)
pHip = mdot(self.psi1.T,Hi,self.psi1)
gamma_2 = mdot(self.beta_star*pHip,self.V_star)
gamma_3 = self.V_star * gamma_2
self._dL_dpsi0 = -0.5 * self.beta_star#dA_dpsi0: logdet(self.beta_star)
self._dL_dpsi0 += .5 * self.V_star**2 #dA_psi0: yT*beta_star*y
self._dL_dpsi0 += .5 *alpha #dC_dpsi0
self._dL_dpsi0 += 0.5*mdot(self.beta_star*pHip,self.V_star)**2 - self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T #dD_dpsi0
self._dL_dpsi1 = b_psi1_Ki.copy() #dA_dpsi1: logdet(self.beta_star)
self._dL_dpsi1 += -np.dot(psi1beta.T,LBL_inv) #dC_dpsi1
self._dL_dpsi1 += gamma_1 - mdot(psi1beta.T,Hi,self.psi1,gamma_1) #dD_dpsi1
self._dL_dKmm = -0.5 * np.dot(Kmmipsi1,b_psi1_Ki) #dA_dKmm: logdet(self.beta_star)
self._dL_dKmm += .5*(LBL_inv - Kmmi) + mdot(LBL_inv,psi1beta,Kmmipsi1.T) #dC_dKmm
self._dL_dKmm += -.5 * mdot(Hi,self.psi1,gamma_1) #dD_dKmm
self._dpsi1_dtheta = 0
self._dpsi1_dX = 0
self._dKmm_dtheta = 0
self._dKmm_dX = 0
self._dpsi1_dX_jkj = 0
self._dpsi1_dtheta_jkj = 0
for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.N),self.V_star,alpha,gamma_2,gamma_3):
K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
#Diag_dpsi1 = Diag_dA_dpsi1: yT*beta_star*y + Diag_dC_dpsi1 +Diag_dD_dpsi1
_dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
#Diag_dKmm = Diag_dA_dKmm: yT*beta_star*y +Diag_dC_dKmm +Diag_dD_dKmm
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm ,self.Z)
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
# the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedatic derivates not implemented"
else:
# likelihood is not heterscedatic
dbstar_dnoise = self.likelihood.precision * (self.beta_star**2 * self.Diag0[:,None] - self.beta_star)
Lmi_psi1 = mdot(self.Lmi,self.psi1)
LBiLmipsi1 = np.dot(self.LBi,Lmi_psi1)
aux_0 = np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V)
dA_dnoise = 0.5 * self.D * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.D * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
alpha = mdot(LBiLmipsi1,self.V_star)
alpha_ = mdot(LBiLmipsi1.T,alpha)
dD_dnoise_2 = -0.5 * self.D * np.sum(alpha_**2 * dbstar_dnoise )
dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y)
dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star)
dD_dnoise = dD_dnoise_1 + dD_dnoise_2
self.partial_for_likelihood = dA_dnoise + dC_dnoise + dD_dnoise
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
C = -self.D * (np.sum(np.log(np.diag(self.LB))))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + C + D
def _log_likelihood_gradients(self):
pass
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self):
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X)
dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1,self.X,self.Z)
dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm,X=self.Z)
dL_dtheta += self._dKmm_dtheta
dL_dtheta += self._dpsi1_dtheta
return dL_dtheta
def dL_dZ(self):
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X)
dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm,X=self.Z)
dL_dZ += self._dpsi1_dX
dL_dZ += self._dKmm_dX
return dL_dZ
def _raw_predict(self, Xnew, which_parts, full_cov=False):
if self.likelihood.is_heteroscedastic:
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.likelihood.precision.flatten())
self.Diag = self.Diag0 * Iplus_Dprod_i
self.P = Iplus_Dprod_i[:,None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi,self.psi1)
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
self.w = self.Diag * self.likelihood.v_tilde
self.Gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde))
self.mu = self.w + np.dot(self.P,self.Gamma)
"""
Make a prediction for the generalized FITC model
Arguments
---------
X : Input prediction data - Nx1 numpy array (floats)
"""
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1)
C = np.eye(self.M) - np.dot(V.T,V)
mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
#self.C = C
#self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
#self.mu_u = mu_u
#self.U = U
# q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
mu_H = np.dot(mu_u,self.mu)
self.mu_H = mu_H
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
KR0T = np.dot(Kx.T,self.Lmi.T)
mu_star = np.dot(KR0T,mu_H)
if full_cov:
Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
return mu_star[:,None],var
else:
raise NotImplementedError, "homoscedastic fitc not implemented"
"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]
"""

View file

@ -1,14 +1,11 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from gp_regression import GPRegression
from GP_regression import GP_regression from sparse_gp_regression import SparseGPRegression
from GP_classification import GP_classification from sparse_gp_classification import SparseGPClassification
from sparse_GP_regression import sparse_GP_regression from fitc_classification import FITCClassification
from sparse_GP_classification import sparse_GP_classification from gplvm import GPLVM
from FITC_classification import FITC_classification from warped_gp import WarpedGP
from GPLVM import GPLVM from bayesian_gplvm import BayesianGPLVM
from warped_GP import warpedGP
from sparse_GPLVM import sparse_GPLVM
from Bayesian_GPLVM import Bayesian_GPLVM
from mrd import MRD from mrd import MRD

View file

@ -2,21 +2,16 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
import pylab as pb from ..core import SparseGP
import sys, pdb
from GPLVM import GPLVM
from ..core import sparse_GP
from GPy.util.linalg import pdinv
from ..likelihoods import Gaussian from ..likelihoods import Gaussian
from .. import kern from .. import kern
from numpy.linalg.linalg import LinAlgError
import itertools import itertools
from matplotlib.colors import colorConverter from matplotlib.colors import colorConverter
from matplotlib.figure import SubplotParams
from GPy.inference.optimization import SCG from GPy.inference.optimization import SCG
from GPy.util import plot_latent from GPy.util import plot_latent
from GPy.models.gplvm import GPLVM
class Bayesian_GPLVM(sparse_GP, GPLVM): class BayesianGPLVM(SparseGP, GPLVM):
""" """
Bayesian Gaussian Process Latent Variable Model Bayesian Gaussian Process Latent Variable Model
@ -64,7 +59,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
self._savedpsiKmm = [] self._savedpsiKmm = []
self._savedABCD = [] self._savedABCD = []
sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs) SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs)
self._set_params(self._get_params()) self._set_params(self._get_params())
@property @property
@ -80,19 +75,19 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
def _get_param_names(self): def _get_param_names(self):
X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], [])
S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], [])
return (X_names + S_names + sparse_GP._get_param_names(self)) return (X_names + S_names + SparseGP._get_param_names(self))
def _get_params(self): def _get_params(self):
""" """
Horizontally stacks the parameters in order to present them to the optimizer. Horizontally stacks the parameters in order to present them to the optimizer.
The resulting 1-D array has this structure: The resulting 1-input_dim array has this structure:
=============================================================== ===============================================================
| mu | S | Z | theta | beta | | mu | S | Z | theta | beta |
=============================================================== ===============================================================
""" """
x = np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self))) x = np.hstack((self.X.flatten(), self.X_variance.flatten(), SparseGP._get_params(self)))
return x return x
def _clipped(self, x): def _clipped(self, x):
@ -104,7 +99,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
N, input_dim = self.N, self.input_dim N, input_dim = self.N, self.input_dim
self.X = x[:self.X.size].reshape(N, input_dim).copy() self.X = x[:self.X.size].reshape(N, input_dim).copy()
self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy() self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy()
sparse_GP._set_params(self, x[(2 * N * input_dim):]) SparseGP._set_params(self, x[(2 * N * input_dim):])
# self.oldps = x # self.oldps = x
# except (LinAlgError, FloatingPointError, ZeroDivisionError): # except (LinAlgError, FloatingPointError, ZeroDivisionError):
# print "\rWARNING: Caught LinAlgError, continueing without setting " # print "\rWARNING: Caught LinAlgError, continueing without setting "
@ -134,7 +129,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.N return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.N
def log_likelihood(self): def log_likelihood(self):
ll = sparse_GP.log_likelihood(self) ll = SparseGP.log_likelihood(self)
kl = self.KL_divergence() kl = self.KL_divergence()
# if ll < -2E4: # if ll < -2E4:
@ -151,14 +146,14 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]]) self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]])
# sf2 = self.scale_factor ** 2 # sf2 = self.scale_factor ** 2
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y) A = -0.5 * self.N * self.input_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y)
# B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2) # B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else: else:
A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT A = -0.5 * self.N * self.input_dim * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
# B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2) # B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2)) C = -self.input_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
self._savedABCD.append([self.f_call, A, B, C, D]) self._savedABCD.append([self.f_call, A, B, C, D])
@ -181,7 +176,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
# d_dS = (dL_dS).flatten() # d_dS = (dL_dS).flatten()
# ======================== # ========================
self.dbound_dmuS = np.hstack((d_dmu, d_dS)) self.dbound_dmuS = np.hstack((d_dmu, d_dS))
self.dbound_dZtheta = sparse_GP._log_likelihood_gradients(self) self.dbound_dZtheta = SparseGP._log_likelihood_gradients(self)
return self._clipped(np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))) return self._clipped(np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta)))
def plot_latent(self, *args, **kwargs): def plot_latent(self, *args, **kwargs):
@ -200,7 +195,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
means = np.zeros((N_test, input_dim)) means = np.zeros((N_test, input_dim))
covars = np.zeros((N_test, input_dim)) covars = np.zeros((N_test, input_dim))
dpsi0 = -0.5 * self.D * self.likelihood.precision dpsi0 = -0.5 * self.input_dim * self.likelihood.precision
dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods
V = self.likelihood.precision * Y V = self.likelihood.precision * Y
dpsi1 = np.dot(self.Cpsi1V, V.T) dpsi1 = np.dot(self.Cpsi1V, V.T)
@ -263,7 +258,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
def __getstate__(self): def __getstate__(self):
return (self.likelihood, self.input_dim, self.X, self.X_variance, return (self.likelihood, self.input_dim, self.X, self.X_variance,
self.init, self.M, self.Z, self.kern, self.init, self.num_inducing, self.Z, self.kern,
self.oldpsave, self._debug) self.oldpsave, self._debug)
def __setstate__(self, state): def __setstate__(self, state):
@ -274,8 +269,8 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
X = x[start:end].reshape(self.N, self.input_dim) X = x[start:end].reshape(self.N, self.input_dim)
start, end = end, end + self.X_variance.size start, end = end, end + self.X_variance.size
X_v = x[start:end].reshape(self.N, self.input_dim) X_v = x[start:end].reshape(self.N, self.input_dim)
start, end = end, end + (self.M * self.input_dim) start, end = end, end + (self.num_inducing * self.input_dim)
Z = x[start:end].reshape(self.M, self.input_dim) Z = x[start:end].reshape(self.num_inducing, self.input_dim)
start, end = end, end + self.input_dim start, end = end, end + self.input_dim
theta = x[start:] theta = x[start:]
return X, X_v, Z, theta return X, X_v, Z, theta
@ -353,12 +348,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6))) figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6)))
fig = figs[-1] fig = figs[-1]
ax8 = fig.add_subplot(121) ax8 = fig.add_subplot(121)
ax8.text(.5, .5, r"${\mathbf{A,B,C,D}}$", color='k', alpha=.5, transform=ax8.transAxes, ax8.text(.5, .5, r"${\mathbf{A,B,C,input_dim}}$", color='k', alpha=.5, transform=ax8.transAxes,
ha='center', va='center') ha='center', va='center')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 1], label='A') ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 1], label='A')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 2], label='B') ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 2], label='B')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 3], label='C') ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 3], label='C')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 4], label='D') ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 4], label='input_dim')
ax8.legend() ax8.legend()
figs[-1].canvas.draw() figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(.15, 0, 1, .86)) figs[-1].tight_layout(rect=(.15, 0, 1, .86))

252
GPy/models/fitc.py Normal file
View file

@ -0,0 +1,252 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, tdot, symmetrify, pdinv
from ..util.plot import gpplot
from .. import kern
from scipy import stats, linalg
from GPy.core.sparse_gp import SparseGP
def backsub_both_sides(L, X):
    """
    Return L^-T * X * L^-1, assuming X is symmetric and L is lower triangular
    (typically a Cholesky factor).

    :param L: lower-triangular matrix
    :param X: symmetric matrix of matching size
    :returns: L^-T * X * L^-1

    Uses the public scipy.linalg.solve_triangular instead of the private
    (and since-removed) scipy.linalg.lapack.flapack namespace.
    """
    # Solve L^T * tmp = X, i.e. tmp = L^-T * X.
    tmp = linalg.solve_triangular(L, X, lower=True, trans='T')
    # Solve L^T * out = tmp^T, i.e. out = L^-T * X^T * L^-1; transposing and
    # using the symmetry of X gives L^-T * X * L^-1.
    return linalg.solve_triangular(L, tmp.T, lower=True, trans='T').T
class FITC(SparseGP):
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
super(FITC, self).__init__(X, likelihood, kernel, normalize_X=normalize_X)
def update_likelihood_approximation(self):
"""
Approximates a non-gaussian likelihood using Expectation Propagation
For a Gaussian (or direct: TODO) likelihood, no iteration is required:
this function does nothing
Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in SparseGP.
The true precison is now 'true_precision' not 'precision'.
"""
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_FITC(self.Kmm, self.psi1, self.psi0)
self._set_params(self._get_params()) # update the GP
def _computations(self):
# factor Kmm
self.Lm = jitchol(self.Kmm)
self.Lmi, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.eye(self.num_inducing), lower=1)
Lmipsi1 = np.dot(self.Lmi, self.psi1)
self.Qnn = np.dot(Lmipsi1.T, Lmipsi1).copy()
self.Diag0 = self.psi0 - np.diag(self.Qnn)
self.beta_star = self.likelihood.precision / (1. + self.likelihood.precision * self.Diag0[:, None]) # Includes Diag0 in the precision
self.V_star = self.beta_star * self.likelihood.Y
# The rather complex computations of self.A
if self.has_uncertain_inputs:
raise NotImplementedError
else:
if self.likelihood.is_heteroscedastic:
assert self.likelihood.input_dim == 1
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N)))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
# factor B
self.B = np.eye(self.num_inducing) + self.A
self.LB = jitchol(self.B)
self.LBi = chol_inv(self.LB)
self.psi1V = np.dot(self.psi1, self.V_star)
Lmi_psi1V, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0)
Kmmipsi1 = np.dot(self.Lmi.T, Lmipsi1)
b_psi1_Ki = self.beta_star * Kmmipsi1.T
Ki_pbp_Ki = np.dot(Kmmipsi1, b_psi1_Ki)
Kmmi = np.dot(self.Lmi.T, self.Lmi)
LBiLmi = np.dot(self.LBi, self.Lmi)
LBL_inv = np.dot(LBiLmi.T, LBiLmi)
VVT = np.outer(self.V_star, self.V_star)
VV_p_Ki = np.dot(VVT, Kmmipsi1.T)
Ki_pVVp_Ki = np.dot(Kmmipsi1, VV_p_Ki)
psi1beta = self.psi1 * self.beta_star.T
H = self.Kmm + mdot(self.psi1, psi1beta.T)
LH = jitchol(H)
LHi = chol_inv(LH)
Hi = np.dot(LHi.T, LHi)
betapsi1TLmiLBi = np.dot(psi1beta.T, LBiLmi.T)
alpha = np.array([np.dot(a.T, a) for a in betapsi1TLmiLBi])[:, None]
gamma_1 = mdot(VVT, self.psi1.T, Hi)
pHip = mdot(self.psi1.T, Hi, self.psi1)
gamma_2 = mdot(self.beta_star * pHip, self.V_star)
gamma_3 = self.V_star * gamma_2
self._dL_dpsi0 = -0.5 * self.beta_star # dA_dpsi0: logdet(self.beta_star)
self._dL_dpsi0 += .5 * self.V_star ** 2 # dA_psi0: yT*beta_star*y
self._dL_dpsi0 += .5 * alpha # dC_dpsi0
self._dL_dpsi0 += 0.5 * mdot(self.beta_star * pHip, self.V_star) ** 2 - self.V_star * mdot(self.V_star.T, pHip * self.beta_star).T # dD_dpsi0
self._dL_dpsi1 = b_psi1_Ki.copy() # dA_dpsi1: logdet(self.beta_star)
self._dL_dpsi1 += -np.dot(psi1beta.T, LBL_inv) # dC_dpsi1
self._dL_dpsi1 += gamma_1 - mdot(psi1beta.T, Hi, self.psi1, gamma_1) # dD_dpsi1
self._dL_dKmm = -0.5 * np.dot(Kmmipsi1, b_psi1_Ki) # dA_dKmm: logdet(self.beta_star)
self._dL_dKmm += .5 * (LBL_inv - Kmmi) + mdot(LBL_inv, psi1beta, Kmmipsi1.T) # dC_dKmm
self._dL_dKmm += -.5 * mdot(Hi, self.psi1, gamma_1) # dD_dKmm
self._dpsi1_dtheta = 0
self._dpsi1_dX = 0
self._dKmm_dtheta = 0
self._dKmm_dX = 0
self._dpsi1_dX_jkj = 0
self._dpsi1_dtheta_jkj = 0
for i, V_n, alpha_n, gamma_n, gamma_k in zip(range(self.N), self.V_star, alpha, gamma_2, gamma_3):
K_pp_K = np.dot(Kmmipsi1[:, i:(i + 1)], Kmmipsi1[:, i:(i + 1)].T)
# Diag_dpsi1 = Diag_dA_dpsi1: yT*beta_star*y + Diag_dC_dpsi1 +Diag_dD_dpsi1
_dpsi1 = (-V_n ** 2 - alpha_n + 2.*gamma_k - gamma_n ** 2) * Kmmipsi1.T[i:(i + 1), :]
# Diag_dKmm = Diag_dA_dKmm: yT*beta_star*y +Diag_dC_dKmm +Diag_dD_dKmm
_dKmm = .5 * (V_n ** 2 + alpha_n + gamma_n ** 2 - 2.*gamma_k) * K_pp_K # Diag_dD_dKmm
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1, self.X[i:i + 1, :], self.Z)
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm, self.Z)
self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm , self.Z)
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T, self.Z, self.X[i:i + 1, :])
# the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedatic derivates not implemented"
else:
# likelihood is not heterscedatic
dbstar_dnoise = self.likelihood.precision * (self.beta_star ** 2 * self.Diag0[:, None] - self.beta_star)
Lmi_psi1 = mdot(self.Lmi, self.psi1)
LBiLmipsi1 = np.dot(self.LBi, Lmi_psi1)
aux_0 = np.dot(self._LBi_Lmi_psi1V.T, LBiLmipsi1)
aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T, LBiLmipsi1)
aux_2 = np.dot(LBiLmipsi1.T, self._LBi_Lmi_psi1V)
dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise / self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y ** 2 * dbstar_dnoise)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T, self.LBi, Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T, self.LBi, Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dD_dnoise_1 = mdot(self.V_star * LBiLmipsi1.T, LBiLmipsi1 * dbstar_dnoise.T * self.likelihood.Y.T)
alpha = mdot(LBiLmipsi1, self.V_star)
alpha_ = mdot(LBiLmipsi1.T, alpha)
dD_dnoise_2 = -0.5 * self.input_dim * np.sum(alpha_ ** 2 * dbstar_dnoise)
dD_dnoise_1 = mdot(self.V_star.T, self.psi1.T, self.Lmi.T, self.LBi.T, self.LBi, self.Lmi, self.psi1, dbstar_dnoise * self.likelihood.Y)
dD_dnoise_2 = 0.5 * mdot(self.V_star.T, self.psi1.T, Hi, self.psi1, dbstar_dnoise * self.psi1.T, Hi, self.psi1, self.V_star)
dD_dnoise = dD_dnoise_1 + dD_dnoise_2
self.partial_for_likelihood = dA_dnoise + dC_dnoise + dD_dnoise
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
A = -0.5 * self.N * self.input_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
C = -self.input_dim * (np.sum(np.log(np.diag(self.LB))))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + C + D
def _log_likelihood_gradients(self):
pass
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self):
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0, self.X)
dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1, self.X, self.Z)
dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm, X=self.Z)
dL_dtheta += self._dKmm_dtheta
dL_dtheta += self._dpsi1_dtheta
return dL_dtheta
def dL_dZ(self):
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T, self.Z, self.X)
dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm, X=self.Z)
dL_dZ += self._dpsi1_dX
dL_dZ += self._dKmm_dX
return dL_dZ
def _raw_predict(self, Xnew, which_parts, full_cov=False):
if self.likelihood.is_heteroscedastic:
Iplus_Dprod_i = 1. / (1. + self.Diag0 * self.likelihood.precision.flatten())
self.Diag = self.Diag0 * Iplus_Dprod_i
self.P = Iplus_Dprod_i[:, None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi, self.psi1)
self.L = np.linalg.cholesky(np.eye(self.num_inducing) + np.dot(self.RPT0, ((1. - Iplus_Dprod_i) / self.Diag0)[:, None] * self.RPT0.T))
self.R, info = linalg.flapack.dtrtrs(self.L, self.Lmi, lower=1)
self.RPT = np.dot(self.R, self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T, self.RPT)
self.w = self.Diag * self.likelihood.v_tilde
self.Gamma = np.dot(self.R.T, np.dot(self.RPT, self.likelihood.v_tilde))
self.mu = self.w + np.dot(self.P, self.Gamma)
"""
Make a prediction for the generalized FITC model
Arguments
---------
X : Input prediction data - Nx1 numpy array (floats)
"""
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (input_dim+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (input_dim + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V, info = linalg.flapack.dtrtrs(U, self.RPT0.T, lower=1)
C = np.eye(self.num_inducing) - np.dot(V.T, V)
mu_u = np.dot(C, self.RPT0) * (1. / self.Diag0[None, :])
# self.C = C
# self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
# self.mu_u = mu_u
# self.U = U
# q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
mu_H = np.dot(mu_u, self.mu)
self.mu_H = mu_H
Sigma_H = C + np.dot(mu_u, np.dot(self.Sigma, mu_u.T))
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
KR0T = np.dot(Kx.T, self.Lmi.T)
mu_star = np.dot(KR0T, mu_H)
if full_cov:
Kxx = self.kern.K(Xnew, which_parts=which_parts)
var = Kxx + np.dot(KR0T, np.dot(Sigma_H - np.eye(self.num_inducing), KR0T.T))
else:
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
var = (Kxx + np.sum(KR0T.T * np.dot(Sigma_H - np.eye(self.num_inducing), KR0T.T), 0))[:, None]
return mu_star[:, None], var
else:
raise NotImplementedError, "homoscedastic fitc not implemented"
"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]
"""

View file

@ -7,7 +7,11 @@ from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
from ..util.plot import gpplot from ..util.plot import gpplot
from .. import kern from .. import kern
from scipy import stats, linalg from scipy import stats, linalg
<<<<<<< HEAD:GPy/models/generalized_FITC.py
from sparse_GP import sparse_GP from sparse_GP import sparse_GP
=======
from ..core import SparseGP
>>>>>>> 7040b26f41f382edfdca3d3f7b689b9bbfc1a54f:GPy/models/generalized_fitc.py
def backsub_both_sides(L,X): def backsub_both_sides(L,X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky""" """ Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
@ -15,7 +19,7 @@ def backsub_both_sides(L,X):
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
class generalized_FITC(sparse_GP): class GeneralizedFITC(SparseGP):
""" """
Naish-Guzman, A. and Holden, S. (2008) implemantation of EP with FITC. Naish-Guzman, A. and Holden, S. (2008) implemantation of EP with FITC.
@ -28,9 +32,9 @@ class generalized_FITC(sparse_GP):
:param X_variance: The variance in the measurements of X (Gaussian variance) :param X_variance: The variance in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (N x input_dim) | None :type X_variance: np.ndarray (N x input_dim) | None
:param Z: inducing inputs (optional, see note) :param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x input_dim) | None :type Z: np.ndarray (num_inducing x input_dim) | None
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None) :param num_inducing : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int :type num_inducing: int
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool :type normalize_(X|Y): bool
""" """
@ -38,13 +42,18 @@ class generalized_FITC(sparse_GP):
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False): def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
self.Z = Z self.Z = Z
self.M = self.Z.shape[0] self.num_inducing = self.Z.shape[0]
self.true_precision = likelihood.precision self.true_precision = likelihood.precision
<<<<<<< HEAD:GPy/models/generalized_FITC.py
sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False) sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False)
=======
super(GeneralizedFITC, self).__init__(X, likelihood, kernel=kernel, Z=self.Z, X_variance=X_variance, normalize_X=normalize_X)
self._set_params(self._get_params())
>>>>>>> 7040b26f41f382edfdca3d3f7b689b9bbfc1a54f:GPy/models/generalized_fitc.py
def _set_params(self, p): def _set_params(self, p):
self.Z = p[:self.M*self.input_dim].reshape(self.M, self.input_dim) self.Z = p[:self.num_inducing*self.input_dim].reshape(self.num_inducing, self.input_dim)
self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam]) self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:]) self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
self._compute_kernel_matrices() self._compute_kernel_matrices()
@ -58,7 +67,7 @@ class generalized_FITC(sparse_GP):
For a Gaussian (or direct: TODO) likelihood, no iteration is required: For a Gaussian (or direct: TODO) likelihood, no iteration is required:
this function does nothing this function does nothing
Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in sparse_GP. Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in SparseGP.
The true precison is now 'true_precision' not 'precision'. The true precison is now 'true_precision' not 'precision'.
""" """
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
@ -75,14 +84,14 @@ class generalized_FITC(sparse_GP):
but adds a diagonal term to the covariance matrix: diag(Knn - Qnn). but adds a diagonal term to the covariance matrix: diag(Knn - Qnn).
This function: This function:
- computes the FITC diagonal term - computes the FITC diagonal term
- removes the extra terms computed in the sparse_GP approximation - removes the extra terms computed in the SparseGP approximation
- computes the likelihood gradients wrt the true precision. - computes the likelihood gradients wrt the true precision.
""" """
#NOTE the true precison is now 'true_precision' not 'precision' #NOTE the true precison is now 'true_precision' not 'precision'
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
# Compute generalized FITC's diagonal term of the covariance # Compute generalized FITC's diagonal term of the covariance
self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1) self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.num_inducing),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1) Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1) self.Qnn = np.dot(Lmipsi1.T,Lmipsi1)
#self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm) #self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm)
@ -94,7 +103,7 @@ class generalized_FITC(sparse_GP):
self.P = Iplus_Dprod_i[:,None] * self.psi1.T self.P = Iplus_Dprod_i[:,None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi,self.psi1) self.RPT0 = np.dot(self.Lmi,self.psi1)
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T)) self.L = np.linalg.cholesky(np.eye(self.num_inducing) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1) self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
self.RPT = np.dot(self.R,self.P.T) self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT) self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
@ -122,7 +131,7 @@ class generalized_FITC(sparse_GP):
sf2 = sf**2 sf2 = sf**2
# Remove extra term from dL_dKmm # Remove extra term from dL_dKmm
self.dL_dKmm += 0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB self.dL_dKmm += 0.5 * self.input_dim * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
self.dL_dpsi0 = None self.dL_dpsi0 = None
#the partial derivative vector for the likelihood #the partial derivative vector for the likelihood
@ -133,8 +142,8 @@ class generalized_FITC(sparse_GP):
else: else:
raise NotImplementedError, "homoscedastic derivatives not implemented" raise NotImplementedError, "homoscedastic derivatives not implemented"
#likelihood is not heterscedatic #likelihood is not heterscedatic
#self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2 #self.partial_for_likelihood = - 0.5 * self.N*self.input_dim*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
#self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision #self.partial_for_likelihood += 0.5 * self.input_dim * trace_dot(self.Bi,self.A)*self.likelihood.precision
#self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1)) #self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
#TODO partial derivative vector for the likelihood not implemented #TODO partial derivative vector for the likelihood not implemented
@ -146,7 +155,7 @@ class generalized_FITC(sparse_GP):
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
raise NotImplementedError, "heteroscedatic derivates not implemented" raise NotImplementedError, "heteroscedatic derivates not implemented"
else: else:
#NOTE in sparse_GP this would include the gradient wrt psi0 #NOTE in SparseGP this would include the gradient wrt psi0
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X) dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X)
return dL_dtheta return dL_dtheta
@ -155,11 +164,11 @@ class generalized_FITC(sparse_GP):
""" Compute the (lower bound on the) log marginal likelihood """ """ Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2 sf2 = self.scale_factor**2
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y) A = -0.5*self.N*self.input_dim*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
else: else:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT A = -0.5*self.N*self.input_dim*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.M*np.log(sf2)) C = -self.input_dim * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.num_inducing*np.log(sf2))
#C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2)) #C = -0.5*self.input_dim * (self.B_logdet + self.num_inducing*np.log(sf2))
D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V)) D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V))
#self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T) #self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
#D_ = 0.5*np.trace(self.Cpsi1VVpsi1) #D_ = 0.5*np.trace(self.Cpsi1VVpsi1)
@ -177,13 +186,13 @@ class generalized_FITC(sparse_GP):
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T) # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T # Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T # C = I - [RPT0] * (input_dim+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T # = I - [RPT0] * (input_dim + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T # = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V # = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn) U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1) V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1)
C = np.eye(self.M) - np.dot(V.T,V) C = np.eye(self.num_inducing) - np.dot(V.T,V)
mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:]) mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
#self.C = C #self.C = C
#self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T #self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
@ -199,12 +208,12 @@ class generalized_FITC(sparse_GP):
mu_star = np.dot(KR0T,mu_H) mu_star = np.dot(KR0T,mu_H)
if full_cov: if full_cov:
Kxx = self.kern.K(Xnew,which_parts=which_parts) Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T))
else: else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts) Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed? Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed?
var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) # TODO: RA, is this line needed? var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T)) # TODO: RA, is this line needed?
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None] var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T),0))[:,None]
return mu_star[:,None],var return mu_star[:,None],var
else: else:
raise NotImplementedError, "homoscedastic fitc not implemented" raise NotImplementedError, "homoscedastic fitc not implemented"

View file

@ -15,7 +15,7 @@ class GP_classification(GP):
:param X: input observations :param X: input observations
:param Y: observed values :param Y: observed values
:param likelihood: a GPy likelihood, defaults to binomial with probit link_function :param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
:param kernel: a GPy kernel, defaults to rbf :param kernel: a GPy kernel, defaults to rbf
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True :type normalize_X: False|True
@ -31,7 +31,7 @@ class GP_classification(GP):
kernel = kern.rbf(X.shape[1]) kernel = kern.rbf(X.shape[1])
if likelihood is None: if likelihood is None:
distribution = likelihoods.likelihood_functions.binomial() distribution = likelihoods.likelihood_functions.Binomial()
likelihood = likelihoods.EP(Y, distribution) likelihood = likelihoods.EP(Y, distribution)
elif Y is not None: elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()): if not all(Y.flatten() == likelihood.data.flatten()):

View file

@ -7,7 +7,7 @@ from ..core import GP
from .. import likelihoods from .. import likelihoods
from .. import kern from .. import kern
class GP_regression(GP): class GPRegression(GP):
""" """
Gaussian Process model for regression Gaussian Process model for regression

View file

@ -4,14 +4,13 @@ Created on 10 Apr 2013
@author: Max Zwiessele @author: Max Zwiessele
''' '''
from GPy.core import model from GPy.core import model
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM from GPy.core import SparseGP
from GPy.core import sparse_GP
from GPy.util.linalg import PCA from GPy.util.linalg import PCA
from scipy import linalg
import numpy import numpy
import itertools import itertools
import pylab import pylab
from GPy.kern.kern import kern from GPy.kern.kern import kern
from GPy.models.bayesian_gplvm import BayesianGPLVM
class MRD(model): class MRD(model):
""" """
@ -38,7 +37,7 @@ class MRD(model):
*concat: PCA on concatenated outputs *concat: PCA on concatenated outputs
*single: PCA on each output *single: PCA on each output
*random: random *random: random
:param M: :param num_inducing:
number of inducing inputs to use number of inducing inputs to use
:param Z: :param Z:
initial inducing inputs initial inducing inputs
@ -62,22 +61,22 @@ class MRD(model):
assert not ('kernel' in kw), "pass kernels through `kernels` argument" assert not ('kernel' in kw), "pass kernels through `kernels` argument"
self.input_dim = input_dim self.input_dim = input_dim
self.M = M self.num_inducing = M
self._debug = _debug self._debug = _debug
self._init = True self._init = True
X = self._init_X(initx, likelihood_or_Y_list) X = self._init_X(initx, likelihood_or_Y_list)
Z = self._init_Z(initz, X) Z = self._init_Z(initz, X)
self.bgplvms = [Bayesian_GPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, M=self.M, **kw) for l, k in zip(likelihood_or_Y_list, kernels)] self.bgplvms = [BayesianGPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, M=self.num_inducing, **kw) for l, k in zip(likelihood_or_Y_list, kernels)]
del self._init del self._init
self.gref = self.bgplvms[0] self.gref = self.bgplvms[0]
nparams = numpy.array([0] + [sparse_GP._get_params(g).size - g.Z.size for g in self.bgplvms]) nparams = numpy.array([0] + [SparseGP._get_params(g).size - g.Z.size for g in self.bgplvms])
self.nparams = nparams.cumsum() self.nparams = nparams.cumsum()
self.N = self.gref.N self.N = self.gref.N
self.NQ = self.N * self.input_dim self.NQ = self.N * self.input_dim
self.MQ = self.M * self.input_dim self.MQ = self.num_inducing * self.input_dim
model.__init__(self) # @UndefinedVariable model.__init__(self) # @UndefinedVariable
self._set_params(self._get_params()) self._set_params(self._get_params())
@ -151,7 +150,7 @@ class MRD(model):
itertools.izip(ns, itertools.izip(ns,
itertools.repeat(name))) itertools.repeat(name)))
return list(itertools.chain(n1var, *(map_names(\ return list(itertools.chain(n1var, *(map_names(\
sparse_GP._get_param_names(g)[self.MQ:], n) \ SparseGP._get_param_names(g)[self.MQ:], n) \
for g, n in zip(self.bgplvms, self.names)))) for g, n in zip(self.bgplvms, self.names))))
def _get_params(self): def _get_params(self):
@ -165,14 +164,14 @@ class MRD(model):
X = self.gref.X.ravel() X = self.gref.X.ravel()
X_var = self.gref.X_variance.ravel() X_var = self.gref.X_variance.ravel()
Z = self.gref.Z.ravel() Z = self.gref.Z.ravel()
thetas = [sparse_GP._get_params(g)[g.Z.size:] for g in self.bgplvms] thetas = [SparseGP._get_params(g)[g.Z.size:] for g in self.bgplvms]
params = numpy.hstack([X, X_var, Z, numpy.hstack(thetas)]) params = numpy.hstack([X, X_var, Z, numpy.hstack(thetas)])
return params return params
# def _set_var_params(self, g, X, X_var, Z): # def _set_var_params(self, g, X, X_var, Z):
# g.X = X.reshape(self.N, self.input_dim) # g.X = X.reshape(self.N, self.input_dim)
# g.X_variance = X_var.reshape(self.N, self.input_dim) # g.X_variance = X_var.reshape(self.N, self.input_dim)
# g.Z = Z.reshape(self.M, self.input_dim) # g.Z = Z.reshape(self.num_inducing, self.input_dim)
# #
# def _set_kern_params(self, g, p): # def _set_kern_params(self, g, p):
# g.kern._set_params(p[:g.kern.Nparam]) # g.kern._set_params(p[:g.kern.Nparam])
@ -206,7 +205,7 @@ class MRD(model):
def log_likelihood(self): def log_likelihood(self):
ll = -self.gref.KL_divergence() ll = -self.gref.KL_divergence()
for g in self.bgplvms: for g in self.bgplvms:
ll += sparse_GP.log_likelihood(g) ll += SparseGP.log_likelihood(g)
return ll return ll
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
@ -215,7 +214,7 @@ class MRD(model):
dLdmu -= dKLmu dLdmu -= dKLmu
dLdS -= dKLdS dLdS -= dKLdS
dLdmuS = numpy.hstack((dLdmu.flatten(), dLdS.flatten())).flatten() dLdmuS = numpy.hstack((dLdmu.flatten(), dLdS.flatten())).flatten()
dldzt1 = reduce(lambda a, b: a + b, (sparse_GP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms)) dldzt1 = reduce(lambda a, b: a + b, (SparseGP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms))
return numpy.hstack((dLdmuS, return numpy.hstack((dLdmuS,
dldzt1, dldzt1,
@ -250,9 +249,9 @@ class MRD(model):
if X is None: if X is None:
X = self.X X = self.X
if init in "permute": if init in "permute":
Z = numpy.random.permutation(X.copy())[:self.M] Z = numpy.random.permutation(X.copy())[:self.num_inducing]
elif init in "random": elif init in "random":
Z = numpy.random.randn(self.M, self.input_dim) * X.var() Z = numpy.random.randn(self.num_inducing, self.input_dim) * X.var()
self.Z = Z self.Z = Z
return Z return Z
@ -274,8 +273,8 @@ class MRD(model):
else: else:
return pylab.gcf() return pylab.gcf()
def plot_X_1d(self): def plot_X_1d(self, *a, **kw):
return self.gref.plot_X_1d() return self.gref.plot_X_1d(*a, **kw)
def plot_X(self, fignum=None, ax=None): def plot_X(self, fignum=None, ax=None):
fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g.X)) fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g.X))

View file

@ -1,61 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import sys, pdb
# from .. import kern
# from ..core import model
# from ..util.linalg import pdinv, PCA
from GPLVM import GPLVM
from sparse_GP_regression import sparse_GP_regression
class sparse_GPLVM(sparse_GP_regression, GPLVM):
"""
Sparse Gaussian Process Latent Variable Model
:param Y: observed data
:type Y: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
"""
def __init__(self, Y, input_dim, kernel=None, init='PCA', M=10):
X = self.initialise_latent(init, input_dim, Y)
sparse_GP_regression.__init__(self, X, Y, kernel=kernel,M=M)
def _get_param_names(self):
return (sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.N)],[])
+ sparse_GP_regression._get_param_names(self))
def _get_params(self):
return np.hstack((self.X.flatten(), sparse_GP_regression._get_params(self)))
def _set_params(self,x):
self.X = x[:self.X.size].reshape(self.N,self.input_dim).copy()
sparse_GP_regression._set_params(self, x[self.X.size:])
def log_likelihood(self):
return sparse_GP_regression.log_likelihood(self)
def dL_dX(self):
dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0,self.X)
dL_dX += self.kern.dK_dX(self.dL_dpsi1.T,self.X,self.Z)
return dL_dX
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dX().flatten(), sparse_GP_regression._log_likelihood_gradients(self)))
def plot(self):
GPLVM.plot(self)
#passing Z without a small amout of jitter will induce the white kernel where we don;t want it!
mu, var, upper, lower = sparse_GP_regression.predict(self, self.Z+np.random.randn(*self.Z.shape)*0.0001)
pb.plot(mu[:, 0] , mu[:, 1], 'ko')
def plot_latent(self, *args, **kwargs):
input_1, input_2 = GPLVM.plot_latent(*args, **kwargs)
pb.plot(m.Z[:, input_1], m.Z[:, input_2], '^w')

View file

@ -16,7 +16,7 @@ class sparse_GP_classification(sparse_GP):
:param X: input observations :param X: input observations
:param Y: observed values :param Y: observed values
:param likelihood: a GPy likelihood, defaults to binomial with probit link_function :param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
:param kernel: a GPy kernel, defaults to rbf+white :param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True :type normalize_X: False|True
@ -31,7 +31,7 @@ class sparse_GP_classification(sparse_GP):
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3) kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
if likelihood is None: if likelihood is None:
distribution = likelihoods.likelihood_functions.binomial() distribution = likelihoods.likelihood_functions.Binomial()
likelihood = likelihoods.EP(Y, distribution) likelihood = likelihoods.EP(Y, distribution)
elif Y is not None: elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()): if not all(Y.flatten() == likelihood.data.flatten()):

View file

@ -3,17 +3,15 @@
import numpy as np import numpy as np
from ..core import sparse_GP from ..core import SparseGP
from .. import likelihoods from .. import likelihoods
from .. import kern from .. import kern
from ..likelihoods import likelihood
from GP_regression import GP_regression
class sparse_GP_regression(sparse_GP): class SparseGPRegression(SparseGP):
""" """
Gaussian Process model for regression Gaussian Process model for regression
This is a thin wrapper around the sparse_GP class, with a set of sensible defalts This is a thin wrapper around the SparseGP class, with a set of sensible defalts
:param X: input observations :param X: input observations
:param Y: observed values :param Y: observed values
@ -29,19 +27,19 @@ class sparse_GP_regression(sparse_GP):
""" """
def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10, X_variance=None): def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10, X_variance=None):
#kern defaults to rbf (plus white for stability) # kern defaults to rbf (plus white for stability)
if kernel is None: if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3) kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1], 1e-3)
#Z defaults to a subset of the data # Z defaults to a subset of the data
if Z is None: if Z is None:
i = np.random.permutation(X.shape[0])[:M] i = np.random.permutation(X.shape[0])[:M]
Z = X[i].copy() Z = X[i].copy()
else: else:
assert Z.shape[1]==X.shape[1] assert Z.shape[1] == X.shape[1]
#likelihood defaults to Gaussian # likelihood defaults to Gaussian
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y) likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)
sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance) SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance)
self._set_params(self._get_params()) self._set_params(self._get_params())

View file

@ -0,0 +1,61 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import sys, pdb
from GPy.models.sparse_gp_regression import SparseGPRegression
from GPy.models.gplvm import GPLVM
# from .. import kern
# from ..core import model
# from ..util.linalg import pdinv, PCA
class SparseGPLVM(SparseGPRegression, GPLVM):
"""
Sparse Gaussian Process Latent Variable Model
:param Y: observed data
:type Y: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
"""
def __init__(self, Y, input_dim, kernel=None, init='PCA', M=10):
X = self.initialise_latent(init, input_dim, Y)
SparseGPRegression.__init__(self, X, Y, kernel=kernel, M=M)
def _get_param_names(self):
return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], [])
+ SparseGPRegression._get_param_names(self))
def _get_params(self):
return np.hstack((self.X.flatten(), SparseGPRegression._get_params(self)))
def _set_params(self, x):
self.X = x[:self.X.size].reshape(self.N, self.input_dim).copy()
SparseGPRegression._set_params(self, x[self.X.size:])
def log_likelihood(self):
return SparseGPRegression.log_likelihood(self)
def dL_dX(self):
dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0, self.X)
dL_dX += self.kern.dK_dX(self.dL_dpsi1.T, self.X, self.Z)
return dL_dX
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dX().flatten(), SparseGPRegression._log_likelihood_gradients(self)))
def plot(self):
GPLVM.plot(self)
# passing Z without a small amout of jitter will induce the white kernel where we don;t want it!
mu, var, upper, lower = SparseGPRegression.predict(self, self.Z + np.random.randn(*self.Z.shape) * 0.0001)
pb.plot(mu[:, 0] , mu[:, 1], 'ko')
def plot_latent(self, *args, **kwargs):
input_1, input_2 = GPLVM.plot_latent(*args, **kwargs)
pb.plot(m.Z[:, input_1], m.Z[:, input_2], '^w')

View file

@ -3,25 +3,21 @@
import numpy as np import numpy as np
from .. import kern
from ..core import model
from ..util.linalg import pdinv
from ..util.plot import gpplot
from ..util.warping_functions import * from ..util.warping_functions import *
from GP_regression import GP_regression
from ..core import GP from ..core import GP
from .. import likelihoods from .. import likelihoods
from .. import kern from GPy.util.warping_functions import TanhWarpingFunction_d
from GPy import kern
class warpedGP(GP): class WarpedGP(GP):
def __init__(self, X, Y, kernel=None, warping_function = None, warping_terms = 3, normalize_X=False, normalize_Y=False): def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalize_X=False, normalize_Y=False):
if kernel is None: if kernel is None:
kernel = kern.rbf(X.shape[1]) kernel = kern.rbf(X.shape[1])
if warping_function == None: if warping_function == None:
self.warping_function = TanhWarpingFunction_d(warping_terms) self.warping_function = TanhWarpingFunction_d(warping_terms)
self.warping_params = (np.random.randn(self.warping_function.n_terms*3+1,) * 1) self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1,) * 1)
Y = self._scale_data(Y) Y = self._scale_data(Y)
self.has_uncertain_inputs = False self.has_uncertain_inputs = False
@ -35,10 +31,10 @@ class warpedGP(GP):
def _scale_data(self, Y): def _scale_data(self, Y):
self._Ymax = Y.max() self._Ymax = Y.max()
self._Ymin = Y.min() self._Ymin = Y.min()
return (Y-self._Ymin)/(self._Ymax-self._Ymin) - 0.5 return (Y - self._Ymin) / (self._Ymax - self._Ymin) - 0.5
def _unscale_data(self, Y): def _unscale_data(self, Y):
return (Y + 0.5)*(self._Ymax - self._Ymin) + self._Ymin return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin
def _set_params(self, x): def _set_params(self, x):
self.warping_params = x[:self.warping_function.num_parameters] self.warping_params = x[:self.warping_function.num_parameters]
@ -68,15 +64,15 @@ class warpedGP(GP):
alpha = np.dot(self.Ki, self.likelihood.Y.flatten()) alpha = np.dot(self.Ki, self.likelihood.Y.flatten())
warping_grads = self.warping_function_gradients(alpha) warping_grads = self.warping_function_gradients(alpha)
warping_grads = np.append(warping_grads[:,:-1].flatten(), warping_grads[0,-1]) warping_grads = np.append(warping_grads[:, :-1].flatten(), warping_grads[0, -1])
return np.hstack((warping_grads.flatten(), ll_grads.flatten())) return np.hstack((warping_grads.flatten(), ll_grads.flatten()))
def warping_function_gradients(self, Kiy): def warping_function_gradients(self, Kiy):
grad_y = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params) grad_y = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed, self.warping_params, grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed, self.warping_params,
return_covar_chain = True) return_covar_chain=True)
djac_dpsi = ((1.0/grad_y[:,:, None, None])*grad_y_psi).sum(axis=0).sum(axis=0) djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0)
dquad_dpsi = (Kiy[:,None,None,None] * grad_psi).sum(axis=0).sum(axis=0) dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0)
return -dquad_dpsi + djac_dpsi return -dquad_dpsi + djac_dpsi

View file

@ -4,6 +4,7 @@
import unittest import unittest
import numpy as np import numpy as np
import GPy import GPy
from GPy.models.bayesian_gplvm import BayesianGPLVM
class BGPLVMTests(unittest.TestCase): class BGPLVMTests(unittest.TestCase):
def test_bias_kern(self): def test_bias_kern(self):
@ -11,10 +12,10 @@ class BGPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0) Y -= Y.mean(axis=0)
k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) m = BayesianGPLVM(Y, input_dim, kernel=k, M=M)
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -24,10 +25,10 @@ class BGPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0) Y -= Y.mean(axis=0)
k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) m = BayesianGPLVM(Y, input_dim, kernel=k, M=M)
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -37,10 +38,10 @@ class BGPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0) Y -= Y.mean(axis=0)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) m = BayesianGPLVM(Y, input_dim, kernel=k, M=M)
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -50,10 +51,10 @@ class BGPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0) Y -= Y.mean(axis=0)
k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) m = BayesianGPLVM(Y, input_dim, kernel=k, M=M)
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -64,10 +65,10 @@ class BGPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0) Y -= Y.mean(axis=0)
k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) m = BayesianGPLVM(Y, input_dim, kernel=k, M=M)
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())

View file

@ -9,6 +9,7 @@ import pkgutil
import os import os
import random import random
from nose.tools import nottest from nose.tools import nottest
import sys
class ExamplesTests(unittest.TestCase): class ExamplesTests(unittest.TestCase):
def _checkgrad(self, model): def _checkgrad(self, model):
@ -40,9 +41,9 @@ def model_instance(model):
@nottest @nottest
def test_models(): def test_models():
examples_path = os.path.dirname(GPy.examples.__file__) examples_path = os.path.dirname(GPy.examples.__file__)
#Load modules # Load modules
for loader, module_name, is_pkg in pkgutil.iter_modules([examples_path]): for loader, module_name, is_pkg in pkgutil.iter_modules([examples_path]):
#Load examples # Load examples
module_examples = loader.find_module(module_name).load_module(module_name) module_examples = loader.find_module(module_name).load_module(module_name)
print "MODULE", module_examples print "MODULE", module_examples
print "Before" print "Before"
@ -56,11 +57,11 @@ def test_models():
continue continue
print "Testing example: ", example[0] print "Testing example: ", example[0]
#Generate model # Generate model
model = example[1]() model = example[1]()
print model print model
#Create tests for instance check # Create tests for instance check
""" """
test = model_instance_generator(model) test = model_instance_generator(model)
test.__name__ = 'test_instance_%s' % example[0] test.__name__ = 'test_instance_%s' % example[0]
@ -78,4 +79,5 @@ def test_models():
if __name__ == "__main__": if __name__ == "__main__":
print "Running unit tests, please be (very) patient..." print "Running unit tests, please be (very) patient..."
unittest.main() # unittest.main()
test_models()

View file

@ -11,7 +11,7 @@ class GPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.GPLVM(Y, input_dim, kernel = k) m = GPy.models.GPLVM(Y, input_dim, kernel = k)
m.ensure_default_constraints() m.ensure_default_constraints()
@ -23,7 +23,7 @@ class GPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.GPLVM(Y, input_dim, kernel = k) m = GPy.models.GPLVM(Y, input_dim, kernel = k)
m.ensure_default_constraints() m.ensure_default_constraints()
@ -35,7 +35,7 @@ class GPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.GPLVM(Y, input_dim, kernel = k) m = GPy.models.GPLVM(Y, input_dim, kernel = k)
m.ensure_default_constraints() m.ensure_default_constraints()

View file

@ -12,7 +12,7 @@ class KernelTests(unittest.TestCase):
K.constrain_fixed('2') K.constrain_fixed('2')
X = np.random.rand(5,5) X = np.random.rand(5,5)
Y = np.ones((5,1)) Y = np.ones((5,1))
m = GPy.models.GP_regression(X,Y,K) m = GPy.models.GPRegression(X,Y,K)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_fixedkernel(self): def test_fixedkernel(self):
@ -23,7 +23,7 @@ class KernelTests(unittest.TestCase):
K = np.dot(X, X.T) K = np.dot(X, X.T)
kernel = GPy.kern.fixed(4, K) kernel = GPy.kern.fixed(4, K)
Y = np.ones((30,1)) Y = np.ones((30,1))
m = GPy.models.GP_regression(X,Y,kernel=kernel) m = GPy.models.GPRegression(X,Y,kernel=kernel)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_coregionalisation(self): def test_coregionalisation(self):
@ -36,9 +36,9 @@ class KernelTests(unittest.TestCase):
Y = np.vstack((Y1,Y2)) Y = np.vstack((Y1,Y2))
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1) k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
k2 = GPy.kern.coregionalise(2,1) k2 = GPy.kern.Coregionalise(2,1)
k = k1.prod(k2,tensor=True) k = k1.prod(k2,tensor=True)
m = GPy.models.GP_regression(X,Y,kernel=k) m = GPy.models.GPRegression(X,Y,kernel=k)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())

View file

@ -20,7 +20,7 @@ class MRDTests(unittest.TestCase):
k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
K = k.K(X) K = k.K(X)
Ylist = [np.random.multivariate_normal(np.zeros(N), K, D).T for _ in range(num_m)] Ylist = [np.random.multivariate_normal(np.zeros(N), K, input_dim).T for _ in range(num_m)]
likelihood_list = [GPy.likelihoods.Gaussian(Y) for Y in Ylist] likelihood_list = [GPy.likelihoods.Gaussian(Y) for Y in Ylist]
m = GPy.models.MRD(likelihood_list, input_dim=input_dim, kernels=k, M=M) m = GPy.models.MRD(likelihood_list, input_dim=input_dim, kernels=k, M=M)

View file

@ -13,7 +13,7 @@ class PriorTests(unittest.TestCase):
y = b*X + C + 1*np.sin(X) y = b*X + C + 1*np.sin(X)
y += 0.05*np.random.randn(len(X)) y += 0.05*np.random.randn(len(X))
X, y = X[:, None], y[:, None] X, y = X[:, None], y[:, None]
m = GPy.models.GP_regression(X, y) m = GPy.models.GPRegression(X, y)
m.ensure_default_constraints() m.ensure_default_constraints()
lognormal = GPy.priors.LogGaussian(1, 2) lognormal = GPy.priors.LogGaussian(1, 2)
m.set_prior('rbf', lognormal) m.set_prior('rbf', lognormal)
@ -27,7 +27,7 @@ class PriorTests(unittest.TestCase):
y = b*X + C + 1*np.sin(X) y = b*X + C + 1*np.sin(X)
y += 0.05*np.random.randn(len(X)) y += 0.05*np.random.randn(len(X))
X, y = X[:, None], y[:, None] X, y = X[:, None], y[:, None]
m = GPy.models.GP_regression(X, y) m = GPy.models.GPRegression(X, y)
m.ensure_default_constraints() m.ensure_default_constraints()
Gamma = GPy.priors.Gamma(1, 1) Gamma = GPy.priors.Gamma(1, 1)
m.set_prior('rbf', Gamma) m.set_prior('rbf', Gamma)
@ -41,7 +41,7 @@ class PriorTests(unittest.TestCase):
y = b*X + C + 1*np.sin(X) y = b*X + C + 1*np.sin(X)
y += 0.05*np.random.randn(len(X)) y += 0.05*np.random.randn(len(X))
X, y = X[:, None], y[:, None] X, y = X[:, None], y[:, None]
m = GPy.models.GP_regression(X, y) m = GPy.models.GPRegression(X, y)
m.ensure_default_constraints() m.ensure_default_constraints()
gaussian = GPy.priors.Gaussian(1, 1) gaussian = GPy.priors.Gaussian(1, 1)
success = False success = False

View file

@ -21,36 +21,36 @@ def ard(p):
@testing.deepTest(__test__) @testing.deepTest(__test__)
class Test(unittest.TestCase): class Test(unittest.TestCase):
D = 9 input_dim = 9
M = 4 num_inducing = 4
N = 3 N = 3
Nsamples = 6e6 Nsamples = 6e6
def setUp(self): def setUp(self):
self.kerns = ( self.kerns = (
# (GPy.kern.rbf(self.D, ARD=True) + # (GPy.kern.rbf(self.input_dim, ARD=True) +
# GPy.kern.linear(self.D, ARD=True) + # GPy.kern.linear(self.input_dim, ARD=True) +
# GPy.kern.bias(self.D) + # GPy.kern.bias(self.input_dim) +
# GPy.kern.white(self.D)), # GPy.kern.white(self.input_dim)),
(GPy.kern.rbf(self.D, np.random.rand(), np.random.rand(self.D), ARD=True) + (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
GPy.kern.rbf(self.D, np.random.rand(), np.random.rand(self.D), ARD=True) + GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
GPy.kern.linear(self.D, np.random.rand(self.D), ARD=True) + GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) +
GPy.kern.bias(self.D) + GPy.kern.bias(self.input_dim) +
GPy.kern.white(self.D)), GPy.kern.white(self.input_dim)),
# GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True), # GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True),
# GPy.kern.linear(self.D, ARD=False), GPy.kern.linear(self.D, ARD=True), # GPy.kern.linear(self.input_dim, ARD=False), GPy.kern.linear(self.input_dim, ARD=True),
# GPy.kern.linear(self.D) + GPy.kern.bias(self.D), # GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim),
# GPy.kern.rbf(self.D) + GPy.kern.bias(self.D), # GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim),
# GPy.kern.linear(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D), # GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim),
# GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D), # GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim),
# GPy.kern.bias(self.D), GPy.kern.white(self.D), # GPy.kern.bias(self.input_dim), GPy.kern.white(self.input_dim),
) )
self.q_x_mean = np.random.randn(self.D) self.q_x_mean = np.random.randn(self.input_dim)
self.q_x_variance = np.exp(np.random.randn(self.D)) self.q_x_variance = np.exp(np.random.randn(self.input_dim))
self.q_x_samples = np.random.randn(self.Nsamples, self.D) * np.sqrt(self.q_x_variance) + self.q_x_mean self.q_x_samples = np.random.randn(self.Nsamples, self.input_dim) * np.sqrt(self.q_x_variance) + self.q_x_mean
self.Z = np.random.randn(self.M, self.D) self.Z = np.random.randn(self.num_inducing, self.input_dim)
self.q_x_mean.shape = (1, self.D) self.q_x_mean.shape = (1, self.input_dim)
self.q_x_variance.shape = (1, self.D) self.q_x_variance.shape = (1, self.input_dim)
def test_psi0(self): def test_psi0(self):
for kern in self.kerns: for kern in self.kerns:
@ -63,7 +63,7 @@ class Test(unittest.TestCase):
for kern in self.kerns: for kern in self.kerns:
Nsamples = 100 Nsamples = 100
psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance) psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance)
K_ = np.zeros((Nsamples, self.M)) K_ = np.zeros((Nsamples, self.num_inducing))
diffs = [] diffs = []
for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)): for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
K = kern.K(q_x_sample_stripe, self.Z) K = kern.K(q_x_sample_stripe, self.Z)
@ -89,7 +89,7 @@ class Test(unittest.TestCase):
for kern in self.kerns: for kern in self.kerns:
Nsamples = 100 Nsamples = 100
psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance) psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
K_ = np.zeros((self.M, self.M)) K_ = np.zeros((self.num_inducing, self.num_inducing))
diffs = [] diffs = []
for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)): for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
K = kern.K(q_x_sample_stripe, self.Z) K = kern.K(q_x_sample_stripe, self.Z)

View file

@ -17,14 +17,14 @@ class PsiStatModel(model):
self.X_variance = X_variance self.X_variance = X_variance
self.Z = Z self.Z = Z
self.N, self.input_dim = X.shape self.N, self.input_dim = X.shape
self.M, input_dim = Z.shape self.num_inducing, input_dim = Z.shape
assert self.input_dim == input_dim, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape) assert self.input_dim == input_dim, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape)
self.kern = kernel self.kern = kernel
super(PsiStatModel, self).__init__() super(PsiStatModel, self).__init__()
self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance) self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)
def _get_param_names(self): def _get_param_names(self):
Xnames = ["{}_{}_{}".format(what, i, j) for what, i, j in itertools.product(['X', 'X_variance'], range(self.N), range(self.input_dim))] Xnames = ["{}_{}_{}".format(what, i, j) for what, i, j in itertools.product(['X', 'X_variance'], range(self.N), range(self.input_dim))]
Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.M), range(self.input_dim))] Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.num_inducing), range(self.input_dim))]
return Xnames + Znames + self.kern._get_param_names() return Xnames + Znames + self.kern._get_param_names()
def _get_params(self): def _get_params(self):
return numpy.hstack([self.X.flatten(), self.X_variance.flatten(), self.Z.flatten(), self.kern._get_params()]) return numpy.hstack([self.X.flatten(), self.X_variance.flatten(), self.Z.flatten(), self.kern._get_params()])
@ -34,7 +34,7 @@ class PsiStatModel(model):
start, end = end, end + self.X_variance.size start, end = end, end + self.X_variance.size
self.X_variance = x[start: end].reshape(self.N, self.input_dim) self.X_variance = x[start: end].reshape(self.N, self.input_dim)
start, end = end, end + self.Z.size start, end = end, end + self.Z.size
self.Z = x[start: end].reshape(self.M, self.input_dim) self.Z = x[start: end].reshape(self.num_inducing, self.input_dim)
self.kern._set_params(x[end:]) self.kern._set_params(x[end:])
def log_likelihood(self): def log_likelihood(self):
return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum() return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()
@ -43,19 +43,19 @@ class PsiStatModel(model):
try: try:
psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance) psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
except AttributeError: except AttributeError:
psiZ = numpy.zeros(self.M * self.input_dim) psiZ = numpy.zeros(self.num_inducing * self.input_dim)
thetagrad = self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance).flatten() thetagrad = self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance).flatten()
return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad)) return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad))
class DPsiStatTest(unittest.TestCase): class DPsiStatTest(unittest.TestCase):
input_dim = 5 input_dim = 5
N = 50 N = 50
M = 10 num_inducing = 10
D = 20 input_dim = 20
X = numpy.random.randn(N, input_dim) X = numpy.random.randn(N, input_dim)
X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1) X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
Z = numpy.random.permutation(X)[:M] Z = numpy.random.permutation(X)[:num_inducing]
Y = X.dot(numpy.random.randn(input_dim, D)) Y = X.dot(numpy.random.randn(input_dim, input_dim))
# kernels = [GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)), GPy.kern.rbf(input_dim, ARD=True), GPy.kern.bias(input_dim)] # kernels = [GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)), GPy.kern.rbf(input_dim, ARD=True), GPy.kern.bias(input_dim)]
kernels = [GPy.kern.linear(input_dim), GPy.kern.rbf(input_dim), GPy.kern.bias(input_dim), kernels = [GPy.kern.linear(input_dim), GPy.kern.rbf(input_dim), GPy.kern.bias(input_dim),
@ -65,7 +65,7 @@ class DPsiStatTest(unittest.TestCase):
def testPsi0(self): def testPsi0(self):
for k in self.kernels: for k in self.kernels:
m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k) M=self.num_inducing, kernel=k)
try: try:
assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts)))
except: except:
@ -80,27 +80,27 @@ class DPsiStatTest(unittest.TestCase):
def testPsi2_lin(self): def testPsi2_lin(self):
k = self.kernels[0] k = self.kernels[0]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k) M=self.num_inducing, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_lin_bia(self): def testPsi2_lin_bia(self):
k = self.kernels[3] k = self.kernels[3]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k) M=self.num_inducing, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_rbf(self): def testPsi2_rbf(self):
k = self.kernels[1] k = self.kernels[1]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k) M=self.num_inducing, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_rbf_bia(self): def testPsi2_rbf_bia(self):
k = self.kernels[-1] k = self.kernels[-1]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k) M=self.num_inducing, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_bia(self): def testPsi2_bia(self):
k = self.kernels[2] k = self.kernels[2]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k) M=self.num_inducing, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
@ -108,14 +108,14 @@ if __name__ == "__main__":
import sys import sys
interactive = 'i' in sys.argv interactive = 'i' in sys.argv
if interactive: if interactive:
# N, M, input_dim, D = 30, 5, 4, 30 # N, num_inducing, input_dim, input_dim = 30, 5, 4, 30
# X = numpy.random.rand(N, input_dim) # X = numpy.random.rand(N, input_dim)
# k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) # k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
# K = k.K(X) # K = k.K(X)
# Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T # Y = numpy.random.multivariate_normal(numpy.zeros(N), K, input_dim).T
# Y -= Y.mean(axis=0) # Y -= Y.mean(axis=0)
# k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) # k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
# m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel=k, M=M) # m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
# m.ensure_default_constraints() # m.ensure_default_constraints()
# m.randomize() # m.randomize()
# # self.assertTrue(m.checkgrad()) # # self.assertTrue(m.checkgrad())
@ -136,22 +136,22 @@ if __name__ == "__main__":
# for k in kernels: # for k in kernels:
# m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, # m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=k) # num_inducing=num_inducing, kernel=k)
# assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts))) # assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
# #
# m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z, # m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=GPy.kern.linear(input_dim)) # num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim))
# m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, # m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=kernel) # num_inducing=num_inducing, kernel=kernel)
# m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, # m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=kernel) # num_inducing=num_inducing, kernel=kernel)
# m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, # m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=GPy.kern.rbf(input_dim)) # num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim))
m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
M=M, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim))) M=M, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)))
m3.ensure_default_constraints() m3.ensure_default_constraints()
# + GPy.kern.bias(input_dim)) # + GPy.kern.bias(input_dim))
# m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, # m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)) # num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim))
else: else:
unittest.main() unittest.main()

View file

@ -4,6 +4,7 @@
import unittest import unittest
import numpy as np import numpy as np
import GPy import GPy
from GPy.models.sparse_gplvm import SparseGPLVM
class sparse_GPLVMTests(unittest.TestCase): class sparse_GPLVMTests(unittest.TestCase):
def test_bias_kern(self): def test_bias_kern(self):
@ -11,9 +12,9 @@ class sparse_GPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.sparse_GPLVM(Y, input_dim, kernel = k, M=M) m = SparseGPLVM(Y, input_dim, kernel=k, M=M)
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -24,9 +25,9 @@ class sparse_GPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.sparse_GPLVM(Y, input_dim, kernel = k, M=M) m = SparseGPLVM(Y, input_dim, kernel=k, M=M)
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -36,9 +37,9 @@ class sparse_GPLVMTests(unittest.TestCase):
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.sparse_GPLVM(Y, input_dim, kernel = k, M=M) m = SparseGPLVM(Y, input_dim, kernel=k, M=M)
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())

View file

@ -5,33 +5,33 @@
import unittest import unittest
import numpy as np import numpy as np
import GPy import GPy
from GPy.likelihoods.likelihood_functions import Binomial
class GradientTests(unittest.TestCase): class GradientTests(unittest.TestCase):
def setUp(self): def setUp(self):
###################################### ######################################
## 1 dimensional example # # 1 dimensional example
# sample inputs and outputs # sample inputs and outputs
self.X1D = np.random.uniform(-3.,3.,(20,1)) self.X1D = np.random.uniform(-3., 3., (20, 1))
self.Y1D = np.sin(self.X1D)+np.random.randn(20,1)*0.05 self.Y1D = np.sin(self.X1D) + np.random.randn(20, 1) * 0.05
###################################### ######################################
## 2 dimensional example # # 2 dimensional example
# sample inputs and outputs # sample inputs and outputs
self.X2D = np.random.uniform(-3.,3.,(40,2)) self.X2D = np.random.uniform(-3., 3., (40, 2))
self.Y2D = np.sin(self.X2D[:,0:1]) * np.sin(self.X2D[:,1:2])+np.random.randn(40,1)*0.05 self.Y2D = np.sin(self.X2D[:, 0:1]) * np.sin(self.X2D[:, 1:2]) + np.random.randn(40, 1) * 0.05
def check_model_with_white(self, kern, model_type='GP_regression', dimension=1): def check_model_with_white(self, kern, model_type='GPRegression', dimension=1):
#Get the correct gradients # Get the correct gradients
if dimension == 1: if dimension == 1:
X = self.X1D X = self.X1D
Y = self.Y1D Y = self.Y1D
else: else:
X = self.X2D X = self.X2D
Y = self.Y2D Y = self.Y2D
# Get model type (GPRegression, SparseGPRegression, etc)
#Get model type (GP_regression, GP_sparse_regression, etc)
model_fit = getattr(GPy.models, model_type) model_fit = getattr(GPy.models, model_type)
noise = GPy.kern.white(dimension) noise = GPy.kern.white(dimension)
@ -42,114 +42,114 @@ class GradientTests(unittest.TestCase):
# contrain all parameters to be positive # contrain all parameters to be positive
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_gp_regression_rbf_1d(self): def test_GPRegression_rbf_1d(self):
''' Testing the GP regression with rbf kernel with white kernel on 1d data ''' ''' Testing the GP regression with rbf kernel with white kernel on 1d data '''
rbf = GPy.kern.rbf(1) rbf = GPy.kern.rbf(1)
self.check_model_with_white(rbf, model_type='GP_regression', dimension=1) self.check_model_with_white(rbf, model_type='GPRegression', dimension=1)
def test_GP_regression_rbf_2D(self): def test_GPRegression_rbf_2D(self):
''' Testing the GP regression with rbf and white kernel on 2d data ''' ''' Testing the GP regression with rbf and white kernel on 2d data '''
rbf = GPy.kern.rbf(2) rbf = GPy.kern.rbf(2)
self.check_model_with_white(rbf, model_type='GP_regression', dimension=2) self.check_model_with_white(rbf, model_type='GPRegression', dimension=2)
def test_GP_regression_rbf_ARD_2D(self): def test_GPRegression_rbf_ARD_2D(self):
''' Testing the GP regression with rbf and white kernel on 2d data ''' ''' Testing the GP regression with rbf and white kernel on 2d data '''
k = GPy.kern.rbf(2,ARD=True) k = GPy.kern.rbf(2, ARD=True)
self.check_model_with_white(k, model_type='GP_regression', dimension=2) self.check_model_with_white(k, model_type='GPRegression', dimension=2)
def test_GP_regression_matern52_1D(self): def test_GPRegression_matern52_1D(self):
''' Testing the GP regression with matern52 kernel on 1d data ''' ''' Testing the GP regression with matern52 kernel on 1d data '''
matern52 = GPy.kern.Matern52(1) matern52 = GPy.kern.Matern52(1)
self.check_model_with_white(matern52, model_type='GP_regression', dimension=1) self.check_model_with_white(matern52, model_type='GPRegression', dimension=1)
def test_GP_regression_matern52_2D(self): def test_GPRegression_matern52_2D(self):
''' Testing the GP regression with matern52 kernel on 2d data ''' ''' Testing the GP regression with matern52 kernel on 2d data '''
matern52 = GPy.kern.Matern52(2) matern52 = GPy.kern.Matern52(2)
self.check_model_with_white(matern52, model_type='GP_regression', dimension=2) self.check_model_with_white(matern52, model_type='GPRegression', dimension=2)
def test_GP_regression_matern52_ARD_2D(self): def test_GPRegression_matern52_ARD_2D(self):
''' Testing the GP regression with matern52 kernel on 2d data ''' ''' Testing the GP regression with matern52 kernel on 2d data '''
matern52 = GPy.kern.Matern52(2,ARD=True) matern52 = GPy.kern.Matern52(2, ARD=True)
self.check_model_with_white(matern52, model_type='GP_regression', dimension=2) self.check_model_with_white(matern52, model_type='GPRegression', dimension=2)
def test_GP_regression_matern32_1D(self): def test_GPRegression_matern32_1D(self):
''' Testing the GP regression with matern32 kernel on 1d data ''' ''' Testing the GP regression with matern32 kernel on 1d data '''
matern32 = GPy.kern.Matern32(1) matern32 = GPy.kern.Matern32(1)
self.check_model_with_white(matern32, model_type='GP_regression', dimension=1) self.check_model_with_white(matern32, model_type='GPRegression', dimension=1)
def test_GP_regression_matern32_2D(self): def test_GPRegression_matern32_2D(self):
''' Testing the GP regression with matern32 kernel on 2d data ''' ''' Testing the GP regression with matern32 kernel on 2d data '''
matern32 = GPy.kern.Matern32(2) matern32 = GPy.kern.Matern32(2)
self.check_model_with_white(matern32, model_type='GP_regression', dimension=2) self.check_model_with_white(matern32, model_type='GPRegression', dimension=2)
def test_GP_regression_matern32_ARD_2D(self): def test_GPRegression_matern32_ARD_2D(self):
''' Testing the GP regression with matern32 kernel on 2d data ''' ''' Testing the GP regression with matern32 kernel on 2d data '''
matern32 = GPy.kern.Matern32(2,ARD=True) matern32 = GPy.kern.Matern32(2, ARD=True)
self.check_model_with_white(matern32, model_type='GP_regression', dimension=2) self.check_model_with_white(matern32, model_type='GPRegression', dimension=2)
def test_GP_regression_exponential_1D(self): def test_GPRegression_exponential_1D(self):
''' Testing the GP regression with exponential kernel on 1d data ''' ''' Testing the GP regression with exponential kernel on 1d data '''
exponential = GPy.kern.exponential(1) exponential = GPy.kern.exponential(1)
self.check_model_with_white(exponential, model_type='GP_regression', dimension=1) self.check_model_with_white(exponential, model_type='GPRegression', dimension=1)
def test_GP_regression_exponential_2D(self): def test_GPRegression_exponential_2D(self):
''' Testing the GP regression with exponential kernel on 2d data ''' ''' Testing the GP regression with exponential kernel on 2d data '''
exponential = GPy.kern.exponential(2) exponential = GPy.kern.exponential(2)
self.check_model_with_white(exponential, model_type='GP_regression', dimension=2) self.check_model_with_white(exponential, model_type='GPRegression', dimension=2)
def test_GP_regression_exponential_ARD_2D(self): def test_GPRegression_exponential_ARD_2D(self):
''' Testing the GP regression with exponential kernel on 2d data ''' ''' Testing the GP regression with exponential kernel on 2d data '''
exponential = GPy.kern.exponential(2,ARD=True) exponential = GPy.kern.exponential(2, ARD=True)
self.check_model_with_white(exponential, model_type='GP_regression', dimension=2) self.check_model_with_white(exponential, model_type='GPRegression', dimension=2)
def test_GP_regression_bias_kern_1D(self): def test_GPRegression_bias_kern_1D(self):
''' Testing the GP regression with bias kernel on 1d data ''' ''' Testing the GP regression with bias kernel on 1d data '''
bias = GPy.kern.bias(1) bias = GPy.kern.bias(1)
self.check_model_with_white(bias, model_type='GP_regression', dimension=1) self.check_model_with_white(bias, model_type='GPRegression', dimension=1)
def test_GP_regression_bias_kern_2D(self): def test_GPRegression_bias_kern_2D(self):
''' Testing the GP regression with bias kernel on 2d data ''' ''' Testing the GP regression with bias kernel on 2d data '''
bias = GPy.kern.bias(2) bias = GPy.kern.bias(2)
self.check_model_with_white(bias, model_type='GP_regression', dimension=2) self.check_model_with_white(bias, model_type='GPRegression', dimension=2)
def test_GP_regression_linear_kern_1D_ARD(self): def test_GPRegression_linear_kern_1D_ARD(self):
''' Testing the GP regression with linear kernel on 1d data ''' ''' Testing the GP regression with linear kernel on 1d data '''
linear = GPy.kern.linear(1,ARD=True) linear = GPy.kern.linear(1, ARD=True)
self.check_model_with_white(linear, model_type='GP_regression', dimension=1) self.check_model_with_white(linear, model_type='GPRegression', dimension=1)
def test_GP_regression_linear_kern_2D_ARD(self): def test_GPRegression_linear_kern_2D_ARD(self):
''' Testing the GP regression with linear kernel on 2d data ''' ''' Testing the GP regression with linear kernel on 2d data '''
linear = GPy.kern.linear(2,ARD=True) linear = GPy.kern.linear(2, ARD=True)
self.check_model_with_white(linear, model_type='GP_regression', dimension=2) self.check_model_with_white(linear, model_type='GPRegression', dimension=2)
def test_GP_regression_linear_kern_1D(self): def test_GPRegression_linear_kern_1D(self):
''' Testing the GP regression with linear kernel on 1d data ''' ''' Testing the GP regression with linear kernel on 1d data '''
linear = GPy.kern.linear(1) linear = GPy.kern.linear(1)
self.check_model_with_white(linear, model_type='GP_regression', dimension=1) self.check_model_with_white(linear, model_type='GPRegression', dimension=1)
def test_GP_regression_linear_kern_2D(self): def test_GPRegression_linear_kern_2D(self):
''' Testing the GP regression with linear kernel on 2d data ''' ''' Testing the GP regression with linear kernel on 2d data '''
linear = GPy.kern.linear(2) linear = GPy.kern.linear(2)
self.check_model_with_white(linear, model_type='GP_regression', dimension=2) self.check_model_with_white(linear, model_type='GPRegression', dimension=2)
def test_sparse_GP_regression_rbf_white_kern_1d(self): def test_SparseGPRegression_rbf_white_kern_1d(self):
''' Testing the sparse GP regression with rbf kernel with white kernel on 1d data ''' ''' Testing the sparse GP regression with rbf kernel with white kernel on 1d data '''
rbf = GPy.kern.rbf(1) rbf = GPy.kern.rbf(1)
self.check_model_with_white(rbf, model_type='sparse_GP_regression', dimension=1) self.check_model_with_white(rbf, model_type='SparseGPRegression', dimension=1)
def test_sparse_GP_regression_rbf_white_kern_2D(self): def test_SparseGPRegression_rbf_white_kern_2D(self):
''' Testing the sparse GP regression with rbf and white kernel on 2d data ''' ''' Testing the sparse GP regression with rbf and white kernel on 2d data '''
rbf = GPy.kern.rbf(2) rbf = GPy.kern.rbf(2)
self.check_model_with_white(rbf, model_type='sparse_GP_regression', dimension=2) self.check_model_with_white(rbf, model_type='SparseGPRegression', dimension=2)
def test_GPLVM_rbf_bias_white_kern_2D(self): def test_GPLVM_rbf_bias_white_kern_2D(self):
""" Testing GPLVM with rbf + bias and white kernel """ """ Testing GPLVM with rbf + bias and white kernel """
N, input_dim, D = 50, 1, 2 N, input_dim, D = 50, 1, 2
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim, 0.5, 0.9*np.ones((1,))) + GPy.kern.bias(input_dim, 0.1) + GPy.kern.white(input_dim, 0.05) k = GPy.kern.rbf(input_dim, 0.5, 0.9 * np.ones((1,))) + GPy.kern.bias(input_dim, 0.1) + GPy.kern.white(input_dim, 0.05)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
m = GPy.models.GPLVM(Y, input_dim, kernel = k) m = GPy.models.GPLVM(Y, input_dim, kernel=k)
m.ensure_default_constraints() m.ensure_default_constraints()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -159,42 +159,43 @@ class GradientTests(unittest.TestCase):
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim, 0.1) + GPy.kern.white(input_dim, 0.05) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim, 0.1) + GPy.kern.white(input_dim, 0.05)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
m = GPy.models.GPLVM(Y, input_dim, init = 'PCA', kernel = k) m = GPy.models.GPLVM(Y, input_dim, init='PCA', kernel=k)
m.ensure_default_constraints() m.ensure_default_constraints()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_GP_EP_probit(self): def test_GP_EP_probit(self):
N = 20 N = 20
X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None] X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None] Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
kernel = GPy.kern.rbf(1) kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.binomial() distribution = GPy.likelihoods.likelihood_functions.Binomial()
likelihood = GPy.likelihoods.EP(Y, distribution) likelihood = GPy.likelihoods.EP(Y, distribution)
m = GPy.core.GP(X, likelihood, kernel) m = GPy.core.GP(X, likelihood, kernel)
m.ensure_default_constraints() m.ensure_default_constraints()
m.update_likelihood_approximation() m.update_likelihood_approximation()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
#self.assertTrue(m.EPEM) # self.assertTrue(m.EPEM)
def test_sparse_EP_DTC_probit(self): def test_sparse_EP_DTC_probit(self):
N = 20 N = 20
X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None] X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None] Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
Z = np.linspace(0,15,4)[:,None] Z = np.linspace(0, 15, 4)[:, None]
kernel = GPy.kern.rbf(1) kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.binomial() distribution = GPy.likelihoods.likelihood_functions.Binomial()
likelihood = GPy.likelihoods.EP(Y, distribution) likelihood = GPy.likelihoods.EP(Y, distribution)
m = GPy.core.sparse_GP(X, likelihood, kernel,Z) m = GPy.core.SparseGP(X, likelihood, kernel, Z)
m.ensure_default_constraints() m.ensure_default_constraints()
m.update_likelihood_approximation() m.update_likelihood_approximation()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_generalized_FITC(self): def test_generalized_FITC(self):
N = 20 N = 20
X = np.hstack([np.random.rand(N/2)+1,np.random.rand(N/2)-1])[:,None] X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
k = GPy.kern.rbf(1) + GPy.kern.white(1) k = GPy.kern.rbf(1) + GPy.kern.white(1)
Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None] Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None]
distribution = GPy.likelihoods.likelihood_functions.binomial() distribution = GPy.likelihoods.likelihood_functions.binomial()
likelihood = GPy.likelihoods.EP(Y, distribution) likelihood = GPy.likelihoods.EP(Y, distribution)
#likelihood = GPy.inference.likelihoods.binomial(Y) #likelihood = GPy.inference.likelihoods.binomial(Y)

View file

@ -13,7 +13,7 @@ default_seed = 10000
# Some general utilities. # Some general utilities.
def sample_class(f): def sample_class(f):
p = 1. / (1. + np.exp(-f)) p = 1. / (1. + np.exp(-f))
c = np.random.binomial(1, p) c = np.random.Binomial(1, p)
c = np.where(c, 1, -1) c = np.where(c, 1, -1)
return c return c

View file

@ -1,87 +1,80 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
#tdot function courtesy of Ian Murray: # tdot function courtesy of Ian Murray:
# Iain Murray, April 2013. iain contactable via iainmurray.net # Iain Murray, April 2013. iain contactable via iainmurray.net
# http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot.py # http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot.py
import numpy as np import numpy as np
from scipy import linalg, optimize, weave from scipy import linalg, weave
import pylab as pb
import Tango
import sys
import re
import pdb
import cPickle
import types import types
import ctypes import ctypes
from ctypes import byref, c_char, c_int, c_double # TODO from ctypes import byref, c_char, c_int, c_double # TODO
#import scipy.lib.lapack # import scipy.lib.lapack
import scipy as sp
try: try:
_blaslib = ctypes.cdll.LoadLibrary(np.core._dotblas.__file__) _blaslib = ctypes.cdll.LoadLibrary(np.core._dotblas.__file__) # @UndefinedVariable
_blas_available = True _blas_available = True
except: except:
_blas_available = False _blas_available = False
def trace_dot(a,b): def trace_dot(a, b):
""" """
efficiently compute the trace of the matrix product of a and b efficiently compute the trace of the matrix product of a and b
""" """
return np.sum(a*b) return np.sum(a * b)
def mdot(*args): def mdot(*args):
"""Multiply all the arguments using matrix product rules. """Multiply all the arguments using matrix product rules.
The output is equivalent to multiplying the arguments one by one The output is equivalent to multiplying the arguments one by one
from left to right using dot(). from left to right using dot().
Precedence can be controlled by creating tuples of arguments, Precedence can be controlled by creating tuples of arguments,
for instance mdot(a,((b,c),d)) multiplies a (a*((b*c)*d)). for instance mdot(a,((b,c),d)) multiplies a (a*((b*c)*d)).
Note that this means the output of dot(a,b) and mdot(a,b) will differ if Note that this means the output of dot(a,b) and mdot(a,b) will differ if
a or b is a pure tuple of numbers. a or b is a pure tuple of numbers.
""" """
if len(args)==1: if len(args) == 1:
return args[0] return args[0]
elif len(args)==2: elif len(args) == 2:
return _mdot_r(args[0],args[1]) return _mdot_r(args[0], args[1])
else: else:
return _mdot_r(args[:-1],args[-1]) return _mdot_r(args[:-1], args[-1])
def _mdot_r(a,b): def _mdot_r(a, b):
"""Recursive helper for mdot""" """Recursive helper for mdot"""
if type(a)==types.TupleType: if type(a) == types.TupleType:
if len(a)>1: if len(a) > 1:
a = mdot(*a) a = mdot(*a)
else: else:
a = a[0] a = a[0]
if type(b)==types.TupleType: if type(b) == types.TupleType:
if len(b)>1: if len(b) > 1:
b = mdot(*b) b = mdot(*b)
else: else:
b = b[0] b = b[0]
return np.dot(a,b) return np.dot(a, b)
def jitchol(A,maxtries=5): def jitchol(A, maxtries=5):
A = np.asfortranarray(A) A = np.asfortranarray(A)
L,info = linalg.lapack.flapack.dpotrf(A,lower=1) L, info = linalg.lapack.flapack.dpotrf(A, lower=1)
if info ==0: if info == 0:
return L return L
else: else:
diagA = np.diag(A) diagA = np.diag(A)
if np.any(diagA<0.): if np.any(diagA < 0.):
raise linalg.LinAlgError, "not pd: negative diagonal elements" raise linalg.LinAlgError, "not pd: negative diagonal elements"
jitter= diagA.mean()*1e-6 jitter = diagA.mean() * 1e-6
for i in range(1,maxtries+1): for i in range(1, maxtries + 1):
print 'Warning: adding jitter of {:.10e}'.format(jitter) print 'Warning: adding jitter of {:.10e}'.format(jitter)
try: try:
return linalg.cholesky(A+np.eye(A.shape[0]).T*jitter, lower = True) return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
except: except:
jitter *= 10 jitter *= 10
raise linalg.LinAlgError,"not positive definite, even with jitter." raise linalg.LinAlgError, "not positive definite, even with jitter."
def jitchol_old(A,maxtries=5): def jitchol_old(A, maxtries=5):
""" """
:param A : An almost pd square matrix :param A : An almost pd square matrix
@ -93,20 +86,20 @@ def jitchol_old(A,maxtries=5):
np.allclose(sp.linalg.cholesky(XXT, lower = True), np.triu(sp.linalg.cho_factor(XXT)[0]).T) np.allclose(sp.linalg.cholesky(XXT, lower = True), np.triu(sp.linalg.cho_factor(XXT)[0]).T)
""" """
try: try:
return linalg.cholesky(A, lower = True) return linalg.cholesky(A, lower=True)
except linalg.LinAlgError: except linalg.LinAlgError:
diagA = np.diag(A) diagA = np.diag(A)
if np.any(diagA<0.): if np.any(diagA < 0.):
raise linalg.LinAlgError, "not pd: negative diagonal elements" raise linalg.LinAlgError, "not pd: negative diagonal elements"
jitter= diagA.mean()*1e-6 jitter = diagA.mean() * 1e-6
for i in range(1,maxtries+1): for i in range(1, maxtries + 1):
print '\rWarning: adding jitter of {:.10e} '.format(jitter), print '\rWarning: adding jitter of {:.10e} '.format(jitter),
try: try:
return linalg.cholesky(A+np.eye(A.shape[0]).T*jitter, lower = True) return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
except: except:
jitter *= 10 jitter *= 10
raise linalg.LinAlgError,"not positive definite, even with jitter." raise linalg.LinAlgError, "not positive definite, even with jitter."
def pdinv(A, *args): def pdinv(A, *args):
""" """
@ -125,7 +118,7 @@ def pdinv(A, *args):
logdet = 2.*np.sum(np.log(np.diag(L))) logdet = 2.*np.sum(np.log(np.diag(L)))
Li = chol_inv(L) Li = chol_inv(L)
Ai, _ = linalg.lapack.flapack.dpotri(L) Ai, _ = linalg.lapack.flapack.dpotri(L)
#Ai = np.tril(Ai) + np.tril(Ai,-1).T # Ai = np.tril(Ai) + np.tril(Ai,-1).T
symmetrify(Ai) symmetrify(Ai)
return Ai, L, Li, logdet return Ai, L, Li, logdet
@ -140,7 +133,7 @@ def chol_inv(L):
""" """
return linalg.lapack.flapack.dtrtri(L, lower = True)[0] return linalg.lapack.flapack.dtrtri(L, lower=True)[0]
def multiple_pdinv(A): def multiple_pdinv(A):
@ -155,11 +148,11 @@ def multiple_pdinv(A):
hld: 0.5* the log of the determinants of A hld: 0.5* the log of the determinants of A
""" """
N = A.shape[-1] N = A.shape[-1]
chols = [jitchol(A[:,:,i]) for i in range(N)] chols = [jitchol(A[:, :, i]) for i in range(N)]
halflogdets = [np.sum(np.log(np.diag(L[0]))) for L in chols] halflogdets = [np.sum(np.log(np.diag(L[0]))) for L in chols]
invs = [linalg.lapack.flapack.dpotri(L[0],True)[0] for L in chols] invs = [linalg.lapack.flapack.dpotri(L[0], True)[0] for L in chols]
invs = [np.triu(I)+np.triu(I,1).T for I in invs] invs = [np.triu(I) + np.triu(I, 1).T for I in invs]
return np.dstack(invs),np.array(halflogdets) return np.dstack(invs), np.array(halflogdets)
def PCA(Y, input_dim): def PCA(Y, input_dim):
@ -179,18 +172,18 @@ def PCA(Y, input_dim):
if not np.allclose(Y.mean(axis=0), 0.0): if not np.allclose(Y.mean(axis=0), 0.0):
print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)" print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)"
#Y -= Y.mean(axis=0) # Y -= Y.mean(axis=0)
Z = linalg.svd(Y-Y.mean(axis=0), full_matrices = False) Z = linalg.svd(Y - Y.mean(axis=0), full_matrices=False)
[X, W] = [Z[0][:,0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:,0:input_dim]] [X, W] = [Z[0][:, 0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:, 0:input_dim]]
v = X.std(axis=0) v = X.std(axis=0)
X /= v; X /= v;
W *= v; W *= v;
return X, W.T return X, W.T
def tdot_numpy(mat,out=None): def tdot_numpy(mat, out=None):
return np.dot(mat,mat.T,out) return np.dot(mat, mat.T, out)
def tdot_blas(mat, out=None): def tdot_blas(mat, out=None):
"""returns np.dot(mat, mat.T), but faster for large 2D arrays of doubles.""" """returns np.dot(mat, mat.T), but faster for large 2D arrays of doubles."""
@ -198,16 +191,16 @@ def tdot_blas(mat, out=None):
return np.dot(mat, mat.T) return np.dot(mat, mat.T)
nn = mat.shape[0] nn = mat.shape[0]
if out is None: if out is None:
out = np.zeros((nn,nn)) out = np.zeros((nn, nn))
else: else:
assert(out.dtype == 'float64') assert(out.dtype == 'float64')
assert(out.shape == (nn,nn)) assert(out.shape == (nn, nn))
# FIXME: should allow non-contiguous out, and copy output into it: # FIXME: should allow non-contiguous out, and copy output into it:
assert(8 in out.strides) assert(8 in out.strides)
# zeroing needed because of dumb way I copy across triangular answer # zeroing needed because of dumb way I copy across triangular answer
out[:] = 0.0 out[:] = 0.0
## Call to DSYRK from BLAS # # Call to DSYRK from BLAS
# If already in Fortran order (rare), and has the right sorts of strides I # If already in Fortran order (rare), and has the right sorts of strides I
# could avoid the copy. I also thought swapping to cblas API would allow use # could avoid the copy. I also thought swapping to cblas API would allow use
# of C order. However, I tried that and had errors with large matrices: # of C order. However, I tried that and had errors with large matrices:
@ -226,17 +219,17 @@ def tdot_blas(mat, out=None):
_blaslib.dsyrk_(byref(UPLO), byref(TRANS), byref(N), byref(K), _blaslib.dsyrk_(byref(UPLO), byref(TRANS), byref(N), byref(K),
byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC)) byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC))
symmetrify(out,upper=True) symmetrify(out, upper=True)
return out return out
def tdot(*args, **kwargs): def tdot(*args, **kwargs):
if _blas_available: if _blas_available:
return tdot_blas(*args,**kwargs) return tdot_blas(*args, **kwargs)
else: else:
return tdot_numpy(*args,**kwargs) return tdot_numpy(*args, **kwargs)
def DSYR_blas(A,x,alpha=1.): def DSYR_blas(A, x, alpha=1.):
""" """
Performs a symmetric rank-1 update operation: Performs a symmetric rank-1 update operation:
A <- A + alpha * np.dot(x,x.T) A <- A + alpha * np.dot(x,x.T)
@ -256,9 +249,9 @@ def DSYR_blas(A,x,alpha=1.):
INCX = c_int(1) INCX = c_int(1)
_blaslib.dsyr_(byref(UPLO), byref(N), byref(ALPHA), _blaslib.dsyr_(byref(UPLO), byref(N), byref(ALPHA),
x_, byref(INCX), A_, byref(LDA)) x_, byref(INCX), A_, byref(LDA))
symmetrify(A,upper=True) symmetrify(A, upper=True)
def DSYR_numpy(A,x,alpha=1.): def DSYR_numpy(A, x, alpha=1.):
""" """
Performs a symmetric rank-1 update operation: Performs a symmetric rank-1 update operation:
A <- A + alpha * np.dot(x,x.T) A <- A + alpha * np.dot(x,x.T)
@ -269,23 +262,23 @@ def DSYR_numpy(A,x,alpha=1.):
:param x: Nx1 np.array :param x: Nx1 np.array
:param alpha: scalar :param alpha: scalar
""" """
A += alpha*np.dot(x[:,None],x[None,:]) A += alpha * np.dot(x[:, None], x[None, :])
def DSYR(*args, **kwargs): def DSYR(*args, **kwargs):
if _blas_available: if _blas_available:
return DSYR_blas(*args,**kwargs) return DSYR_blas(*args, **kwargs)
else: else:
return DSYR_numpy(*args,**kwargs) return DSYR_numpy(*args, **kwargs)
def symmetrify(A,upper=False): def symmetrify(A, upper=False):
""" """
Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper
works IN PLACE. works IN PLACE.
""" """
N,M = A.shape N, M = A.shape
assert N==M assert N == M
c_contig_code = """ c_contig_code = """
int iN; int iN;
for (int i=1; i<N; i++){ for (int i=1; i<N; i++){
@ -305,13 +298,13 @@ def symmetrify(A,upper=False):
} }
""" """
if A.flags['C_CONTIGUOUS'] and upper: if A.flags['C_CONTIGUOUS'] and upper:
weave.inline(f_contig_code,['A','N'], extra_compile_args=['-O3']) weave.inline(f_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
elif A.flags['C_CONTIGUOUS'] and not upper: elif A.flags['C_CONTIGUOUS'] and not upper:
weave.inline(c_contig_code,['A','N'], extra_compile_args=['-O3']) weave.inline(c_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
elif A.flags['F_CONTIGUOUS'] and upper: elif A.flags['F_CONTIGUOUS'] and upper:
weave.inline(c_contig_code,['A','N'], extra_compile_args=['-O3']) weave.inline(c_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
elif A.flags['F_CONTIGUOUS'] and not upper: elif A.flags['F_CONTIGUOUS'] and not upper:
weave.inline(f_contig_code,['A','N'], extra_compile_args=['-O3']) weave.inline(f_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
else: else:
if upper: if upper:
tmp = np.tril(A.T) tmp = np.tril(A.T)
@ -319,15 +312,15 @@ def symmetrify(A,upper=False):
tmp = np.tril(A) tmp = np.tril(A)
A[:] = 0.0 A[:] = 0.0
A += tmp A += tmp
A += np.tril(tmp,-1).T A += np.tril(tmp, -1).T
def symmetrify_murray(A): def symmetrify_murray(A):
A += A.T A += A.T
nn = A.shape[0] nn = A.shape[0]
A[[range(nn),range(nn)]] /= 2.0 A[[range(nn), range(nn)]] /= 2.0
def cholupdate(L,x): def cholupdate(L, x):
""" """
update the LOWER cholesky factor of a pd matrix IN PLACE update the LOWER cholesky factor of a pd matrix IN PLACE
@ -337,7 +330,7 @@ def cholupdate(L,x):
support_code = """ support_code = """
#include <math.h> #include <math.h>
""" """
code=""" code = """
double r,c,s; double r,c,s;
int j,i; int j,i;
for(j=0; j<N; j++){ for(j=0; j<N; j++){
@ -353,11 +346,11 @@ def cholupdate(L,x):
""" """
x = x.copy() x = x.copy()
N = x.size N = x.size
weave.inline(code, support_code=support_code, arg_names=['N','L','x'], type_converters=weave.converters.blitz) weave.inline(code, support_code=support_code, arg_names=['N', 'L', 'x'], type_converters=weave.converters.blitz)
def backsub_both_sides(L, X,transpose='left'): def backsub_both_sides(L, X, transpose='left'):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky""" """ Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
if transpose=='left': if transpose == 'left':
tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1) tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
else: else:

View file

@ -221,7 +221,7 @@ class image_show(data_show):
if not self.palette == []: # Can just show the image (self.set_image() took care of setting the palette) if not self.palette == []: # Can just show the image (self.set_image() took care of setting the palette)
self.handle = self.axes.imshow(self.vals, interpolation='nearest') self.handle = self.axes.imshow(self.vals, interpolation='nearest')
else: # Use a boring gray map. else: # Use a boring gray map.
self.handle = self.axes.imshow(self.vals, cmap=plt.cm.gray, interpolation='nearest') self.handle = self.axes.imshow(self.vals, cmap=plt.cm.gray, interpolation='nearest') # @UndefinedVariable
plt.show() plt.show()
def modify(self, vals): def modify(self, vals):