REFACTORING: model module names to lowercase, class names to CamelCase

Max Zwiessele 2013-06-05 13:02:03 +01:00
parent 2a39440619
commit 2e5e8ac026
50 changed files with 436 additions and 3307 deletions

@@ -1,588 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import sys, pdb
from GPLVM import GPLVM
from ..core import sparse_GP
from GPy.util.linalg import pdinv
from ..likelihoods import Gaussian
from .. import kern
from numpy.linalg.linalg import LinAlgError
import itertools
from matplotlib.colors import colorConverter
from matplotlib.figure import SubplotParams
from GPy.inference.optimization import SCG
from GPy.util import plot_latent
class Bayesian_GPLVM(sparse_GP, GPLVM):
"""
Bayesian Gaussian Process Latent Variable Model
:param Y: observed data (np.ndarray) or GPy.likelihood
:type Y: np.ndarray | GPy.likelihood instance
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
"""
def __init__(self, likelihood_or_Y, input_dim, X=None, X_variance=None, init='PCA', M=10,
Z=None, kernel=None, oldpsave=10, _debug=False,
**kwargs):
if type(likelihood_or_Y) is np.ndarray:
likelihood = Gaussian(likelihood_or_Y)
else:
likelihood = likelihood_or_Y
if X is None:
X = self.initialise_latent(init, input_dim, likelihood.Y)
self.init = init
if X_variance is None:
X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1)
if Z is None:
Z = np.random.permutation(X.copy())[:M]
assert Z.shape[1] == X.shape[1]
if kernel is None:
kernel = kern.rbf(input_dim) + kern.white(input_dim)
self.oldpsave = oldpsave
self._oldps = []
self._debug = _debug
if self._debug:
self.f_call = 0
self._count = itertools.count()
self._savedklll = []
self._savedparams = []
self._savedgradients = []
self._savederrors = []
self._savedpsiKmm = []
self._savedABCD = []
sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs)
self._set_params(self._get_params())
@property
def oldps(self):
return self._oldps
@oldps.setter
def oldps(self, p):
if len(self._oldps) == (self.oldpsave + 1):
self._oldps.pop()
# if len(self._oldps) == 0 or not np.any([np.any(np.abs(p - op) > 1e-5) for op in self._oldps]):
self._oldps.insert(0, p.copy())
def _get_param_names(self):
X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], [])
S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], [])
return (X_names + S_names + sparse_GP._get_param_names(self))
def _get_params(self):
"""
Horizontally stacks the parameters in order to present them to the optimizer.
The resulting 1-D array has this structure:
===============================================================
| mu | S | Z | theta | beta |
===============================================================
"""
x = np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
return x
def _clipped(self, x):
return x # np.clip(x, -1e300, 1e300)
def _set_params(self, x, save_old=True, save_count=0):
# try:
x = self._clipped(x)
N, input_dim = self.N, self.input_dim
self.X = x[:self.X.size].reshape(N, input_dim).copy()
self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy()
sparse_GP._set_params(self, x[(2 * N * input_dim):])
# self.oldps = x
# except (LinAlgError, FloatingPointError, ZeroDivisionError):
# print "\rWARNING: Caught LinAlgError, continuing without setting "
# if self._debug:
# self._savederrors.append(self.f_call)
# if save_count > 10:
# raise
# self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)
def dKL_dmuS(self):
dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
dKL_dmu = self.X
return dKL_dmu, dKL_dS
def dL_dmuS(self):
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.dL_dpsi0, self.Z, self.X, self.X_variance)
dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2, self.Z, self.X, self.X_variance)
dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
return dL_dmu, dL_dS
def KL_divergence(self):
var_mean = np.square(self.X).sum()
var_S = np.sum(self.X_variance - np.log(self.X_variance))
return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.N
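# As a formula (a sketch: q(X) = N(mu, S) with diagonal S against the N(0, I) prior):
#   KL(q || p) = 0.5 * sum(mu**2) + 0.5 * sum(S - log(S)) - 0.5 * N * input_dim
# i.e. exactly the 0.5 * (var_mean + var_S) above minus the constant term.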
def log_likelihood(self):
ll = sparse_GP.log_likelihood(self)
kl = self.KL_divergence()
# if ll < -2E4:
# ll = -2E4 + np.random.randn()
# if kl > 5E4:
# kl = 5E4 + np.random.randn()
if self._debug:
self.f_call = self._count.next()
if self.f_call % 1 == 0:
self._savedklll.append([self.f_call, ll, kl])
self._savedparams.append([self.f_call, self._get_params()])
self._savedgradients.append([self.f_call, self._log_likelihood_gradients()])
self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]])
# sf2 = self.scale_factor ** 2
if self.likelihood.is_heteroscedastic:
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y)
# B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else:
A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
# B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
self._savedABCD.append([self.f_call, A, B, C, D])
# print "\nkl:", kl, "ll:", ll
return ll - kl
def _log_likelihood_gradients(self):
dKL_dmu, dKL_dS = self.dKL_dmuS()
dL_dmu, dL_dS = self.dL_dmuS()
# TODO: find way to make faster
d_dmu = (dL_dmu - dKL_dmu).flatten()
d_dS = (dL_dS - dKL_dS).flatten()
# TEST KL: ====================
# d_dmu = (dKL_dmu).flatten()
# d_dS = (dKL_dS).flatten()
# ========================
# TEST L: ====================
# d_dmu = (dL_dmu).flatten()
# d_dS = (dL_dS).flatten()
# ========================
self.dbound_dmuS = np.hstack((d_dmu, d_dS))
self.dbound_dZtheta = sparse_GP._log_likelihood_gradients(self)
return self._clipped(np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta)))
def plot_latent(self, *args, **kwargs):
return plot_latent.plot_latent_indices(self, *args, **kwargs)
def do_test_latents(self, Y):
"""
Compute the latent representation for a set of new points Y
Notes:
This will only work with a univariate Gaussian likelihood (for now)
"""
assert not self.likelihood.is_heteroscedastic
N_test = Y.shape[0]
input_dim = self.Z.shape[1]
means = np.zeros((N_test, input_dim))
covars = np.zeros((N_test, input_dim))
dpsi0 = -0.5 * self.D * self.likelihood.precision
dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods
V = self.likelihood.precision * Y
dpsi1 = np.dot(self.Cpsi1V, V.T)
start = np.zeros(self.input_dim * 2)
for n, dpsi1_n in enumerate(dpsi1.T[:, :, None]):
args = (self.kern, self.Z, dpsi0, dpsi1_n, dpsi2)
xopt, fopt, neval, status = SCG(f=latent_cost, gradf=latent_grad, x=start, optargs=args, display=False)
mu, log_S = xopt.reshape(2, 1, -1)
means[n] = mu[0].copy()
covars[n] = np.exp(log_S[0]).copy()
return means, covars
def plot_X_1d(self, fignum=None, ax=None, colors=None):
"""
Plot latent space X in 1D:
- if ax is given (a list/tuple of axes), plot each of the input_dim latent dimensions into the corresponding axis
- if ax is not given, create a figure numbered fignum with input_dim subplots and plot into those
colors:
colors for the different latent space dimensions input_dim
"""
import pylab
if ax is None:
fig = pylab.figure(num=fignum, figsize=(8, min(12, (2 * self.X.shape[1]))))
if colors is None:
colors = pylab.gca()._get_lines.color_cycle
pylab.clf()
else:
colors = iter(colors)
plots = []
x = np.arange(self.X.shape[0])
for i in range(self.X.shape[1]):
if ax is None:
a = fig.add_subplot(self.X.shape[1], 1, i + 1)
elif isinstance(ax, (tuple, list)):
a = ax[i]
else:
raise ValueError("Need one ax per latent dimnesion input_dim")
a.plot(self.X, c='k', alpha=.3)
plots.extend(a.plot(x, self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
a.fill_between(x,
self.X.T[i] - 2 * np.sqrt(self.X_variance.T[i]),
self.X.T[i] + 2 * np.sqrt(self.X_variance.T[i]),
facecolor=plots[-1].get_color(),
alpha=.3)
a.legend(borderaxespad=0.)
a.set_xlim(x.min(), x.max())
if i < self.X.shape[1] - 1:
a.set_xticklabels('')
pylab.draw()
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
return fig
def __getstate__(self):
return (self.likelihood, self.input_dim, self.X, self.X_variance,
self.init, self.M, self.Z, self.kern,
self.oldpsave, self._debug)
def __setstate__(self, state):
self.__init__(*state)
def _debug_filter_params(self, x):
start, end = 0, self.X.size,
X = x[start:end].reshape(self.N, self.input_dim)
start, end = end, end + self.X_variance.size
X_v = x[start:end].reshape(self.N, self.input_dim)
start, end = end, end + (self.M * self.input_dim)
Z = x[start:end].reshape(self.M, self.input_dim)
start, end = end, end + self.input_dim
theta = x[start:]
return X, X_v, Z, theta
def _debug_get_axis(self, figs):
if figs[-1].axes:
ax1 = figs[-1].axes[0]
ax1.cla()
else:
ax1 = figs[-1].add_subplot(111)
return ax1
def _debug_plot(self):
assert self._debug, "must enable _debug to debug-plot"
import pylab
# from mpl_toolkits.mplot3d import Axes3D
figs = [pylab.figure('BGPLVM DEBUG', figsize=(12, 4))]
# fig.clf()
# log like
# splotshape = (6, 4)
# ax1 = pylab.subplot2grid(splotshape, (0, 0), 1, 4)
ax1 = self._debug_get_axis(figs)
ax1.text(.5, .5, "Optimization", alpha=.3, transform=ax1.transAxes,
ha='center', va='center')
kllls = np.array(self._savedklll)
LL, = ax1.plot(kllls[:, 0], kllls[:, 1] - kllls[:, 2], '-', label=r'$\log p(\mathbf{Y})$', mew=1.5)
KL, = ax1.plot(kllls[:, 0], kllls[:, 2], '-', label=r'$\mathcal{KL}(p||q)$', mew=1.5)
L, = ax1.plot(kllls[:, 0], kllls[:, 1], '-', label=r'$L$', mew=1.5) # \mathds{E}_{q(\mathbf{X})}[p(\mathbf{Y|X})\frac{p(\mathbf{X})}{q(\mathbf{X})}]
param_dict = dict(self._savedparams)
gradient_dict = dict(self._savedgradients)
# kmm_dict = dict(self._savedpsiKmm)
iters = np.array(param_dict.keys())
ABCD_dict = np.array(self._savedABCD)
self.showing = 0
# ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4)
figs.append(pylab.figure("BGPLVM DEBUG X", figsize=(12, 4)))
ax2 = self._debug_get_axis(figs)
ax2.text(.5, .5, r"$\mathbf{X}$", alpha=.5, transform=ax2.transAxes,
ha='center', va='center')
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(0, 0, 1, .86))
# ax3 = pylab.subplot2grid(splotshape, (3, 0), 2, 4, sharex=ax2)
figs.append(pylab.figure("BGPLVM DEBUG S", figsize=(12, 4)))
ax3 = self._debug_get_axis(figs)
ax3.text(.5, .5, r"$\mathbf{S}$", alpha=.5, transform=ax3.transAxes,
ha='center', va='center')
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(0, 0, 1, .86))
# ax4 = pylab.subplot2grid(splotshape, (5, 0), 2, 2)
figs.append(pylab.figure("BGPLVM DEBUG Z", figsize=(6, 4)))
ax4 = self._debug_get_axis(figs)
ax4.text(.5, .5, r"$\mathbf{Z}$", alpha=.5, transform=ax4.transAxes,
ha='center', va='center')
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(0, 0, 1, .86))
# ax5 = pylab.subplot2grid(splotshape, (5, 2), 2, 2)
figs.append(pylab.figure("BGPLVM DEBUG theta", figsize=(6, 4)))
ax5 = self._debug_get_axis(figs)
ax5.text(.5, .5, r"${\theta}$", alpha=.5, transform=ax5.transAxes,
ha='center', va='center')
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(.15, 0, 1, .86))
# figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6)))
# fig = figs[-1]
# ax6 = fig.add_subplot(121)
# ax6.text(.5, .5, r"${\mathbf{K}_{mm}}$", color='magenta', alpha=.5, transform=ax6.transAxes,
# ha='center', va='center')
# ax7 = fig.add_subplot(122)
# ax7.text(.5, .5, r"${\frac{dL}{dK_{mm}}}$", color='magenta', alpha=.5, transform=ax7.transAxes,
# ha='center', va='center')
figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6)))
fig = figs[-1]
ax8 = fig.add_subplot(121)
ax8.text(.5, .5, r"${\mathbf{A,B,C,D}}$", color='k', alpha=.5, transform=ax8.transAxes,
ha='center', va='center')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 1], label='A')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 2], label='B')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 3], label='C')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 4], label='D')
ax8.legend()
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(.15, 0, 1, .86))
X, S, Z, theta = self._debug_filter_params(param_dict[self.showing])
Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing])
# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag
quiver_units = 'xy'
quiver_scale = 1
quiver_scale_units = 'xy'
Xlatentplts = ax2.plot(X, ls="-", marker="x")
colors = colorConverter.to_rgba_array([p.get_color() for p in Xlatentplts], .4)
Ulatent = np.zeros_like(X)
xlatent = np.tile(np.arange(0, X.shape[0])[:, None], X.shape[1])
Xlatentgrads = ax2.quiver(xlatent, X, Ulatent, Xg, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale)
Slatentplts = ax3.plot(S, ls="-", marker="x")
Slatentgrads = ax3.quiver(xlatent, S, Ulatent, Sg, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale)
ax3.set_ylim(0, 1.)
xZ = np.tile(np.arange(0, Z.shape[0])[:, None], Z.shape[1])
UZ = np.zeros_like(Z)
Zplts = ax4.plot(Z, ls="-", marker="x")
Zgrads = ax4.quiver(xZ, Z, UZ, Zg, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale)
xtheta = np.arange(len(theta))
Utheta = np.zeros_like(theta)
thetaplts = ax5.bar(xtheta - .4, theta, color=colors)
thetagrads = ax5.quiver(xtheta, theta, Utheta, thetag, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale,
edgecolors=('k',), linewidths=[1])
pylab.setp(thetaplts, zorder=0)
pylab.setp(thetagrads, zorder=10)
ax5.set_xticks(np.arange(len(theta)))
ax5.set_xticklabels(self._get_param_names()[-len(theta):], rotation=17)
# imkmm = ax6.imshow(kmm_dict[self.showing][0])
# from mpl_toolkits.axes_grid1 import make_axes_locatable
# divider = make_axes_locatable(ax6)
# caxkmm = divider.append_axes("right", "5%", pad="1%")
# cbarkmm = pylab.colorbar(imkmm, cax=caxkmm)
#
# imkmmdl = ax7.imshow(kmm_dict[self.showing][1])
# divider = make_axes_locatable(ax7)
# caxkmmdl = divider.append_axes("right", "5%", pad="1%")
# cbarkmmdl = pylab.colorbar(imkmmdl, cax=caxkmmdl)
# input_dimleg = ax1.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)],
# loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.15, 1, 1.15),
# borderaxespad=0, mode="expand")
ax2.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)],
loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1),
borderaxespad=0, mode="expand")
ax3.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)],
loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1),
borderaxespad=0, mode="expand")
ax4.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)],
loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1),
borderaxespad=0, mode="expand")
ax5.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)],
loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1),
borderaxespad=0, mode="expand")
Lleg = ax1.legend()
Lleg.draggable()
# ax1.add_artist(input_dimleg)
indicatorKL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 2], 'o', c=KL.get_color())
indicatorLL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1] - kllls[self.showing, 2], 'o', c=LL.get_color())
indicatorL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1], 'o', c=L.get_color())
# for err in self._savederrors:
# if err < kllls.shape[0]:
# ax1.scatter(kllls[err, 0], kllls[err, 2], s=50, marker=(5, 2), c=KL.get_color())
# ax1.scatter(kllls[err, 0], kllls[err, 1] - kllls[err, 2], s=50, marker=(5, 2), c=LL.get_color())
# ax1.scatter(kllls[err, 0], kllls[err, 1], s=50, marker=(5, 2), c=L.get_color())
# try:
# for f in figs:
# f.canvas.draw()
# f.tight_layout(box=(0, .15, 1, .9))
# # pylab.draw()
# # pylab.tight_layout(box=(0, .1, 1, .9))
# except:
# pass
# parameter changes
# ax2 = pylab.subplot2grid((4, 1), (1, 0), 3, 1, projection='3d')
button_options = [0, 0] # [0]: clicked -- [1]: dragged
def update_plots(event):
if button_options[0] and not button_options[1]:
# event.button, event.x, event.y, event.xdata, event.ydata)
tmp = np.abs(iters - event.xdata)
closest_hit = iters[tmp == tmp.min()][0]
if closest_hit != self.showing:
self.showing = closest_hit
# print closest_hit, iters, event.xdata
indicatorLL.set_data(self.showing, kllls[self.showing, 1] - kllls[self.showing, 2])
indicatorKL.set_data(self.showing, kllls[self.showing, 2])
indicatorL.set_data(self.showing, kllls[self.showing, 1])
X, S, Z, theta = self._debug_filter_params(param_dict[self.showing])
Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing])
# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag
for i, Xlatent in enumerate(Xlatentplts):
Xlatent.set_ydata(X[:, i])
Xlatentgrads.set_offsets(np.array([xlatent.ravel(), X.ravel()]).T)
Xlatentgrads.set_UVC(Ulatent, Xg)
for i, Slatent in enumerate(Slatentplts):
Slatent.set_ydata(S[:, i])
Slatentgrads.set_offsets(np.array([xlatent.ravel(), S.ravel()]).T)
Slatentgrads.set_UVC(Ulatent, Sg)
for i, Zlatent in enumerate(Zplts):
Zlatent.set_ydata(Z[:, i])
Zgrads.set_offsets(np.array([xZ.ravel(), Z.ravel()]).T)
Zgrads.set_UVC(UZ, Zg)
for p, t in zip(thetaplts, theta):
p.set_height(t)
thetagrads.set_offsets(np.array([xtheta.ravel(), theta.ravel()]).T)
thetagrads.set_UVC(Utheta, thetag)
# imkmm.set_data(kmm_dict[self.showing][0])
# imkmm.autoscale()
# cbarkmm.update_normal(imkmm)
#
# imkmmdl.set_data(kmm_dict[self.showing][1])
# imkmmdl.autoscale()
# cbarkmmdl.update_normal(imkmmdl)
ax2.relim()
# ax3.relim()
ax4.relim()
ax5.relim()
ax2.autoscale()
# ax3.autoscale()
ax4.autoscale()
ax5.autoscale()
[fig.canvas.draw() for fig in figs]
button_options[0] = 0
button_options[1] = 0
def onclick(event):
if event.inaxes is ax1 and event.button == 1:
button_options[0] = 1
def motion(event):
if button_options[0]:
button_options[1] = 1
cidr = figs[0].canvas.mpl_connect('button_release_event', update_plots)
cidp = figs[0].canvas.mpl_connect('button_press_event', onclick)
cidd = figs[0].canvas.mpl_connect('motion_notify_event', motion)
return ax1, ax2, ax3, ax4, ax5 # , ax6, ax7
def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
objective function for fitting the latent variables for test points
(negative log-likelihood: should be minimised!)
"""
mu, log_S = mu_S.reshape(2, 1, -1)
S = np.exp(log_S)
psi0 = kern.psi0(Z, mu, S)
psi1 = kern.psi1(Z, mu, S)
psi2 = kern.psi2(Z, mu, S)
lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
dmu = mu0 + mu1 + mu2 - mu
# dS = S0 + S1 + S2 -0.5 + .5/S
dlnS = S * (S0 + S1 + S2 - 0.5) + .5
return -lik, -np.hstack((dmu.flatten(), dlnS.flatten()))
def latent_cost(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
objective function for fitting the latent variables (negative log-likelihood: should be minimised!)
This is the same as latent_cost_and_grad but only for the objective
"""
mu, log_S = mu_S.reshape(2, 1, -1)
S = np.exp(log_S)
psi0 = kern.psi0(Z, mu, S)
psi1 = kern.psi1(Z, mu, S)
psi2 = kern.psi2(Z, mu, S)
lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
return -float(lik)
def latent_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
This is the same as latent_cost_and_grad but only for the grad
"""
mu, log_S = mu_S.reshape(2, 1, -1)
S = np.exp(log_S)
mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
dmu = mu0 + mu1 + mu2 - mu
# dS = S0 + S1 + S2 -0.5 + .5/S
dlnS = S * (S0 + S1 + S2 - 0.5) + .5
return -np.hstack((dmu.flatten(), dlnS.flatten()))
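For orientation, a minimal usage sketch for the class above (data shapes are illustrative assumptions; after this commit the class is imported as BayesianGPLVM, and optimize comes from GPy's model base class):

import numpy as np
from GPy.models import BayesianGPLVM  # renamed from Bayesian_GPLVM in this commit

Y = np.random.randn(100, 12)             # illustrative: 100 points, 12 output dims
m = BayesianGPLVM(Y, input_dim=2, M=10)  # 2-d latent space, 10 inducing inputs
m.optimize()                             # inherited from the GPy model base class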

@@ -1,252 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, tdot, symmetrify,pdinv
from ..util.plot import gpplot
from .. import kern
from scipy import stats, linalg
from ..core import sparse_GP
def backsub_both_sides(L,X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1)
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
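# A sketch of the two LAPACK calls above: dtrtrs with lower=1, trans=1 solves
# L^T tmp = X, giving tmp = L^-T X; the second call repeats the solve on tmp.T,
# and the final transpose yields L^-T X L^-1 (using the symmetry of X).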
class FITC(sparse_GP):
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
super(FITC, self).__init__(X, likelihood, kernel, Z=Z, X_variance=X_variance, normalize_X=normalize_X)
def update_likelihood_approximation(self):
"""
Approximates a non-Gaussian likelihood using Expectation Propagation.
For a Gaussian (or direct: TODO) likelihood, no iteration is required:
this function does nothing.
Diag(Knn - Qnn) is added to the noise term so the tools already implemented in sparse_GP can be reused.
The true precision is now stored as 'true_precision', not 'precision'.
"""
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
def _computations(self):
#factor Kmm
self.Lm = jitchol(self.Kmm)
self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy()
self.Diag0 = self.psi0 - np.diag(self.Qnn)
self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #Includes Diag0 in the precision
self.V_star = self.beta_star * self.likelihood.Y
# The rather complex computations of self.A
if self.has_uncertain_inputs:
raise NotImplementedError
else:
if self.likelihood.is_heteroscedastic:
assert self.likelihood.D == 1
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N)))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
# factor B
self.B = np.eye(self.M) + self.A
self.LB = jitchol(self.B)
self.LBi = chol_inv(self.LB)
self.psi1V = np.dot(self.psi1, self.V_star)
Lmi_psi1V, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0)
Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
b_psi1_Ki = self.beta_star * Kmmipsi1.T
Ki_pbp_Ki = np.dot(Kmmipsi1,b_psi1_Ki)
Kmmi = np.dot(self.Lmi.T,self.Lmi)
LBiLmi = np.dot(self.LBi,self.Lmi)
LBL_inv = np.dot(LBiLmi.T,LBiLmi)
VVT = np.outer(self.V_star,self.V_star)
VV_p_Ki = np.dot(VVT,Kmmipsi1.T)
Ki_pVVp_Ki = np.dot(Kmmipsi1,VV_p_Ki)
psi1beta = self.psi1*self.beta_star.T
H = self.Kmm + mdot(self.psi1,psi1beta.T)
LH = jitchol(H)
LHi = chol_inv(LH)
Hi = np.dot(LHi.T,LHi)
betapsi1TLmiLBi = np.dot(psi1beta.T,LBiLmi.T)
alpha = np.array([np.dot(a.T,a) for a in betapsi1TLmiLBi])[:,None]
gamma_1 = mdot(VVT,self.psi1.T,Hi)
pHip = mdot(self.psi1.T,Hi,self.psi1)
gamma_2 = mdot(self.beta_star*pHip,self.V_star)
gamma_3 = self.V_star * gamma_2
self._dL_dpsi0 = -0.5 * self.beta_star#dA_dpsi0: logdet(self.beta_star)
self._dL_dpsi0 += .5 * self.V_star**2 #dA_psi0: yT*beta_star*y
self._dL_dpsi0 += .5 *alpha #dC_dpsi0
self._dL_dpsi0 += 0.5*mdot(self.beta_star*pHip,self.V_star)**2 - self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T #dD_dpsi0
self._dL_dpsi1 = b_psi1_Ki.copy() #dA_dpsi1: logdet(self.beta_star)
self._dL_dpsi1 += -np.dot(psi1beta.T,LBL_inv) #dC_dpsi1
self._dL_dpsi1 += gamma_1 - mdot(psi1beta.T,Hi,self.psi1,gamma_1) #dD_dpsi1
self._dL_dKmm = -0.5 * np.dot(Kmmipsi1,b_psi1_Ki) #dA_dKmm: logdet(self.beta_star)
self._dL_dKmm += .5*(LBL_inv - Kmmi) + mdot(LBL_inv,psi1beta,Kmmipsi1.T) #dC_dKmm
self._dL_dKmm += -.5 * mdot(Hi,self.psi1,gamma_1) #dD_dKmm
self._dpsi1_dtheta = 0
self._dpsi1_dX = 0
self._dKmm_dtheta = 0
self._dKmm_dX = 0
self._dpsi1_dX_jkj = 0
self._dpsi1_dtheta_jkj = 0
for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.N),self.V_star,alpha,gamma_2,gamma_3):
K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
#Diag_dpsi1 = Diag_dA_dpsi1: yT*beta_star*y + Diag_dC_dpsi1 +Diag_dD_dpsi1
_dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
#Diag_dKmm = Diag_dA_dKmm: yT*beta_star*y +Diag_dC_dKmm +Diag_dD_dKmm
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm ,self.Z)
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
# the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedatic derivates not implemented"
else:
# likelihood is not heterscedatic
dbstar_dnoise = self.likelihood.precision * (self.beta_star**2 * self.Diag0[:,None] - self.beta_star)
Lmi_psi1 = mdot(self.Lmi,self.psi1)
LBiLmipsi1 = np.dot(self.LBi,Lmi_psi1)
aux_0 = np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V)
dA_dnoise = 0.5 * self.D * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.D * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
alpha = mdot(LBiLmipsi1,self.V_star)
alpha_ = mdot(LBiLmipsi1.T,alpha)
dD_dnoise_2 = -0.5 * self.D * np.sum(alpha_**2 * dbstar_dnoise )
dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y)
dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star)
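# NOTE: the two assignments directly above overwrite the earlier dD_dnoise_1 and
# dD_dnoise_2 expressions; only these latest values enter dD_dnoise below.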
dD_dnoise = dD_dnoise_1 + dD_dnoise_2
self.partial_for_likelihood = dA_dnoise + dC_dnoise + dD_dnoise
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
C = -self.D * (np.sum(np.log(np.diag(self.LB))))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + C + D
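# A reading of the three terms (a sketch, from the expressions above):
#   A: Gaussian normalizer plus data fit under the FITC-corrected precision beta_star
#   C: -self.D * sum(log(diag(LB))), i.e. -0.5 * self.D * logdet(B)
#   D: 0.5 * || LB^-1 Lm^-1 psi1 V ||^2, the quadratic form in the projected data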
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self):
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X)
dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1,self.X,self.Z)
dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm,X=self.Z)
dL_dtheta += self._dKmm_dtheta
dL_dtheta += self._dpsi1_dtheta
return dL_dtheta
def dL_dZ(self):
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X)
dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm,X=self.Z)
dL_dZ += self._dpsi1_dX
dL_dZ += self._dKmm_dX
return dL_dZ
def _raw_predict(self, Xnew, which_parts, full_cov=False):
if self.likelihood.is_heteroscedastic:
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.likelihood.precision.flatten())
self.Diag = self.Diag0 * Iplus_Dprod_i
self.P = Iplus_Dprod_i[:,None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi,self.psi1)
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
self.w = self.Diag * self.likelihood.v_tilde
self.Gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde))
self.mu = self.w + np.dot(self.P,self.Gamma)
"""
Make a prediction for the generalized FITC model
Arguments
---------
X : Input prediction data - Nx1 numpy array (floats)
"""
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1)
C = np.eye(self.M) - np.dot(V.T,V)
mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
#self.C = C
#self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
#self.mu_u = mu_u
#self.U = U
# q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
mu_H = np.dot(mu_u,self.mu)
self.mu_H = mu_H
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
KR0T = np.dot(Kx.T,self.Lmi.T)
mu_star = np.dot(KR0T,mu_H)
if full_cov:
Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
return mu_star[:,None],var
else:
raise NotImplementedError, "homoscedastic fitc not implemented"
"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]
"""

@@ -1,67 +0,0 @@
### Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import sys, pdb
from .. import kern
from ..core import model
from ..util.linalg import pdinv, PCA
from ..core import GP
from ..likelihoods import Gaussian
from .. import util
from GPy.util import plot_latent
class GPLVM(GP):
"""
Gaussian Process Latent Variable Model
:param Y: observed data
:type Y: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
"""
def __init__(self, Y, input_dim, init='PCA', X = None, kernel=None, normalize_Y=False):
if X is None:
X = self.initialise_latent(init, input_dim, Y)
if kernel is None:
kernel = kern.rbf(input_dim, ARD=input_dim>1) + kern.bias(input_dim, np.exp(-2)) + kern.white(input_dim, np.exp(-2))
likelihood = Gaussian(Y, normalize=normalize_Y)
GP.__init__(self, X, likelihood, kernel, normalize_X=False)
self._set_params(self._get_params())
def initialise_latent(self, init, input_dim, Y):
if init == 'PCA':
return PCA(Y, input_dim)[0]
else:
return np.random.randn(Y.shape[0], input_dim)
def _get_param_names(self):
return sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.N)],[]) + GP._get_param_names(self)
def _get_params(self):
return np.hstack((self.X.flatten(), GP._get_params(self)))
def _set_params(self,x):
self.X = x[:self.N*self.input_dim].reshape(self.N,self.input_dim).copy()
GP._set_params(self, x[self.X.size:])
def _log_likelihood_gradients(self):
dL_dX = 2.*self.kern.dK_dX(self.dL_dK,self.X)
return np.hstack((dL_dX.flatten(),GP._log_likelihood_gradients(self)))
def plot(self):
assert self.likelihood.Y.shape[1]==2
pb.scatter(self.likelihood.Y[:,0],self.likelihood.Y[:,1],40,self.X[:,0].copy(),linewidth=0,cmap=pb.cm.jet)
Xnew = np.linspace(self.X.min(),self.X.max(),200)[:,None]
mu, var, upper, lower = self.predict(Xnew)
pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)
def plot_latent(self, *args, **kwargs):
return util.plot_latent.plot_latent(self, *args, **kwargs)
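A minimal usage sketch for the class above (illustrative data; this commit moves it to the lowercase gplvm module while the class name stays GPLVM):

import numpy as np
from GPy.models import GPLVM

Y = np.random.randn(50, 5)             # illustrative: 50 observations, 5 output dims
m = GPLVM(Y, input_dim=2, init='PCA')  # PCA-initialised 2-d latent space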

@@ -1,41 +0,0 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import GP
from .. import likelihoods
from .. import kern
class GP_classification(GP):
"""
Gaussian Process classification
This is a thin wrapper around the models.GP class, with a set of sensible defaults
:param X: input observations
:param Y: observed values
:param likelihood: a GPy likelihood, defaults to binomial with probit link_function
:param kernel: a GPy kernel, defaults to rbf
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
:type normalize_Y: False|True
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self,X,Y=None,likelihood=None,kernel=None,normalize_X=False,normalize_Y=False):
if kernel is None:
kernel = kern.rbf(X.shape[1])
if likelihood is None:
distribution = likelihoods.likelihood_functions.binomial()
likelihood = likelihoods.EP(Y, distribution)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):
raise Warning('likelihood.data and Y are different.')
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params())

@@ -1,35 +0,0 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import GP
from .. import likelihoods
from .. import kern
class GP_regression(GP):
"""
Gaussian Process model for regression
This is a thin wrapper around the models.GP class, with a set of sensible defaults
:param X: input observations
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
:type normalize_Y: False|True
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False):
if kernel is None:
kernel = kern.rbf(X.shape[1])
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params())
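A hedged usage sketch for the wrapper above (illustrative data; after this commit the class is imported as GPRegression from the gp_regression module):

import numpy as np
from GPy import kern
from GPy.models import GPRegression  # renamed from GP_regression in this commit

X = np.linspace(0, 1, 20)[:, None]
Y = np.sin(2 * np.pi * X) + 0.05 * np.random.randn(20, 1)
m = GPRegression(X, Y, kernel=kern.rbf(1))  # kern.rbf is also the default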

@@ -2,14 +2,12 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from GP_regression import GP_regression
from GP_classification import GP_classification
from sparse_GP_regression import sparse_GP_regression
from sparse_GP_classification import sparse_GP_classification
from GPLVM import GPLVM
from warped_GP import warpedGP
from sparse_GPLVM import sparse_GPLVM
from Bayesian_GPLVM import Bayesian_GPLVM
from gp_regression import GPRegression
from sparse_gp_regression import SparseGPRegression
from gplvm import GPLVM
from warped_gp import WarpedGP
from bayesian_gplvm import BayesianGPLVM
from mrd import MRD
from generalized_FITC import generalized_FITC
from FITC import FITC
from generalized_fitc import GeneralizedFITC
from fitc import FITC
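The net effect of this hunk on downstream code is the import rename; a before/after sketch (class list abridged):

# before this commit (mixed snake_case modules and classes)
from GPy.models import GP_regression, sparse_GP_regression, Bayesian_GPLVM, warpedGP
# after this commit (lowercase modules, CamelCase classes)
from GPy.models import GPRegression, SparseGPRegression, BayesianGPLVM, WarpedGP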

@@ -1,221 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
from ..util.plot import gpplot
from .. import kern
from scipy import stats, linalg
from ..core import sparse_GP
def backsub_both_sides(L,X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1)
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
class generalized_FITC(sparse_GP):
"""
Implementation of EP with FITC, following Naish-Guzman, A. and Holden, S. (2008).
:param X: inputs
:type X: np.ndarray (N x input_dim)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP)
:param kernel: the kernel (covariance function); see the GPy.kern module
:type kernel: a GPy kernel
:param X_variance: The variance in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (N x input_dim) | None
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x input_dim) | None
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
self.Z = Z
self.M = self.Z.shape[0]
self.true_precision = likelihood.precision
super(generalized_FITC, self).__init__(X, likelihood, kernel=kernel, Z=self.Z, X_variance=X_variance, normalize_X=normalize_X)
self._set_params(self._get_params())
def _set_params(self, p):
self.Z = p[:self.M*self.input_dim].reshape(self.M, self.input_dim)
self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
self._compute_kernel_matrices()
self._computations()
self._FITC_computations()
def update_likelihood_approximation(self):
"""
Approximates a non-Gaussian likelihood using Expectation Propagation.
For a Gaussian (or direct: TODO) likelihood, no iteration is required:
this function does nothing.
Diag(Knn - Qnn) is added to the noise term so the tools already implemented in sparse_GP can be reused.
The true precision is now stored as 'true_precision', not 'precision'.
"""
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self.true_precision = self.likelihood.precision # Save the true precision
self.likelihood.precision = self.true_precision/(1. + self.true_precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation
self._set_params(self._get_params()) # update the GP
def _FITC_computations(self):
"""
FITC approximation doesn't have the correction term in the log-likelihood bound,
but adds a diagonal term to the covariance matrix: diag(Knn - Qnn).
This function:
- computes the FITC diagonal term
- removes the extra terms computed in the sparse_GP approximation
- computes the likelihood gradients wrt the true precision.
"""
# NOTE: the true precision is now 'true_precision', not 'precision'
if self.likelihood.is_heteroscedastic:
# Compute generalized FITC's diagonal term of the covariance
self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1)
#self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm)
#self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1)
#a = kj
self.Diag0 = self.psi0 - np.diag(self.Qnn)
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.true_precision.flatten())
self.Diag = self.Diag0 * Iplus_Dprod_i
self.P = Iplus_Dprod_i[:,None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi,self.psi1)
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
self.w = self.Diag * self.likelihood.v_tilde
self.Gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde))
self.mu = self.w + np.dot(self.P,self.Gamma)
# Remove extra term from dL_dpsi1
self.dL_dpsi1 -= mdot(self.Lmi.T,Lmipsi1*self.likelihood.precision.flatten().reshape(1,self.N))
#self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm)
#self.dL_dpsi1 -= mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
#########333333
#self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
#########333333
else:
raise NotImplementedError, "homoscedastic fitc not implemented"
# Remove extra term from dL_dpsi1
#self.dL_dpsi1 += -mdot(self.Kmmi,self.psi1*self.likelihood.precision) #dB
sf = self.scale_factor
sf2 = sf**2
# Remove extra term from dL_dKmm
self.dL_dKmm += 0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
self.dL_dpsi0 = None
#the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedastic derivates not implemented"
else:
raise NotImplementedError, "homoscedastic derivatives not implemented"
#likelihood is not heterscedatic
#self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
#self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
#self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
#TODO partial derivative vector for the likelihood not implemented
def dL_dtheta(self):
"""
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
"""
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
if self.has_uncertain_inputs:
raise NotImplementedError, "heteroscedatic derivates not implemented"
else:
#NOTE in sparse_GP this would include the gradient wrt psi0
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X)
return dL_dtheta
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2
if self.likelihood.is_heteroscedastic:
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
else:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.M*np.log(sf2))
#C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V))
#self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
#D_ = 0.5*np.trace(self.Cpsi1VVpsi1)
return A+C+D
def _raw_predict(self, Xnew, which_parts, full_cov=False):
if self.likelihood.is_heteroscedastic:
"""
Make a prediction for the generalized FITC model
Arguments
---------
X : Input prediction data - Nx1 numpy array (floats)
"""
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1)
C = np.eye(self.M) - np.dot(V.T,V)
mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
#self.C = C
#self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
#self.mu_u = mu_u
#self.U = U
# q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
mu_H = np.dot(mu_u,self.mu)
self.mu_H = mu_H
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
KR0T = np.dot(Kx.T,self.Lmi.T)
mu_star = np.dot(KR0T,mu_H)
if full_cov:
Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed?
var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) # TODO: RA, is this line needed?
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
return mu_star[:,None],var
else:
raise NotImplementedError, "homoscedastic fitc not implemented"
"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]
"""

@@ -4,14 +4,13 @@ Created on 10 Apr 2013
@author: Max Zwiessele
'''
from GPy.core import model
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
from GPy.core import sparse_GP
from GPy.core import SparseGP
from GPy.util.linalg import PCA
from scipy import linalg
import numpy
import itertools
import pylab
from GPy.kern.kern import kern
from GPy.models.bayesian_gplvm import BayesianGPLVM
class MRD(model):
"""
@@ -38,7 +37,7 @@ class MRD(model):
*concat: PCA on concatenated outputs
*single: PCA on each output
*random: random
:param M:
:param num_inducing:
number of inducing inputs to use
:param Z:
initial inducing inputs
@@ -62,22 +61,22 @@ class MRD(model):
assert not ('kernel' in kw), "pass kernels through `kernels` argument"
self.input_dim = input_dim
self.M = M
self.num_inducing = M
self._debug = _debug
self._init = True
X = self._init_X(initx, likelihood_or_Y_list)
Z = self._init_Z(initz, X)
self.bgplvms = [Bayesian_GPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, M=self.M, **kw) for l, k in zip(likelihood_or_Y_list, kernels)]
self.bgplvms = [BayesianGPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, M=self.num_inducing, **kw) for l, k in zip(likelihood_or_Y_list, kernels)]
del self._init
self.gref = self.bgplvms[0]
nparams = numpy.array([0] + [sparse_GP._get_params(g).size - g.Z.size for g in self.bgplvms])
nparams = numpy.array([0] + [SparseGP._get_params(g).size - g.Z.size for g in self.bgplvms])
self.nparams = nparams.cumsum()
self.N = self.gref.N
self.NQ = self.N * self.input_dim
self.MQ = self.M * self.input_dim
self.MQ = self.num_inducing * self.input_dim
model.__init__(self) # @UndefinedVariable
self._set_params(self._get_params())
@@ -151,7 +150,7 @@ class MRD(model):
itertools.izip(ns,
itertools.repeat(name)))
return list(itertools.chain(n1var, *(map_names(\
sparse_GP._get_param_names(g)[self.MQ:], n) \
SparseGP._get_param_names(g)[self.MQ:], n) \
for g, n in zip(self.bgplvms, self.names))))
def _get_params(self):
@@ -165,14 +164,14 @@ class MRD(model):
X = self.gref.X.ravel()
X_var = self.gref.X_variance.ravel()
Z = self.gref.Z.ravel()
thetas = [sparse_GP._get_params(g)[g.Z.size:] for g in self.bgplvms]
thetas = [SparseGP._get_params(g)[g.Z.size:] for g in self.bgplvms]
params = numpy.hstack([X, X_var, Z, numpy.hstack(thetas)])
return params
# def _set_var_params(self, g, X, X_var, Z):
# g.X = X.reshape(self.N, self.input_dim)
# g.X_variance = X_var.reshape(self.N, self.input_dim)
# g.Z = Z.reshape(self.M, self.input_dim)
# g.Z = Z.reshape(self.num_inducing, self.input_dim)
#
# def _set_kern_params(self, g, p):
# g.kern._set_params(p[:g.kern.Nparam])
@@ -206,7 +205,7 @@ class MRD(model):
def log_likelihood(self):
ll = -self.gref.KL_divergence()
for g in self.bgplvms:
ll += sparse_GP.log_likelihood(g)
ll += SparseGP.log_likelihood(g)
return ll
def _log_likelihood_gradients(self):
@@ -215,7 +214,7 @@ class MRD(model):
dLdmu -= dKLmu
dLdS -= dKLdS
dLdmuS = numpy.hstack((dLdmu.flatten(), dLdS.flatten())).flatten()
dldzt1 = reduce(lambda a, b: a + b, (sparse_GP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms))
dldzt1 = reduce(lambda a, b: a + b, (SparseGP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms))
return numpy.hstack((dLdmuS,
dldzt1,
@@ -250,9 +249,9 @@ class MRD(model):
if X is None:
X = self.X
if init in "permute":
Z = numpy.random.permutation(X.copy())[:self.M]
Z = numpy.random.permutation(X.copy())[:self.num_inducing]
elif init in "random":
Z = numpy.random.randn(self.M, self.input_dim) * X.var()
Z = numpy.random.randn(self.num_inducing, self.input_dim) * X.var()
self.Z = Z
return Z

@@ -1,61 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import sys, pdb
# from .. import kern
# from ..core import model
# from ..util.linalg import pdinv, PCA
from GPLVM import GPLVM
from sparse_GP_regression import sparse_GP_regression
class sparse_GPLVM(sparse_GP_regression, GPLVM):
"""
Sparse Gaussian Process Latent Variable Model
:param Y: observed data
:type Y: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
"""
def __init__(self, Y, input_dim, kernel=None, init='PCA', M=10):
X = self.initialise_latent(init, input_dim, Y)
sparse_GP_regression.__init__(self, X, Y, kernel=kernel,M=M)
def _get_param_names(self):
return (sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.N)],[])
+ sparse_GP_regression._get_param_names(self))
def _get_params(self):
return np.hstack((self.X.flatten(), sparse_GP_regression._get_params(self)))
def _set_params(self,x):
self.X = x[:self.X.size].reshape(self.N,self.input_dim).copy()
sparse_GP_regression._set_params(self, x[self.X.size:])
def log_likelihood(self):
return sparse_GP_regression.log_likelihood(self)
def dL_dX(self):
dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0,self.X)
dL_dX += self.kern.dK_dX(self.dL_dpsi1.T,self.X,self.Z)
return dL_dX
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dX().flatten(), sparse_GP_regression._log_likelihood_gradients(self)))
def plot(self):
GPLVM.plot(self)
# passing Z without a small amount of jitter will induce the white kernel where we don't want it!
mu, var, upper, lower = sparse_GP_regression.predict(self, self.Z+np.random.randn(*self.Z.shape)*0.0001)
pb.plot(mu[:, 0] , mu[:, 1], 'ko')
def plot_latent(self, *args, **kwargs):
input_1, input_2 = GPLVM.plot_latent(self, *args, **kwargs)
pb.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')

@@ -1,50 +0,0 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import sparse_GP
from .. import likelihoods
from .. import kern
from ..likelihoods import likelihood
from GP_regression import GP_regression
class sparse_GP_classification(sparse_GP):
"""
Sparse Gaussian Process model for classification
This is a thin wrapper around the sparse_GP class, with a set of sensible defaults
:param X: input observations
:param Y: observed values
:param likelihood: a GPy likelihood, defaults to binomial with probit link_function
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:rtype: model object
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10):
if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
if likelihood is None:
distribution = likelihoods.likelihood_functions.binomial()
likelihood = likelihoods.EP(Y, distribution)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):
raise Warning('likelihood.data and Y are different.')
if Z is None:
i = np.random.permutation(X.shape[0])[:M]
Z = X[i].copy()
else:
assert Z.shape[1]==X.shape[1]
sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X)
self._set_params(self._get_params())

@@ -1,47 +0,0 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import sparse_GP
from .. import likelihoods
from .. import kern
from ..likelihoods import likelihood
from GP_regression import GP_regression
class sparse_GP_regression(sparse_GP):
"""
Sparse Gaussian Process model for regression
This is a thin wrapper around the sparse_GP class, with a set of sensible defaults
:param X: input observations
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:rtype: model object
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10, X_variance=None):
#kern defaults to rbf (plus white for stability)
if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
#Z defaults to a subset of the data
if Z is None:
i = np.random.permutation(X.shape[0])[:M]
Z = X[i].copy()
else:
assert Z.shape[1]==X.shape[1]
#likelihood defaults to Gaussian
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance)
self._set_params(self._get_params())
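A short usage sketch for the sparse wrapper above (illustrative shapes; the class becomes SparseGPRegression after this commit):

import numpy as np
from GPy.models import SparseGPRegression  # renamed from sparse_GP_regression

X = np.random.rand(200, 1)
Y = np.sin(6. * X) + 0.1 * np.random.randn(200, 1)
m = SparseGPRegression(X, Y, M=10)  # Z defaults to a random subset of 10 rows of X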

@@ -1,93 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from .. import kern
from ..core import model
from ..util.linalg import pdinv
from ..util.plot import gpplot
from ..util.warping_functions import *
from GP_regression import GP_regression
from ..core import GP
from .. import likelihoods
from .. import kern
class warpedGP(GP):
def __init__(self, X, Y, kernel=None, warping_function = None, warping_terms = 3, normalize_X=False, normalize_Y=False):
if kernel is None:
kernel = kern.rbf(X.shape[1])
if warping_function is None:
self.warping_function = TanhWarpingFunction_d(warping_terms)
self.warping_params = (np.random.randn(self.warping_function.n_terms*3+1,) * 1)
Y = self._scale_data(Y)
self.has_uncertain_inputs = False
self.Y_untransformed = Y.copy()
self.predict_in_warped_space = False
likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y)
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params())
def _scale_data(self, Y):
self._Ymax = Y.max()
self._Ymin = Y.min()
return (Y-self._Ymin)/(self._Ymax-self._Ymin) - 0.5
def _unscale_data(self, Y):
return (Y + 0.5)*(self._Ymax - self._Ymin) + self._Ymin
def _set_params(self, x):
self.warping_params = x[:self.warping_function.num_parameters]
Y = self.transform_data()
self.likelihood.set_data(Y)
GP._set_params(self, x[self.warping_function.num_parameters:].copy())
def _get_params(self):
return np.hstack((self.warping_params.flatten().copy(), GP._get_params(self).copy()))
def _get_param_names(self):
warping_names = self.warping_function._get_param_names()
param_names = GP._get_param_names(self)
return warping_names + param_names
def transform_data(self):
Y = self.warping_function.f(self.Y_untransformed.copy(), self.warping_params).copy()
return Y
def log_likelihood(self):
ll = GP.log_likelihood(self)
jacobian = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
return ll + np.log(jacobian).sum()
def _log_likelihood_gradients(self):
ll_grads = GP._log_likelihood_gradients(self)
alpha = np.dot(self.Ki, self.likelihood.Y.flatten())
warping_grads = self.warping_function_gradients(alpha)
warping_grads = np.append(warping_grads[:,:-1].flatten(), warping_grads[0,-1])
return np.hstack((warping_grads.flatten(), ll_grads.flatten()))
def warping_function_gradients(self, Kiy):
grad_y = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed, self.warping_params,
return_covar_chain = True)
djac_dpsi = ((1.0/grad_y[:,:, None, None])*grad_y_psi).sum(axis=0).sum(axis=0)
dquad_dpsi = (Kiy[:,None,None,None] * grad_psi).sum(axis=0).sum(axis=0)
return -dquad_dpsi + djac_dpsi
def plot_warping(self):
self.warping_function.plot(self.warping_params, self.Y_untransformed.min(), self.Y_untransformed.max())
def _raw_predict(self, *args, **kwargs):
mu, var = GP._raw_predict(self, *args, **kwargs)
if self.predict_in_warped_space:
mu = self.warping_function.f_inv(mu, self.warping_params)
var = self.warping_function.f_inv(var, self.warping_params)
mu = self._unscale_data(mu)
return mu, var
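Finally, a hedged usage sketch for the warped model above (illustrative data; after this commit the class is imported as WarpedGP):

import numpy as np
from GPy.models import WarpedGP  # renamed from warpedGP in this commit

X = np.random.rand(100, 1)
Y = np.exp(np.sin(4. * X) + 0.1 * np.random.randn(100, 1))  # skewed positive targets
m = WarpedGP(X, Y, warping_terms=3)  # tanh warping with 3 terms (the default)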