Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-13 14:03:20 +02:00)

Commit dc1e747702: Merge branch 'devel' of github.com:SheffieldML/GPy into devel
44 changed files with 4026 additions and 1575 deletions
@@ -9,6 +9,10 @@ from sparse_GP import sparse_GP
from GPy.util.linalg import pdinv
from ..likelihoods import Gaussian
from .. import kern
from numpy.linalg.linalg import LinAlgError
import itertools
from matplotlib.colors import colorConverter
from matplotlib.figure import SubplotParams

class Bayesian_GPLVM(sparse_GP, GPLVM):
    """
@@ -22,12 +26,14 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
    :type init: 'PCA'|'random'

    """
    def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10, Z=None, kernel=None, **kwargs):
    def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10,
                 Z=None, kernel=None, oldpsave=5, _debug=False,
                 **kwargs):
        if X == None:
            X = self.initialise_latent(init, Q, Y)

        if X_variance is None:
            X_variance = np.ones_like(X) * 0.5
            X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1)

        if Z is None:
            Z = np.random.permutation(X.copy())[:M]
@@ -36,9 +42,31 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
        if kernel is None:
            kernel = kern.rbf(Q) + kern.white(Q)

        self.oldpsave = oldpsave
        self._oldps = []
        self._debug = _debug

        if self._debug:
            self.f_call = 0
            self._count = itertools.count()
            self._savedklll = []
            self._savedparams = []
            self._savedgradients = []
            self._savederrors = []
            self._savedpsiKmm = []

        sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)

    @property
    def oldps(self):
        return self._oldps
    @oldps.setter
    def oldps(self, p):
        if len(self._oldps) == (self.oldpsave + 1):
            self._oldps.pop()
        # if len(self._oldps) == 0 or not np.any([np.any(np.abs(p - op) > 1e-5) for op in self._oldps]):
        self._oldps.insert(0, p.copy())

    def _get_param_names(self):
        X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
        S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
@@ -54,17 +82,26 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
        ===============================================================

        """
        return np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))

    def _set_params(self, x):
        N, Q = self.N, self.Q
        self.X = x[:self.X.size].reshape(N, Q).copy()
        self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
        sparse_GP._set_params(self, x[(2 * N * Q):])
        x = np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
        return x

    def _set_params(self, x, save_old=True, save_count=0):
        try:
            N, Q = self.N, self.Q
            self.X = x[:self.X.size].reshape(N, Q).copy()
            self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
            sparse_GP._set_params(self, x[(2 * N * Q):])
            self.oldps = x
        except (LinAlgError, FloatingPointError, ZeroDivisionError):
            print "\rWARNING: Caught LinAlgError, continueing without setting "
            if self._debug:
                self._savederrors.append(self.f_call)
            if save_count > 10:
                raise
            self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)

    def dKL_dmuS(self):
        dKL_dS = (1. - (1. / self.X_variance)) * 0.5
        dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
        dKL_dmu = self.X
        return dKL_dmu, dKL_dS
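The dKL_dmuS gradients above come from the KL divergence between the factorised variational posterior q(X) = N(mu, diag(S)) and the standard-normal prior on the latent points, which is the term that KL_divergence and log_likelihood subtract from the sparse-GP bound further down. A minimal numpy sketch of that term and its gradients, assuming mu and S are the N x Q arrays stored as self.X and self.X_variance (illustration only, not part of the commit):

import numpy as np

def kl_and_grads(mu, S):
    """KL( N(mu, diag(S)) || N(0, I) ) summed over all latent points and dimensions."""
    kl = 0.5 * np.sum(np.square(mu) + S - np.log(S) - 1.)
    dKL_dmu = mu                    # matches dKL_dmu = self.X in dKL_dmuS
    dKL_dS = 0.5 * (1. - 1. / S)    # matches dKL_dS in dKL_dmuS
    return kl, dKL_dmu, dKL_dS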
@@ -83,13 +120,40 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
        return 0.5 * (var_mean + var_S) - 0.5 * self.Q * self.N

    def log_likelihood(self):
        return sparse_GP.log_likelihood(self) - self.KL_divergence()
        ll = sparse_GP.log_likelihood(self)
        kl = self.KL_divergence()

        # if ll < -2E4:
        # ll = -2E4 + np.random.randn()
        # if kl > 5E4:
        # kl = 5E4 + np.random.randn()

        if self._debug:
            self.f_call = self._count.next()
            if self.f_call % 1 == 0:
                self._savedklll.append([self.f_call, ll, kl])
                self._savedparams.append([self.f_call, self._get_params()])
                self._savedgradients.append([self.f_call, self._log_likelihood_gradients()])
                self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]])
        # print "\nkl:", kl, "ll:", ll
        return ll - kl

    def _log_likelihood_gradients(self):
        dKL_dmu, dKL_dS = self.dKL_dmuS()
        dL_dmu, dL_dS = self.dL_dmuS()
        # TODO: find way to make faster
        dbound_dmuS = np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))

        d_dmu = (dL_dmu - dKL_dmu).flatten()
        d_dS = (dL_dS - dKL_dS).flatten()
        # TEST KL: ====================
        # d_dmu = (dKL_dmu).flatten()
        # d_dS = (dKL_dS).flatten()
        # ========================
        # TEST L: ====================
        # d_dmu = (dL_dmu).flatten()
        # d_dS = (dL_dS).flatten()
        # ========================
        dbound_dmuS = np.hstack((d_dmu, d_dS))
        return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self)))
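The stacked gradient returned above follows the same ordering as _get_params: the latent means first, then X_variance, then the sparse_GP parameters. A quick way to sanity-check a stacked bound/gradient pair like this is a central finite difference against log_likelihood; the sketch below only assumes the _get_params/_set_params/log_likelihood interface shown in this file (GPy's model base class also ships its own checkgrad utility):

import numpy as np

def finite_difference_check(m, eps=1e-6, n_checks=10):
    """Compare a few entries of the analytic gradient with central differences."""
    p0 = m._get_params().copy()
    analytic = m._log_likelihood_gradients()
    for i in np.random.permutation(p0.size)[:n_checks]:
        p = p0.copy()
        p[i] += eps
        m._set_params(p)
        f_plus = m.log_likelihood()
        p[i] -= 2 * eps
        m._set_params(p)
        f_minus = m.log_likelihood()
        print i, analytic[i], (f_plus - f_minus) / (2 * eps)
    m._set_params(p0)  # restore the original parameters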
    def plot_latent(self, which_indices=None, *args, **kwargs):

@@ -104,3 +168,288 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
        ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs)
        ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
        return ax

    def plot_X_1d(self, fig=None, axes=None, fig_num="MRD X 1d", colors=None):
        """
        Plot latent space X in 1D:

        -if fig is given, create Q subplots in fig and plot in these
        -if axes is given plot Q 1D latent space plots of X into each `axis`
        -if neither fig nor axes is given create a figure with fig_num and plot in there

        colors:
            colors of different latent space dimensions Q
        """
        import pylab
        if fig is None and axes is None:
            fig = pylab.figure(num=fig_num, figsize=(8, min(12, (2 * self.X.shape[1]))))
        if colors is None:
            colors = pylab.gca()._get_lines.color_cycle
            pylab.clf()
        else:
            colors = iter(colors)
        plots = []
        for i in range(self.X.shape[1]):
            if axes is None:
                ax = fig.add_subplot(self.X.shape[1], 1, i + 1)
            else:
                ax = axes[i]
            ax.plot(self.X, c='k', alpha=.3)
            plots.extend(ax.plot(self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{}}}$".format(i)))
            ax.fill_between(np.arange(self.X.shape[0]),
                            self.X.T[i] - 2 * np.sqrt(self.X_variance.T[i]),
                            self.X.T[i] + 2 * np.sqrt(self.X_variance.T[i]),
                            facecolor=plots[-1].get_color(),
                            alpha=.3)
            ax.legend(borderaxespad=0.)
            if i < self.X.shape[1] - 1:
                ax.set_xticklabels('')
        pylab.draw()
        fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
        return fig
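Taken together, the constructor and the plotting helpers above suggest the following usage pattern. This is a hedged sketch: Y is synthetic, and the optimize call is the generic GPy model interface rather than anything specific to this commit:

import numpy as np
from GPy.models import Bayesian_GPLVM

Y = np.random.randn(100, 8)                    # 100 observations, 8 output dimensions
m = Bayesian_GPLVM(Y, Q=2, init='PCA', M=10)   # 2-d latent space, 10 inducing points
m.optimize(messages=1)

m.plot_latent()    # 2-d latent embedding, inducing inputs drawn as white triangles
m.plot_X_1d()      # per-dimension latent means with 2*sqrt(X_variance) bands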
    def _debug_filter_params(self, x):
        start, end = 0, self.X.size,
        X = x[start:end].reshape(self.N, self.Q)
        start, end = end, end + self.X_variance.size
        X_v = x[start:end].reshape(self.N, self.Q)
        start, end = end, end + (self.M * self.Q)
        Z = x[start:end].reshape(self.M, self.Q)
        start, end = end, end + self.Q
        theta = x[start:]
        return X, X_v, Z, theta

    def _debug_get_axis(self, figs):
        if figs[-1].axes:
            ax1 = figs[-1].axes[0]
            ax1.cla()
        else:
            ax1 = figs[-1].add_subplot(111)
        return ax1
||||
def _debug_plot(self):
|
||||
assert self._debug, "must enable _debug, to debug-plot"
|
||||
import pylab
|
||||
# from mpl_toolkits.mplot3d import Axes3D
|
||||
figs = [pylab.figure('BGPLVM DEBUG', figsize=(12, 4))]
|
||||
# fig.clf()
|
||||
|
||||
# log like
|
||||
# splotshape = (6, 4)
|
||||
# ax1 = pylab.subplot2grid(splotshape, (0, 0), 1, 4)
|
||||
ax1 = self._debug_get_axis(figs)
|
||||
ax1.text(.5, .5, "Optimization", alpha=.3, transform=ax1.transAxes,
|
||||
ha='center', va='center')
|
||||
kllls = np.array(self._savedklll)
|
||||
LL, = ax1.plot(kllls[:, 0], kllls[:, 1] - kllls[:, 2], '-', label=r'$\log p(\mathbf{Y})$', mew=1.5)
|
||||
KL, = ax1.plot(kllls[:, 0], kllls[:, 2], '-', label=r'$\mathcal{KL}(p||q)$', mew=1.5)
|
||||
L, = ax1.plot(kllls[:, 0], kllls[:, 1], '-', label=r'$L$', mew=1.5) # \mathds{E}_{q(\mathbf{X})}[p(\mathbf{Y|X})\frac{p(\mathbf{X})}{q(\mathbf{X})}]
|
||||
|
||||
param_dict = dict(self._savedparams)
|
||||
gradient_dict = dict(self._savedgradients)
|
||||
kmm_dict = dict(self._savedpsiKmm)
|
||||
iters = np.array(param_dict.keys())
|
||||
self.showing = 0
|
||||
|
||||
# ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4)
|
||||
figs.append(pylab.figure("BGPLVM DEBUG X", figsize=(12, 4)))
|
||||
ax2 = self._debug_get_axis(figs)
|
||||
ax2.text(.5, .5, r"$\mathbf{X}$", alpha=.5, transform=ax2.transAxes,
|
||||
ha='center', va='center')
|
||||
figs[-1].canvas.draw()
|
||||
figs[-1].tight_layout(rect=(0, 0, 1, .86))
|
||||
# ax3 = pylab.subplot2grid(splotshape, (3, 0), 2, 4, sharex=ax2)
|
||||
figs.append(pylab.figure("BGPLVM DEBUG S", figsize=(12, 4)))
|
||||
ax3 = self._debug_get_axis(figs)
|
||||
ax3.text(.5, .5, r"$\mathbf{S}$", alpha=.5, transform=ax3.transAxes,
|
||||
ha='center', va='center')
|
||||
figs[-1].canvas.draw()
|
||||
figs[-1].tight_layout(rect=(0, 0, 1, .86))
|
||||
# ax4 = pylab.subplot2grid(splotshape, (5, 0), 2, 2)
|
||||
figs.append(pylab.figure("BGPLVM DEBUG Z", figsize=(6, 4)))
|
||||
ax4 = self._debug_get_axis(figs)
|
||||
ax4.text(.5, .5, r"$\mathbf{Z}$", alpha=.5, transform=ax4.transAxes,
|
||||
ha='center', va='center')
|
||||
figs[-1].canvas.draw()
|
||||
figs[-1].tight_layout(rect=(0, 0, 1, .86))
|
||||
# ax5 = pylab.subplot2grid(splotshape, (5, 2), 2, 2)
|
||||
figs.append(pylab.figure("BGPLVM DEBUG theta", figsize=(6, 4)))
|
||||
ax5 = self._debug_get_axis(figs)
|
||||
ax5.text(.5, .5, r"${\theta}$", alpha=.5, transform=ax5.transAxes,
|
||||
ha='center', va='center')
|
||||
figs[-1].canvas.draw()
|
||||
figs[-1].tight_layout(rect=(.15, 0, 1, .86))
|
||||
figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6)))
|
||||
fig = figs[-1]
|
||||
ax6 = fig.add_subplot(121)
|
||||
ax6.text(.5, .5, r"${\mathbf{K}_{mm}}$", color='magenta', alpha=.5, transform=ax6.transAxes,
|
||||
ha='center', va='center')
|
||||
ax7 = fig.add_subplot(122)
|
||||
ax7.text(.5, .5, r"${\frac{dL}{dK_{mm}}}$", color='magenta', alpha=.5, transform=ax7.transAxes,
|
||||
ha='center', va='center')
|
||||
|
||||
X, S, Z, theta = self._debug_filter_params(param_dict[self.showing])
|
||||
Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing])
|
||||
# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag
|
||||
|
||||
quiver_units = 'xy'
|
||||
quiver_scale = 1
|
||||
quiver_scale_units = 'xy'
|
||||
Xlatentplts = ax2.plot(X, ls="-", marker="x")
|
||||
colors = colorConverter.to_rgba_array([p.get_color() for p in Xlatentplts], .4)
|
||||
Ulatent = np.zeros_like(X)
|
||||
xlatent = np.tile(np.arange(0, X.shape[0])[:, None], X.shape[1])
|
||||
Xlatentgrads = ax2.quiver(xlatent, X, Ulatent, Xg, color=colors,
|
||||
units=quiver_units, scale_units=quiver_scale_units,
|
||||
scale=quiver_scale)
|
||||
|
||||
Slatentplts = ax3.plot(S, ls="-", marker="x")
|
||||
Slatentgrads = ax3.quiver(xlatent, S, Ulatent, Sg, color=colors,
|
||||
units=quiver_units, scale_units=quiver_scale_units,
|
||||
scale=quiver_scale)
|
||||
ax3.set_ylim(0, 1.)
|
||||
|
||||
xZ = np.tile(np.arange(0, Z.shape[0])[:, None], Z.shape[1])
|
||||
UZ = np.zeros_like(Z)
|
||||
Zplts = ax4.plot(Z, ls="-", marker="x")
|
||||
Zgrads = ax4.quiver(xZ, Z, UZ, Zg, color=colors,
|
||||
units=quiver_units, scale_units=quiver_scale_units,
|
||||
scale=quiver_scale)
|
||||
|
||||
xtheta = np.arange(len(theta))
|
||||
Utheta = np.zeros_like(theta)
|
||||
thetaplts = ax5.bar(xtheta - .4, theta, color=colors)
|
||||
thetagrads = ax5.quiver(xtheta, theta, Utheta, thetag, color=colors,
|
||||
units=quiver_units, scale_units=quiver_scale_units,
|
||||
scale=quiver_scale,
|
||||
edgecolors=('k',), linewidths=[1])
|
||||
pylab.setp(thetaplts, zorder=0)
|
||||
pylab.setp(thetagrads, zorder=10)
|
||||
ax5.set_xticks(np.arange(len(theta)))
|
||||
ax5.set_xticklabels(self._get_param_names()[-len(theta):], rotation=17)
|
||||
|
||||
imkmm = ax6.imshow(kmm_dict[self.showing][0])
|
||||
from mpl_toolkits.axes_grid1 import make_axes_locatable
|
||||
divider = make_axes_locatable(ax6)
|
||||
caxkmm = divider.append_axes("right", "5%", pad="1%")
|
||||
cbarkmm = pylab.colorbar(imkmm, cax=caxkmm)
|
||||
|
||||
imkmmdl = ax7.imshow(kmm_dict[self.showing][1])
|
||||
divider = make_axes_locatable(ax7)
|
||||
caxkmmdl = divider.append_axes("right", "5%", pad="1%")
|
||||
cbarkmmdl = pylab.colorbar(imkmmdl, cax=caxkmmdl)
|
||||
|
||||
# Qleg = ax1.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
|
||||
# loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.15, 1, 1.15),
|
||||
# borderaxespad=0, mode="expand")
|
||||
ax2.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
|
||||
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.1, 1, 1.1),
|
||||
borderaxespad=0, mode="expand")
|
||||
ax3.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
|
||||
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.1, 1, 1.1),
|
||||
borderaxespad=0, mode="expand")
|
||||
ax4.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
|
||||
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.1, 1, 1.1),
|
||||
borderaxespad=0, mode="expand")
|
||||
ax5.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
|
||||
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.1, 1, 1.1),
|
||||
borderaxespad=0, mode="expand")
|
||||
Lleg = ax1.legend()
|
||||
Lleg.draggable()
|
||||
# ax1.add_artist(Qleg)
|
||||
|
||||
indicatorKL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 2], 'o', c=KL.get_color())
|
||||
indicatorLL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1] - kllls[self.showing, 2], 'o', c=LL.get_color())
|
||||
indicatorL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1], 'o', c=L.get_color())
|
||||
# for err in self._savederrors:
|
||||
# if err < kllls.shape[0]:
|
||||
# ax1.scatter(kllls[err, 0], kllls[err, 2], s=50, marker=(5, 2), c=KL.get_color())
|
||||
# ax1.scatter(kllls[err, 0], kllls[err, 1] - kllls[err, 2], s=50, marker=(5, 2), c=LL.get_color())
|
||||
# ax1.scatter(kllls[err, 0], kllls[err, 1], s=50, marker=(5, 2), c=L.get_color())
|
||||
|
||||
# try:
|
||||
# for f in figs:
|
||||
# f.canvas.draw()
|
||||
# f.tight_layout(box=(0, .15, 1, .9))
|
||||
# # pylab.draw()
|
||||
# # pylab.tight_layout(box=(0, .1, 1, .9))
|
||||
# except:
|
||||
# pass
|
||||
|
||||
# parameter changes
|
||||
# ax2 = pylab.subplot2grid((4, 1), (1, 0), 3, 1, projection='3d')
|
||||
button_options = [0, 0] # [0]: clicked -- [1]: dragged
|
||||
|
||||
def update_plots(event):
|
||||
if button_options[0] and not button_options[1]:
|
||||
# event.button, event.x, event.y, event.xdata, event.ydata)
|
||||
tmp = np.abs(iters - event.xdata)
|
||||
closest_hit = iters[tmp == tmp.min()][0]
|
||||
|
||||
if closest_hit != self.showing:
|
||||
self.showing = closest_hit
|
||||
# print closest_hit, iters, event.xdata
|
||||
|
||||
indicatorLL.set_data(self.showing, kllls[self.showing, 1] - kllls[self.showing, 2])
|
||||
indicatorKL.set_data(self.showing, kllls[self.showing, 2])
|
||||
indicatorL.set_data(self.showing, kllls[self.showing, 1])
|
||||
|
||||
X, S, Z, theta = self._debug_filter_params(param_dict[self.showing])
|
||||
Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing])
|
||||
# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag
|
||||
|
||||
for i, Xlatent in enumerate(Xlatentplts):
|
||||
Xlatent.set_ydata(X[:, i])
|
||||
Xlatentgrads.set_offsets(np.array([xlatent.ravel(), X.ravel()]).T)
|
||||
Xlatentgrads.set_UVC(Ulatent, Xg)
|
||||
|
||||
for i, Slatent in enumerate(Slatentplts):
|
||||
Slatent.set_ydata(S[:, i])
|
||||
Slatentgrads.set_offsets(np.array([xlatent.ravel(), S.ravel()]).T)
|
||||
Slatentgrads.set_UVC(Ulatent, Sg)
|
||||
|
||||
for i, Zlatent in enumerate(Zplts):
|
||||
Zlatent.set_ydata(Z[:, i])
|
||||
Zgrads.set_offsets(np.array([xZ.ravel(), Z.ravel()]).T)
|
||||
Zgrads.set_UVC(UZ, Zg)
|
||||
|
||||
for p, t in zip(thetaplts, theta):
|
||||
p.set_height(t)
|
||||
thetagrads.set_offsets(np.array([xtheta.ravel(), theta.ravel()]).T)
|
||||
thetagrads.set_UVC(Utheta, thetag)
|
||||
|
||||
imkmm.set_data(kmm_dict[self.showing][0])
|
||||
imkmm.autoscale()
|
||||
cbarkmm.update_normal(imkmm)
|
||||
|
||||
imkmmdl.set_data(kmm_dict[self.showing][1])
|
||||
imkmmdl.autoscale()
|
||||
cbarkmmdl.update_normal(imkmmdl)
|
||||
|
||||
ax2.relim()
|
||||
# ax3.relim()
|
||||
ax4.relim()
|
||||
ax5.relim()
|
||||
ax2.autoscale()
|
||||
# ax3.autoscale()
|
||||
ax4.autoscale()
|
||||
ax5.autoscale()
|
||||
|
||||
[fig.canvas.draw() for fig in figs]
|
||||
button_options[0] = 0
|
||||
button_options[1] = 0
|
||||
|
||||
def onclick(event):
|
||||
if event.inaxes is ax1 and event.button == 1:
|
||||
button_options[0] = 1
|
||||
def motion(event):
|
||||
if button_options[0]:
|
||||
button_options[1] = 1
|
||||
|
||||
cidr = figs[0].canvas.mpl_connect('button_release_event', update_plots)
|
||||
cidp = figs[0].canvas.mpl_connect('button_press_event', onclick)
|
||||
cidd = figs[0].canvas.mpl_connect('motion_notify_event', motion)
|
||||
|
||||
return ax1, ax2, ax3, ax4, ax5, ax6, ax7

GPy/models/GP.py (222 changed lines)
@@ -6,8 +6,8 @@ import numpy as np
import pylab as pb
from .. import kern
from ..core import model
from ..util.linalg import pdinv,mdot
from ..util.plot import gpplot,x_frame1D,x_frame2D, Tango
from ..util.linalg import pdinv, mdot
from ..util.plot import gpplot, x_frame1D, x_frame2D, Tango
from ..likelihoods import EP

class GP(model):
@@ -19,9 +19,6 @@ class GP(model):
    :parm likelihood: a GPy likelihood
    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_X: False|True
    :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_Y: False|True
    :param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
    :rtype: model object
    :param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
    :param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
@@ -30,33 +27,31 @@ class GP(model):
    .. Note:: Multiple independent outputs are allowed using columns of Y

    """
    def __init__(self, X, likelihood, kernel, normalize_X=False, Xslices=None):
    def __init__(self, X, likelihood, kernel, normalize_X=False):

        # parse arguments
        self.Xslices = Xslices
        self.X = X
        assert len(self.X.shape)==2
        assert len(self.X.shape) == 2
        self.N, self.Q = self.X.shape
        assert isinstance(kernel, kern.kern)
        self.kern = kernel

        #here's some simple normalization for the inputs
        if normalize_X:
            self._Xmean = X.mean(0)[None,:]
            self._Xstd = X.std(0)[None,:]
            self.X = (X.copy() - self._Xmean) / self._Xstd
            if hasattr(self,'Z'):
                self.Z = (self.Z - self._Xmean) / self._Xstd
        else:
            self._Xmean = np.zeros((1,self.X.shape[1]))
            self._Xstd = np.ones((1,self.X.shape[1]))

        self.likelihood = likelihood
        #assert self.X.shape[0] == self.likelihood.Y.shape[0]
        #self.N, self.D = self.likelihood.Y.shape
        assert self.X.shape[0] == self.likelihood.data.shape[0]
        self.N, self.D = self.likelihood.data.shape

        # here's some simple normalization for the inputs
        if normalize_X:
            self._Xmean = X.mean(0)[None, :]
            self._Xstd = X.std(0)[None, :]
            self.X = (X.copy() - self._Xmean) / self._Xstd
            if hasattr(self, 'Z'):
                self.Z = (self.Z - self._Xmean) / self._Xstd
        else:
            self._Xmean = np.zeros((1, self.X.shape[1]))
            self._Xstd = np.ones((1, self.X.shape[1]))

        if not hasattr(self,'has_uncertain_inputs'):
            self.has_uncertain_inputs = False
        model.__init__(self)

    def dL_dZ(self):
@@ -65,24 +60,24 @@ class GP(model):
        """
        return np.zeros_like(self.Z)

    def _set_params(self,p):
        self.kern._set_params_transformed(p[:self.kern.Nparam])
        #self.likelihood._set_params(p[self.kern.Nparam:]) # test by Nicolas
        self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas
    def _set_params(self, p):
        self.kern._set_params_transformed(p[:self.kern.Nparam_transformed()])
        # self.likelihood._set_params(p[self.kern.Nparam:]) # test by Nicolas
        self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas


        self.K = self.kern.K(self.X,slices1=self.Xslices,slices2=self.Xslices)
        self.K = self.kern.K(self.X)
        self.K += self.likelihood.covariance_matrix

        self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)

        #the gradient of the likelihood wrt the covariance matrix
        # the gradient of the likelihood wrt the covariance matrix
        if self.likelihood.YYT is None:
            alpha = np.dot(self.Ki,self.likelihood.Y)
            self.dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
            alpha = np.dot(self.Ki, self.likelihood.Y)
            self.dL_dK = 0.5 * (np.dot(alpha, alpha.T) - self.D * self.Ki)
        else:
            tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
            self.dL_dK = 0.5*(tmp - self.D*self.Ki)
            self.dL_dK = 0.5 * (tmp - self.D * self.Ki)

    def _get_params(self):
        return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
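The quantities cached here are the ingredients of the usual GP log marginal likelihood, log p(Y) = -0.5*D*log|K| - 0.5*tr(K^-1 Y Y^T) + const, and of its gradient dL/dK = 0.5*(K^-1 Y Y^T K^-1 - D*K^-1), which is exactly what dL_dK holds. A self-contained numpy sketch of the same computation (illustrative; the class reuses the Cholesky factors returned by pdinv, and the 2*pi constant together with any EP normaliser lives in self.likelihood.Z):

import numpy as np

def gp_log_marginal(K, Y):
    """log N(Y | 0, K), summed over the D columns of Y, and dL/dK."""
    N, D = Y.shape
    L = np.linalg.cholesky(K)
    alpha = np.linalg.solve(L.T, np.linalg.solve(L, Y))        # K^-1 Y
    K_logdet = 2. * np.sum(np.log(np.diag(L)))
    ll = -0.5 * D * K_logdet - 0.5 * np.sum(alpha * Y) - 0.5 * N * D * np.log(2. * np.pi)
    Ki = np.linalg.solve(L.T, np.linalg.solve(L, np.eye(N)))   # K^-1
    dL_dK = 0.5 * (np.dot(alpha, alpha.T) - D * Ki)
    return ll, dL_dK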
@@ -94,20 +89,20 @@ class GP(model):
        """
        Approximates a non-gaussian likelihood using Expectation Propagation

        For a Gaussian (or direct: TODO) likelihood, no iteration is required:
        For a Gaussian likelihood, no iteration is required:
        this function does nothing
        """
        self.likelihood.fit_full(self.kern.K(self.X))
        self._set_params(self._get_params()) # update the GP
        self._set_params(self._get_params())  # update the GP

    def _model_fit_term(self):
        """
        Computes the model fit using YYT if it's available
        """
        if self.likelihood.YYT is None:
            return -0.5*np.sum(np.square(np.dot(self.Li,self.likelihood.Y)))
            return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
        else:
            return -0.5*np.sum(np.multiply(self.Ki, self.likelihood.YYT))
            return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT))

    def log_likelihood(self):
        """
@@ -117,38 +112,40 @@ class GP(model):
        model for a new variable Y* = v_tilde/tau_tilde, with a covariance
        matrix K* = K + diag(1./tau_tilde) plus a normalization term.
        """
        return -0.5*self.D*self.K_logdet + self._model_fit_term() + self.likelihood.Z
        return -0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z


    def _log_likelihood_gradients(self):
        """
        The gradient of all parameters.

        For the kernel parameters, use the chain rule via dL_dK

        For the likelihood parameters, pass in alpha = K^-1 y
        Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
        """
        return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK,X=self.X,slices1=self.Xslices,slices2=self.Xslices), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
        return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))

    def _raw_predict(self,_Xnew,slices=None, full_cov=False):
    def _raw_predict(self, _Xnew, which_parts='all', full_cov=False):
        """
        Internal helper function for making predictions, does not account
        for normalization or likelihood

        #TODO: which_parts does nothing

        """
        Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
        mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
        KiKx = np.dot(self.Ki,Kx)
        Kx = self.kern.K(self.X, _Xnew,which_parts=which_parts)
        mu = np.dot(np.dot(Kx.T, self.Ki), self.likelihood.Y)
        KiKx = np.dot(self.Ki, Kx)
        if full_cov:
            Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
            var = Kxx - np.dot(KiKx.T,Kx)
            Kxx = self.kern.K(_Xnew, which_parts=which_parts)
            var = Kxx - np.dot(KiKx.T, Kx)
        else:
            Kxx = self.kern.Kdiag(_Xnew, slices=slices)
            var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
            var = var[:,None]
            Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
            var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
            var = var[:, None]
        return mu, var


    def predict(self,Xnew, slices=None, full_cov=False):
    def predict(self, Xnew, which_parts='all', full_cov=False):
        """
        Predict the function(s) at the new point(s) Xnew.
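_raw_predict above is the standard Gaussian conditional, mu* = Kx^T K^-1 Y and var* = Kxx - Kx^T K^-1 Kx, computed against the noise-inclusive training covariance assembled in _set_params; predict then un-normalizes the inputs and pushes the result through the likelihood. A minimal standalone version, where k(A, B) stands for any covariance function (an illustrative sketch, not the class's code path):

import numpy as np

def raw_predict(k, X, Y, noise_var, Xnew, full_cov=False):
    """Posterior mean and (co)variance of the latent GP at Xnew."""
    K = k(X, X) + noise_var * np.eye(X.shape[0])
    Ki = np.linalg.inv(K)               # the class keeps this (plus Cholesky factors) from pdinv
    Kx = k(X, Xnew)
    mu = np.dot(np.dot(Kx.T, Ki), Y)
    KiKx = np.dot(Ki, Kx)
    if full_cov:
        var = k(Xnew, Xnew) - np.dot(KiKx.T, Kx)
    else:
        var = (np.diag(k(Xnew, Xnew)) - np.sum(KiKx * Kx, 0))[:, None]
    return mu, var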
@ -156,35 +153,30 @@ class GP(model):
|
|||
---------
|
||||
:param Xnew: The points at which to make a prediction
|
||||
:type Xnew: np.ndarray, Nnew x self.Q
|
||||
:param slices: specifies which outputs kernel(s) the Xnew correspond to (see below)
|
||||
:type slices: (None, list of slice objects, list of ints)
|
||||
:param which_parts: specifies which outputs kernel(s) to use in prediction
|
||||
:type which_parts: ('all', list of bools)
|
||||
:param full_cov: whether to return the folll covariance matrix, or just the diagonal
|
||||
:type full_cov: bool
|
||||
:rtype: posterior mean, a Numpy array, Nnew x self.D
|
||||
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
|
||||
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D
|
||||
|
||||
.. Note:: "slices" specifies how the the points X_new co-vary wich the training points.
|
||||
|
||||
- If None, the new points covary throigh every kernel part (default)
|
||||
- If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
|
||||
- If a list of booleans, specifying which kernel parts are active
|
||||
|
||||
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
|
||||
This is to allow for different normalizations of the output dimensions.
|
||||
|
||||
"""
|
||||
#normalize X values
|
||||
# normalize X values
|
||||
Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
|
||||
mu, var = self._raw_predict(Xnew, slices, full_cov)
|
||||
mu, var = self._raw_predict(Xnew, which_parts, full_cov)
|
||||
|
||||
#now push through likelihood TODO
|
||||
# now push through likelihood
|
||||
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
|
||||
|
||||
return mean, var, _025pm, _975pm
|
||||
|
||||
|
||||
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_functions='all', resolution=None, full_cov=False):
|
||||
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False):
|
||||
"""
|
||||
Plot the GP's view of the world, where the data is normalized and the likelihood is Gaussian
|
||||
|
||||
|
|
@ -192,8 +184,8 @@ class GP(model):
|
|||
:param which_data: which if the training data to plot (default all)
|
||||
:type which_data: 'all' or a slice object to slice self.X, self.Y
|
||||
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
|
||||
:param which_functions: which of the kernel functions to plot (additively)
|
||||
:type which_functions: list of bools
|
||||
:param which_parts: which of the kernel functions to plot (additively)
|
||||
:type which_parts: 'all', or list of bools
|
||||
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
|
||||
|
||||
Plot the posterior of the GP.
|
||||
|
|
@ -204,86 +196,86 @@ class GP(model):
|
|||
Can plot only part of the data and part of the posterior functions using which_data and which_functions
|
||||
Plot the data's view of the world, with non-normalized values and GP predictions passed through the likelihood
|
||||
"""
|
||||
if which_functions=='all':
|
||||
which_functions = [True]*self.kern.Nparts
|
||||
if which_data=='all':
|
||||
if which_data == 'all':
|
||||
which_data = slice(None)
|
||||
|
||||
if self.X.shape[1] == 1:
|
||||
Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
|
||||
if samples == 0:
|
||||
m,v = self._raw_predict(Xnew, slices=which_functions)
|
||||
gpplot(Xnew,m,m-2*np.sqrt(v),m+2*np.sqrt(v))
|
||||
pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
|
||||
m, v = self._raw_predict(Xnew, which_parts=which_parts)
|
||||
gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v))
|
||||
pb.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
|
||||
else:
|
||||
m,v = self._raw_predict(Xnew, slices=which_functions,full_cov=True)
|
||||
Ysim = np.random.multivariate_normal(m.flatten(),v,samples)
|
||||
gpplot(Xnew,m,m-2*np.sqrt(np.diag(v)[:,None]),m+2*np.sqrt(np.diag(v))[:,None])
|
||||
m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
|
||||
Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
|
||||
gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None])
|
||||
for i in range(samples):
|
||||
pb.plot(Xnew,Ysim[i,:],Tango.colorsHex['darkBlue'],linewidth=0.25)
|
||||
pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
|
||||
pb.xlim(xmin,xmax)
|
||||
ymin,ymax = min(np.append(self.likelihood.Y,m-2*np.sqrt(np.diag(v)[:,None]))), max(np.append(self.likelihood.Y,m+2*np.sqrt(np.diag(v)[:,None])))
|
||||
ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
|
||||
pb.ylim(ymin,ymax)
|
||||
if hasattr(self,'Z'):
|
||||
pb.plot(self.Z,self.Z*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
|
||||
pb.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
|
||||
pb.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
|
||||
pb.xlim(xmin, xmax)
|
||||
ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
|
||||
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
|
||||
pb.ylim(ymin, ymax)
|
||||
if hasattr(self, 'Z'):
|
||||
pb.plot(self.Z, self.Z * 0 + pb.ylim()[0], 'r|', mew=1.5, markersize=12)
|
||||
|
||||
elif self.X.shape[1] == 2:
|
||||
resolution = resolution or 50
|
||||
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits,resolution)
|
||||
m,v = self._raw_predict(Xnew, slices=which_functions)
|
||||
m = m.reshape(resolution,resolution).T
|
||||
pb.contour(xx,yy,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
|
||||
pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=m.min(), vmax=m.max())
|
||||
pb.xlim(xmin[0],xmax[0])
|
||||
pb.ylim(xmin[1],xmax[1])
|
||||
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
|
||||
m, v = self._raw_predict(Xnew, which_parts=which_parts)
|
||||
m = m.reshape(resolution, resolution).T
|
||||
pb.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
|
||||
pb.scatter(Xorig[:, 0], Xorig[:, 1], 40, Yorig, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max())
|
||||
pb.xlim(xmin[0], xmax[0])
|
||||
pb.ylim(xmin[1], xmax[1])
|
||||
else:
|
||||
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
||||
|
||||
def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,levels=20):
|
||||
def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20):
|
||||
"""
|
||||
TODO: Docstrings!
|
||||
:param levels: for 2D plotting, the number of contour levels to use
|
||||
|
||||
"""
|
||||
# TODO include samples
|
||||
if which_functions=='all':
|
||||
which_functions = [True]*self.kern.Nparts
|
||||
if which_data=='all':
|
||||
if which_data == 'all':
|
||||
which_data = slice(None)
|
||||
|
||||
if self.X.shape[1] == 1:
|
||||
|
||||
Xu = self.X * self._Xstd + self._Xmean #NOTE self.X are the normalized values now
|
||||
Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now
|
||||
|
||||
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
|
||||
m, var, lower, upper = self.predict(Xnew, slices=which_functions)
|
||||
gpplot(Xnew,m, lower, upper)
|
||||
pb.plot(Xu[which_data],self.likelihood.data[which_data],'kx',mew=1.5)
|
||||
ymin,ymax = min(np.append(self.likelihood.data,lower)), max(np.append(self.likelihood.data,upper))
|
||||
ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
|
||||
pb.xlim(xmin,xmax)
|
||||
pb.ylim(ymin,ymax)
|
||||
if hasattr(self,'Z'):
|
||||
Zu = self.Z*self._Xstd + self._Xmean
|
||||
pb.plot(Zu,Zu*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
|
||||
if self.has_uncertain_inputs:
|
||||
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_variance.flatten()))
|
||||
m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
|
||||
gpplot(Xnew, m, lower, upper)
|
||||
pb.plot(Xu[which_data], self.likelihood.data[which_data], 'kx', mew=1.5)
|
||||
if self.has_uncertain_inputs:
|
||||
pb.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
|
||||
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
|
||||
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
|
||||
|
||||
elif self.X.shape[1]==2: #FIXME
|
||||
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
|
||||
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
|
||||
pb.xlim(xmin, xmax)
|
||||
pb.ylim(ymin, ymax)
|
||||
if hasattr(self, 'Z'):
|
||||
Zu = self.Z * self._Xstd + self._Xmean
|
||||
pb.plot(Zu, Zu * 0 + pb.ylim()[0], 'r|', mew=1.5, markersize=12)
|
||||
# pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_variance.flatten()))
|
||||
|
||||
elif self.X.shape[1] == 2: # FIXME
|
||||
resolution = resolution or 50
|
||||
Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits,resolution)
|
||||
x, y = np.linspace(xmin[0],xmax[0],resolution), np.linspace(xmin[1],xmax[1],resolution)
|
||||
m, var, lower, upper = self.predict(Xnew, slices=which_functions)
|
||||
m = m.reshape(resolution,resolution).T
|
||||
pb.contour(x,y,m,levels,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
|
||||
Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
|
||||
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
|
||||
m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
|
||||
m = m.reshape(resolution, resolution).T
|
||||
pb.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
|
||||
Yf = self.likelihood.Y.flatten()
|
||||
pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.)
|
||||
pb.xlim(xmin[0],xmax[0])
|
||||
pb.ylim(xmin[1],xmax[1])
|
||||
if hasattr(self,'Z'):
|
||||
pb.plot(self.Z[:,0],self.Z[:,1],'wo')
|
||||
pb.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
|
||||
pb.xlim(xmin[0], xmax[0])
|
||||
pb.ylim(xmin[1], xmax[1])
|
||||
if hasattr(self, 'Z'):
|
||||
pb.plot(self.Z[:, 0], self.Z[:, 1], 'wo')
|
||||
|
||||
else:
|
||||
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
||||
|
|
|
|||
|
|
@@ -1,4 +1,4 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
### Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

@@ -24,12 +24,12 @@ class GPLVM(GP):
    :type init: 'PCA'|'random'

    """
    def __init__(self, Y, Q, init='PCA', X = None, kernel=None, **kwargs):
    def __init__(self, Y, Q, init='PCA', X = None, kernel=None, normalize_Y=False, **kwargs):
        if X is None:
            X = self.initialise_latent(init, Q, Y)
        if kernel is None:
            kernel = kern.rbf(Q) + kern.bias(Q)
        likelihood = Gaussian(Y)
        likelihood = Gaussian(Y, normalize=normalize_Y)
        GP.__init__(self, X, likelihood, kernel, **kwargs)

    def initialise_latent(self, init, Q, Y):

@@ -91,8 +91,8 @@ class GPLVM(GP):
        Xtest_full[:, :2] = Xtest
        mu, var, low, up = self.predict(Xtest_full)
        var = var[:, :1]
        ax.imshow(var.reshape(resolution, resolution).T[::-1, :],
                  extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear')
        ax.imshow(var.reshape(resolution, resolution).T,
                  extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear',origin='lower')

        for i,ul in enumerate(np.unique(labels)):
            if type(ul) is np.string_:
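Both GPLVM and Bayesian_GPLVM call initialise_latent(init, Q, Y) with init='PCA' by default. The usual choice is to take the leading Q principal components of Y as the starting latent coordinates; a hedged sketch of such an initialisation (the actual helper in GPy may centre or scale differently):

import numpy as np

def pca_init(Y, Q):
    """Return an N x Q latent initialisation from the top-Q principal components of Y."""
    Yc = Y - Y.mean(0)                           # centre the data
    U, s, Vt = np.linalg.svd(Yc, full_matrices=False)
    X = np.dot(Yc, Vt[:Q].T)                     # project onto the first Q principal directions
    return X / X.std(0)                          # unit variance per latent dimension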
@@ -11,26 +11,24 @@ class GP_regression(GP):
    """
    Gaussian Process model for regression

    This is a thin wrapper around the GP class, with a set of sensible defalts
    This is a thin wrapper around the models.GP class, with a set of sensible defalts

    :param X: input observations
    :param Y: observed values
    :param kernel: a GPy kernel, defaults to rbf+white
    :param kernel: a GPy kernel, defaults to rbf
    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_X: False|True
    :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_Y: False|True
    :param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
    :rtype: model object

    .. Note:: Multiple independent outputs are allowed using columns of Y

    """

    def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None):
    def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False):
        if kernel is None:
            kernel = kern.rbf(X.shape[1])

        likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)

        GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X, Xslices=Xslices)
        GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
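With the Xslices argument removed, constructing the regression wrapper reduces to data, an optional kernel and the normalization flags. A short usage sketch on synthetic 1-d data (the kernel choice and the optimize/plot calls are the generic GPy interface of this era, not something introduced by this commit):

import numpy as np
import GPy

X = np.random.uniform(-3., 3., (40, 1))
Y = np.sin(X) + 0.1 * np.random.randn(40, 1)

kernel = GPy.kern.rbf(1) + GPy.kern.white(1)   # the new default would be rbf only
m = GPy.models.GP_regression(X, Y, kernel=kernel, normalize_Y=True)
m.optimize()
mean, var, lower, upper = m.predict(np.linspace(-3., 3., 100)[:, None])
m.plot()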
@@ -9,7 +9,6 @@ from sparse_GP_regression import sparse_GP_regression
from GPLVM import GPLVM
from warped_GP import warpedGP
from sparse_GPLVM import sparse_GPLVM
from uncollapsed_sparse_GP import uncollapsed_sparse_GP
from Bayesian_GPLVM import Bayesian_GPLVM
from mrd import MRD
from generalized_FITC import generalized_FITC
@@ -9,6 +9,12 @@ from .. import kern
from scipy import stats, linalg
from sparse_GP import sparse_GP

def backsub_both_sides(L,X):
    """ Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
    tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1)
    return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T


class generalized_FITC(sparse_GP):
    """
    Naish-Guzman, A. and Holden, S. (2008) implemantation of EP with FITC.
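backsub_both_sides forms L^-T * X * L^-1 with two triangular solves (LAPACK dtrtrs with trans=1) instead of ever inverting L. The same computation written against scipy's higher-level wrapper, as a readability reference (solve_triangular is assumed to be available in the installed scipy):

import numpy as np
from scipy.linalg import solve_triangular

def backsub_both_sides_ref(L, X):
    """Return L^-T * X * L^-1 for symmetric X and lower-triangular L."""
    tmp = solve_triangular(L, X, lower=True, trans='T')           # tmp = L^-T X
    return solve_triangular(L, tmp.T, lower=True, trans='T').T    # (L^-T tmp^T)^T = tmp L^-1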
@ -23,20 +29,19 @@ class generalized_FITC(sparse_GP):
|
|||
:type X_variance: np.ndarray (N x Q) | None
|
||||
:param Z: inducing inputs (optional, see note)
|
||||
:type Z: np.ndarray (M x Q) | None
|
||||
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
|
||||
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
|
||||
:type M: int
|
||||
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
|
||||
:type normalize_(X|Y): bool
|
||||
"""
|
||||
|
||||
def __init__(self, X, likelihood, kernel, Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False):
|
||||
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
|
||||
|
||||
self.Z = Z
|
||||
self.M = self.Z.shape[0]
|
||||
self._precision = likelihood.precision
|
||||
self.true_precision = likelihood.precision
|
||||
|
||||
sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False)
|
||||
sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False)
|
||||
|
||||
def _set_params(self, p):
|
||||
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
|
||||
|
|
@ -52,13 +57,16 @@ class generalized_FITC(sparse_GP):
|
|||
|
||||
For a Gaussian (or direct: TODO) likelihood, no iteration is required:
|
||||
this function does nothing
|
||||
|
||||
Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in sparse_GP.
|
||||
The true precison is now 'true_precision' not 'precision'.
|
||||
"""
|
||||
if self.has_uncertain_inputs:
|
||||
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
|
||||
else:
|
||||
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
|
||||
self._precision = self.likelihood.precision # Save the true precision
|
||||
self.likelihood.precision = self._precision/(1. + self._precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation
|
||||
self.true_precision = self.likelihood.precision # Save the true precision
|
||||
self.likelihood.precision = self.true_precision/(1. + self.true_precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation
|
||||
self._set_params(self._get_params()) # update the GP
|
||||
|
||||
def _FITC_computations(self):
|
||||
|
|
@ -70,23 +78,23 @@ class generalized_FITC(sparse_GP):
|
|||
- removes the extra terms computed in the sparse_GP approximation
|
||||
- computes the likelihood gradients wrt the true precision.
|
||||
"""
|
||||
#NOTE the true precison is now '_precison' not 'precision'
|
||||
#NOTE the true precison is now 'true_precision' not 'precision'
|
||||
if self.likelihood.is_heteroscedastic:
|
||||
|
||||
# Compute generalized FITC's diagonal term of the covariance
|
||||
self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1)
|
||||
self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
|
||||
Lmipsi1 = np.dot(self.Lmi,self.psi1)
|
||||
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1)
|
||||
#self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm)
|
||||
#self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1)
|
||||
#a = kj
|
||||
self.Diag0 = self.psi0 - np.diag(self.Qnn)
|
||||
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self._precision.flatten())
|
||||
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.true_precision.flatten())
|
||||
self.Diag = self.Diag0 * Iplus_Dprod_i
|
||||
#self.Diag = self.Diag0/(1.+ self.Diag0 * self._precision.flatten())
|
||||
|
||||
|
||||
self.P = Iplus_Dprod_i[:,None] * self.psi1.T
|
||||
#self.P = (self.Diag / self.Diag0)[:,None] * self.psi1.T
|
||||
self.RPT0 = np.dot(self.Lmi,self.psi1)
|
||||
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
|
||||
#self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - Iplus_Dprod_i/self.Diag0)[:,None]*self.RPT0.T))
|
||||
#self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
|
||||
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
|
||||
self.RPT = np.dot(self.R,self.P.T)
|
||||
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
|
||||
|
|
@ -95,7 +103,16 @@ class generalized_FITC(sparse_GP):
|
|||
self.mu = self.w + np.dot(self.P,self.gamma)
|
||||
|
||||
# Remove extra term from dL_dpsi1
|
||||
self.dL_dpsi1 -= mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
|
||||
self.dL_dpsi1 -= mdot(self.Lmi.T,Lmipsi1*self.likelihood.precision.flatten().reshape(1,self.N))
|
||||
#self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm)
|
||||
#self.dL_dpsi1 -= mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
|
||||
|
||||
#########333333
|
||||
#self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
|
||||
#########333333
|
||||
|
||||
|
||||
|
||||
else:
|
||||
raise NotImplementedError, "homoscedastic fitc not implemented"
|
||||
# Remove extra term from dL_dpsi1
|
||||
|
|
@ -141,11 +158,14 @@ class generalized_FITC(sparse_GP):
|
|||
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
|
||||
else:
|
||||
A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
|
||||
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
|
||||
D = 0.5*np.trace(self.Cpsi1VVpsi1)
|
||||
C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.M*np.log(sf2))
|
||||
#C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
|
||||
D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V))
|
||||
#self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
|
||||
#D_ = 0.5*np.trace(self.Cpsi1VVpsi1)
|
||||
return A+C+D
|
||||
|
||||
def _raw_predict(self, Xnew, slices, full_cov=False):
|
||||
def _raw_predict(self, Xnew, which_parts, full_cov=False):
|
||||
if self.likelihood.is_heteroscedastic:
|
||||
"""
|
||||
Make a prediction for the generalized FITC model
|
||||
|
|
@ -174,16 +194,16 @@ class generalized_FITC(sparse_GP):
|
|||
self.mu_H = mu_H
|
||||
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
|
||||
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
|
||||
Kx = self.kern.K(self.Z, Xnew)
|
||||
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
|
||||
KR0T = np.dot(Kx.T,self.Lmi.T)
|
||||
mu_star = np.dot(KR0T,mu_H)
|
||||
if full_cov:
|
||||
Kxx = self.kern.K(Xnew)
|
||||
Kxx = self.kern.K(Xnew,which_parts=which_parts)
|
||||
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
|
||||
else:
|
||||
Kxx = self.kern.Kdiag(Xnew)
|
||||
Kxx_ = self.kern.K(Xnew)
|
||||
var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
|
||||
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
|
||||
Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed?
|
||||
var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) # TODO: RA, is this line needed?
|
||||
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
|
||||
return mu_star[:,None],var
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -271,90 +271,52 @@ class MRD(model):
|
|||
self.Z = Z
|
||||
return Z
|
||||
|
||||
def plot_X_1d(self, colors=None):
|
||||
fig = pylab.figure(num="MRD X 1d", figsize=(min(8, (3 * len(self.bgplvms))), min(12, (2 * self.X.shape[1]))))
|
||||
fig.clf()
|
||||
ax1 = fig.add_subplot(self.X.shape[1], 1, 1)
|
||||
if colors is None:
|
||||
colors = ax1._get_lines.color_cycle
|
||||
ax1.plot(self.X, c='k', alpha=.3)
|
||||
plots = ax1.plot(self.X.T[0], c=colors.next())
|
||||
ax1.fill_between(numpy.arange(self.X.shape[0]),
|
||||
self.X.T[0] - 2 * numpy.sqrt(self.gref.X_variance.T[0]),
|
||||
self.X.T[0] + 2 * numpy.sqrt(self.gref.X_variance.T[0]),
|
||||
facecolor=plots[-1].get_color(),
|
||||
alpha=.3)
|
||||
ax1.text(1, 1, r"$\mathbf{{X_{}}}".format(1),
|
||||
horizontalalignment='right',
|
||||
verticalalignment='top',
|
||||
transform=ax1.transAxes)
|
||||
for i in range(self.X.shape[1] - 1):
|
||||
ax = fig.add_subplot(self.X.shape[1], 1, i + 2)
|
||||
ax.plot(self.X, c='k', alpha=.3)
|
||||
plots.extend(ax.plot(self.X.T[i + 1], c=colors.next()))
|
||||
ax.fill_between(numpy.arange(self.X.shape[0]),
|
||||
self.X.T[i + 1] - 2 * numpy.sqrt(self.gref.X_variance.T[i + 1]),
|
||||
self.X.T[i + 1] + 2 * numpy.sqrt(self.gref.X_variance.T[i + 1]),
|
||||
facecolor=plots[-1].get_color(),
|
||||
alpha=.3)
|
||||
if i < self.X.shape[1] - 2:
|
||||
ax.set_xticklabels('')
|
||||
ax1.set_xticklabels('')
|
||||
# ax1.legend(plots, [r"$\mathbf{{X_{}}}$".format(i + 1) for i in range(self.X.shape[1])],
|
||||
# bbox_to_anchor=(0., 1 + .01 * self.X.shape[1],
|
||||
# 1., 1. + .01 * self.X.shape[1]), loc=3,
|
||||
# ncol=self.X.shape[1], mode="expand", borderaxespad=0.)
|
||||
def _handle_plotting(self, fig_num, axes, plotf):
|
||||
if axes is None:
|
||||
fig = pylab.figure(num=fig_num, figsize=(4 * len(self.bgplvms), 3 * len(self.bgplvms)))
|
||||
for i, g in enumerate(self.bgplvms):
|
||||
if axes is None:
|
||||
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
|
||||
else:
|
||||
ax = axes[i]
|
||||
plotf(i, g, ax)
|
||||
pylab.draw()
|
||||
fig.tight_layout(h_pad=.01, rect=(0, 0, 1, .95))
|
||||
if axes is None:
|
||||
fig.tight_layout()
|
||||
return fig
|
||||
else:
|
||||
return pylab.gcf()
|
||||
|
||||
def plot_X(self, fig_num="MRD Predictions", axes=None):
|
||||
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.X))
|
||||
return fig
|
||||
|
||||
def plot_X(self):
|
||||
fig = pylab.figure("MRD X", figsize=(4 * len(self.bgplvms), 3))
|
||||
fig.clf()
|
||||
for i, g in enumerate(self.bgplvms):
|
||||
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
|
||||
ax.imshow(g.X)
|
||||
pylab.draw()
|
||||
fig.tight_layout()
|
||||
def plot_predict(self, fig_num="MRD Predictions", axes=None):
|
||||
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.predict(g.X)[0]))
|
||||
return fig
|
||||
|
||||
def plot_predict(self):
|
||||
fig = pylab.figure("MRD Predictions", figsize=(4 * len(self.bgplvms), 3))
|
||||
fig.clf()
|
||||
for i, g in enumerate(self.bgplvms):
|
||||
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
|
||||
ax.imshow(g.predict(g.X)[0])
|
||||
pylab.draw()
|
||||
fig.tight_layout()
|
||||
def plot_scales(self, fig_num="MRD Scales", axes=None, *args, **kwargs):
|
||||
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: g.kern.plot_ARD(ax=ax, *args, **kwargs))
|
||||
return fig
|
||||
|
||||
def plot_scales(self, *args, **kwargs):
|
||||
fig = pylab.figure("MRD Scales", figsize=(4 * len(self.bgplvms), 3))
|
||||
fig.clf()
|
||||
for i, g in enumerate(self.bgplvms):
|
||||
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
|
||||
g.kern.plot_ARD(ax=ax, *args, **kwargs)
|
||||
pylab.draw()
|
||||
fig.tight_layout()
|
||||
return fig
|
||||
|
||||
def plot_latent(self, *args, **kwargs):
|
||||
fig = pylab.figure("MRD Latent Spaces", figsize=(4 * len(self.bgplvms), 3))
|
||||
fig.clf()
|
||||
for i, g in enumerate(self.bgplvms):
|
||||
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
|
||||
g.plot_latent(ax=ax, *args, **kwargs)
|
||||
pylab.draw()
|
||||
fig.tight_layout()
|
||||
def plot_latent(self, fig_num="MRD Latent Spaces", axes=None, *args, **kwargs):
|
||||
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs))
|
||||
return fig
|
||||
|
||||
def _debug_plot(self):
|
||||
self.plot_X()
|
||||
self.plot_X_1d()
|
||||
self.plot_latent()
|
||||
self.plot_scales()
|
||||
fig = pylab.figure("MRD DEBUG PLOT", figsize=(4 * len(self.bgplvms), 9))
|
||||
fig.clf()
|
||||
axes = [fig.add_subplot(3, len(self.bgplvms), i + 1) for i in range(len(self.bgplvms))]
|
||||
self.plot_X(axes=axes)
|
||||
axes = [fig.add_subplot(3, len(self.bgplvms), i + len(self.bgplvms) + 1) for i in range(len(self.bgplvms))]
|
||||
self.plot_latent(axes=axes)
|
||||
axes = [fig.add_subplot(3, len(self.bgplvms), i + 2 * len(self.bgplvms) + 1) for i in range(len(self.bgplvms))]
|
||||
self.plot_scales(axes=axes)
|
||||
pylab.draw()
|
||||
fig.tight_layout()
|
||||
|
||||
def _debug_optimize(self, opt='scg', maxiters=500, itersteps=10):
|
||||
def _debug_optimize(self, opt='scg', maxiters=5000, itersteps=10):
|
||||
iters = 0
|
||||
optstep = lambda: self.optimize(opt, messages=1, max_f_eval=itersteps)
|
||||
self._debug_plot()
|
||||
|
|
|
|||
|
|
@ -3,15 +3,16 @@
|
|||
|
||||
import numpy as np
|
||||
import pylab as pb
|
||||
from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
|
||||
from ..util.linalg import mdot, jitchol, tdot, symmetrify
|
||||
from ..util.plot import gpplot
|
||||
from .. import kern
|
||||
from GP import GP
|
||||
from scipy import linalg
|
||||
|
||||
#Still TODO:
|
||||
# make use of slices properly (kernel can now do this)
|
||||
# enable heteroscedatic noise (kernel will need to compute psi2 as a (NxMxM) array)
|
||||
def backsub_both_sides(L,X):
|
||||
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
|
||||
tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1)
|
||||
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
|
||||
|
||||
class sparse_GP(GP):
|
||||
"""
|
||||
|
|
@ -27,19 +28,16 @@ class sparse_GP(GP):
|
|||
:type X_variance: np.ndarray (N x Q) | None
|
||||
:param Z: inducing inputs (optional, see note)
|
||||
:type Z: np.ndarray (M x Q) | None
|
||||
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
|
||||
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
|
||||
:type M: int
|
||||
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
|
||||
:type normalize_(X|Y): bool
|
||||
"""
|
||||
|
||||
def __init__(self, X, likelihood, kernel, Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False):
|
||||
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
|
||||
self.scale_factor = 100.0# a scaling factor to help keep the algorithm stable
|
||||
self.auto_scale_factor = False
|
||||
self.Z = Z
|
||||
self.Zslices = Zslices
|
||||
self.Xslices = Xslices
|
||||
self.M = Z.shape[0]
|
||||
self.likelihood = likelihood
|
||||
|
||||
|
|
@ -50,10 +48,7 @@ class sparse_GP(GP):
|
|||
self.has_uncertain_inputs=True
|
||||
self.X_variance = X_variance
|
||||
|
||||
if not self.likelihood.is_heteroscedastic:
|
||||
self.likelihood.trYYT = np.trace(np.dot(self.likelihood.Y, self.likelihood.Y.T)) # TODO: something more elegant here?
|
||||
|
||||
GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices)
|
||||
GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X)
|
||||
|
||||
#normalize X uncertainty also
|
||||
if self.has_uncertain_inputs:
|
||||
|
|
@ -68,87 +63,89 @@ class sparse_GP(GP):
|
|||
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_variance).T
|
||||
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_variance)
|
||||
else:
|
||||
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)
|
||||
self.psi0 = self.kern.Kdiag(self.X)
|
||||
self.psi1 = self.kern.K(self.Z,self.X)
|
||||
self.psi2 = None
|
||||
|
||||
    def _computations(self):
        #TODO: find routine to multiply triangular matrices
        #TODO: slices for psi statistics (easy enough)

        sf = self.scale_factor
        sf2 = sf**2

        #The rather complex computations of psi2_beta_scaled
        #factor Kmm
        self.Lm = jitchol(self.Kmm)

        #The rather complex computations of self.A
        if self.likelihood.is_heteroscedastic:
            assert self.likelihood.D == 1 #TODO: what if the likelihood is heteroscedastic and there are multiple independent outputs?
            if self.has_uncertain_inputs:
                self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
                psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
                evals, evecs = linalg.eigh(psi2_beta_scaled)
                clipped_evals = np.clip(evals,0.,1e6) # TODO: make clipping configurable
                if not np.allclose(evals, clipped_evals):
                    print "Warning: clipping posterior eigenvalues"
                tmp = evecs*np.sqrt(clipped_evals)
                tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1)
                self.A = tdot(tmp)
            else:
                tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
                self.psi2_beta_scaled = np.dot(tmp,tmp.T)
                tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1)
                self.A = tdot(tmp)
        else:
            if self.has_uncertain_inputs:
                self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
                psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
                evals, evecs = linalg.eigh(psi2_beta_scaled)
                clipped_evals = np.clip(evals,0.,1e6) # TODO: make clipping configurable
                if not np.allclose(evals, clipped_evals):
                    print "Warning: clipping posterior eigenvalues"
                tmp = evecs*np.sqrt(clipped_evals)
                tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1)
                self.A = tdot(tmp)
            else:
                tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
                self.psi2_beta_scaled = np.dot(tmp,tmp.T)
                tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1)
                self.A = tdot(tmp)

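        # Note on the clipping above: psi2_beta_scaled is positive semi-definite in exact
        # arithmetic, but accumulated rounding error can leave it with small negative
        # eigenvalues. Clipping the eigenvalue spectrum at zero before the triangular solve
        # keeps A = Lm^-1 (psi2 * beta) Lm^-T well defined; the warning flags when this
        # numerical repair actually fires.
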
        self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
        #factor B
        self.B = np.eye(self.M)/sf2 + self.A
        self.LB = jitchol(self.B)

        self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y

        #Compute A = L^-1 psi2 beta L^-T
        #self. A = mdot(self.Lmi,self.psi2_beta_scaled,self.Lmi.T)
        tmp = linalg.lapack.flapack.dtrtrs(self.Lm,self.psi2_beta_scaled.T,lower=1)[0]
        self.A = linalg.lapack.flapack.dtrtrs(self.Lm,np.asarray(tmp.T,order='F'),lower=1)[0]

        self.B = np.eye(self.M)/sf2 + self.A

        self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)

        self.psi1V = np.dot(self.psi1, self.V)
        #tmp = np.dot(self.Lmi.T, self.LBi.T)
        tmp = linalg.lapack.clapack.dtrtrs(self.Lm.T,np.asarray(self.LBi.T,order='C'),lower=0)[0]
        self.C = np.dot(tmp,tmp.T) #TODO: tmp is triangular. replace with dtrmm (blas) when available
        self.Cpsi1V = np.dot(self.C,self.psi1V)
        self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
        #self.E = np.dot(self.Cpsi1VVpsi1,self.C)/sf2
        self.E = np.dot(self.Cpsi1V/sf,self.Cpsi1V.T/sf)

        # Compute dL_dpsi # FIXME: this is untested for the heteroscedastic + uncertain inputs case
        self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
        self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T)
        if self.likelihood.is_heteroscedastic:
            if self.has_uncertain_inputs:
                self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
                self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
                self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
            else:
                self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
                self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
                self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
                self.dL_dpsi2 = None

        else:
            self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
            self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC
            self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD
            if self.has_uncertain_inputs:
                #repeat for each of the N psi_2 matrices
                self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None,:,:],self.N,axis=0)
            else:
                self.dL_dpsi1 += 2.*np.dot(self.dL_dpsi2,self.psi1)
                self.dL_dpsi2 = None

        #back substitute C into psi1V
        tmp,info1 = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.psi1V),lower=1,trans=0)
        self._LBi_Lmi_psi1V,_ = linalg.lapack.flapack.dtrtrs(self.LB,np.asfortranarray(tmp),lower=1,trans=0)
        tmp,info2 = linalg.lapack.flapack.dpotrs(self.LB,tmp,lower=1)
        self.Cpsi1V,info3 = linalg.lapack.flapack.dtrtrs(self.Lm,tmp,lower=1,trans=1)

        # Compute dL_dKmm
        #self.dL_dKmm_old = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
        #self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
        #self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
        tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.A),lower=1,trans=1)[0]
        self.dL_dKmm = -0.5*self.D*sf2*linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0] #dA
        self.dL_dKmm += 0.5*(self.D*(self.C/sf2 -self.Kmmi) + self.E) + np.dot(np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1,self.Kmmi) # d(C+D)
        tmp = tdot(self._LBi_Lmi_psi1V)
        self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.D*np.eye(self.M) + tmp)
        tmp = -0.5*self.DBi_plus_BiPBi/sf2
        tmp += -0.5*self.B*sf2*self.D
        tmp += self.D*np.eye(self.M)
        self.dL_dKmm = backsub_both_sides(self.Lm,tmp)

        # Compute dL_dpsi # FIXME: this is untested for the heteroscedastic + uncertain inputs case
        self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
        self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T)
        dL_dpsi2_beta = 0.5*backsub_both_sides(self.Lm,self.D*np.eye(self.M) - self.DBi_plus_BiPBi)
        if self.likelihood.is_heteroscedastic:
            if self.has_uncertain_inputs:
                self.dL_dpsi2 = self.likelihood.precision[:,None,None]*dL_dpsi2_beta[None,:,:]
            else:
                self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta,self.psi1*self.likelihood.precision.reshape(1,self.N))
                self.dL_dpsi2 = None
        else:
            dL_dpsi2 = self.likelihood.precision*dL_dpsi2_beta
            if self.has_uncertain_inputs:
                #repeat for each of the N psi_2 matrices
                self.dL_dpsi2 = np.repeat(dL_dpsi2[None,:,:],self.N,axis=0)
            else:
                #subsume back into psi1 (==Kmn)
                self.dL_dpsi1 += 2.*np.dot(dL_dpsi2,self.psi1)
                self.dL_dpsi2 = None

        #the partial derivative vector for the likelihood
        if self.likelihood.Nparams ==0:

@@ -156,16 +153,11 @@ class sparse_GP(GP):
            self.partial_for_likelihood = None
        elif self.likelihood.is_heteroscedastic:
            raise NotImplementedError, "heteroscedastic derivatives not implemented"
            #self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA
            #self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB
            #self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC
            #self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
        else:
            #likelihood is not heteroscedastic
            self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
            self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2
            self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
            self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
            self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
            self.partial_for_likelihood += self.likelihood.precision*(0.5*np.sum(self.A*self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))

@@ -178,8 +170,8 @@ class sparse_GP(GP):
        else:
            A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
            B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
            C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
            D = 0.5*np.trace(self.Cpsi1VVpsi1)
            C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.M*np.log(sf2))
            D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V))
        return A+B+C+D

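        # Reading the four terms above off the code (an informal sketch; scale_factor
        # bookkeeping suppressed, beta = likelihood.precision, sigma^2 = 1/beta):
        #   A = -N*D/2 * log(2*pi*sigma^2) - beta/2 * tr(Y Y^T)
        #   B = -D/2 * (beta*sum(psi0) - tr(self.A)),  self.A = Lm^-1 (psi2*beta) Lm^-T
        #   C = -D/2 * log|self.B|,                    self.B = I + self.A, via diag(self.LB)
        #   D = +1/2 * || LB^-1 Lm^-1 psi1 V ||^2
        # i.e. the returned value is the variational lower bound on the log marginal
        # likelihood, expressed entirely through the Cholesky factors Lm and LB.
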
    def _set_params(self, p):

@@ -187,13 +179,14 @@ class sparse_GP(GP):
        self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
        self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
        self._compute_kernel_matrices()
        if self.auto_scale_factor:
            self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
        #if self.auto_scale_factor:
        # if self.likelihood.is_heteroscedastic:
        # self.scale_factor = max(1,np.sqrt(self.psi2_beta_scaled.sum(0).mean()))
        # else:
        # self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
        # self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
        #if self.auto_scale_factor:
        #if self.likelihood.is_heteroscedastic:
        #self.scale_factor = max(100,np.sqrt(self.psi2_beta_scaled.sum(0).mean()))
        #else:
        #self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
        self.scale_factor = 1.
        self._computations()

    def _get_params(self):

@@ -239,24 +232,28 @@ class sparse_GP(GP):
        """
        The derivative of the bound wrt the inducing inputs Z
        """
        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z) #factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
        if self.has_uncertain_inputs:
            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_variance)
            dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_variance) # 'stripes'
            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
            dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
        else:
            dL_dZ += self.kern.dK_dX(self.dL_dpsi1,self.Z,self.X)
            dL_dZ += self.kern.dK_dX(self.dL_dpsi1, self.Z, self.X)
        return dL_dZ

    def _raw_predict(self, Xnew, slices, full_cov=False):
    def _raw_predict(self, Xnew, which_parts='all', full_cov=False):
        """Internal helper function for making predictions, does not account for normalization"""

        Kx = self.kern.K(self.Z, Xnew)
        mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
        Bi,_ = linalg.lapack.flapack.dpotri(self.LB,lower=0) # WTH? this lower switch should be 1, but that doesn't work!
        symmetrify(Bi)
        Kmmi_LmiBLmi = backsub_both_sides(self.Lm,np.eye(self.M) - Bi)

        Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
        mu = np.dot(Kx.T, self.Cpsi1V/self.scale_factor)
        if full_cov:
            Kxx = self.kern.K(Xnew)
            var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
            Kxx = self.kern.K(Xnew,which_parts=which_parts)
            var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) #NOTE this won't work for plotting
        else:
            Kxx = self.kern.Kdiag(Xnew)
            var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
            Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
            var = Kxx - np.sum(Kx*np.dot(Kmmi_LmiBLmi, Kx),0)

        return mu,var[:,None]

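The predictive equations implemented in _raw_predict above can be summarised as follows (an informal reading of the code, with beta the noise precision and the scale_factor bookkeeping omitted):

    mu_*  = K_{*m} (K_{mm} + \beta\Psi_2)^{-1} \Psi_1 V                      # Kx^T Cpsi1V
    var_* = K_{**} - K_{*m} [ K_{mm}^{-1} - L_m^{-T} B^{-1} L_m^{-1} ] K_{m*}

where B = I + L_m^{-1} (\beta\Psi_2) L_m^{-T}; the bracketed matrix is Kmmi_LmiBLmi, obtained by inverting B from its Cholesky factor LB (dpotri) and back-substituting through Lm.
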
@@ -13,7 +13,7 @@ class sparse_GP_regression(sparse_GP):
    """
    Gaussian Process model for regression

    This is a thin wrapper around the GP class, with a set of sensible defaults
    This is a thin wrapper around the sparse_GP class, with a set of sensible defaults

    :param X: input observations
    :param Y: observed values

@@ -22,25 +22,25 @@ class sparse_GP_regression(sparse_GP):
    :type normalize_X: False|True
    :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_Y: False|True
    :param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
    :rtype: model object

    .. Note:: Multiple independent outputs are allowed using columns of Y

    """

    def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None,Z=None, M=10):
        #kern defaults to rbf
    def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10):
        #kern defaults to rbf (plus white for stability)
        if kernel is None:
            kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)

        #Z defaults to a subset of the data
        if Z is None:
            Z = np.random.permutation(X.copy())[:M]
            i = np.random.permutation(X.shape[0])[:M]
            Z = X[i].copy()
        else:
            assert Z.shape[1]==X.shape[1]

        #likelihood defaults to Gaussian
        likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)

        sparse_GP.__init__(self, X, likelihood, kernel, Z, normalize_X=normalize_X, Xslices=Xslices)
        sparse_GP.__init__(self, X, likelihood, kernel, Z, normalize_X=normalize_X)

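As a usage sketch for the class above (illustrative only; it assumes the devel-branch API at this commit, in particular that the model is exported as GPy.models.sparse_GP_regression and inherits optimize/plot from the model and sparse_GP base classes — the helper ensure_default_constraints is a hypothetical name and may differ here):

    import numpy as np
    import GPy

    X = np.random.uniform(-3., 3., (50, 1))
    Y = np.sin(X) + 0.05*np.random.randn(50, 1)

    m = GPy.models.sparse_GP_regression(X, Y, M=10)  # 10 inducing points; rbf + white kernel by default
    m.ensure_default_constraints()                   # hypothetical helper; constrain kernel/noise positive
    m.optimize()
    m.plot()
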
@@ -1,151 +0,0 @@
# Copyright (c) 2012 James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv
from .. import kern
from ..likelihoods import likelihood
from sparse_GP import sparse_GP

class uncollapsed_sparse_GP(sparse_GP):
    """
    Variational sparse GP model (Regression), where the approximating distribution q(u) is represented explicitly

    :param X: inputs
    :type X: np.ndarray (N x Q)
    :param likelihood: GPy likelihood class, containing observed data
    :param q_u: canonical parameters of the distribution squashed into a 1D array
    :type q_u: np.ndarray
    :param kernel : the kernel/covariance function. See link kernels
    :type kernel: a GPy kernel
    :param Z: inducing inputs (optional, see note)
    :type Z: np.ndarray (M x Q) | None
    :param Zslices: slices for the inducing inputs (see slicing TODO: link)
    :param normalize_X : whether to normalize the data before computing (predictions will be in original scales)
    :type normalize_X: bool
    """

    def __init__(self, X, likelihood, kernel, Z, q_u=None, **kwargs):
        self.M = Z.shape[0]
        if q_u is None:
            q_u = np.hstack((np.random.randn(self.M*likelihood.D),-0.5*np.eye(self.M).flatten()))
        self.likelihood = likelihood
        self.set_vb_param(q_u)
        sparse_GP.__init__(self, X, likelihood, kernel, Z, **kwargs)

    def _computations(self):
        # kernel computations, using BGPLVM notation
        self.Kmm = self.kern.K(self.Z)
        if self.has_uncertain_inputs:
            raise NotImplementedError
        else:
            self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)
            self.psi1 = self.kern.K(self.Z,self.X)
            if self.likelihood.is_heteroscedastic:
                raise NotImplementedError
            else:
                tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
                self.psi2_beta_scaled = np.dot(tmp,tmp.T)
            self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:]

        self.V = self.likelihood.precision*self.Y
        self.VmT = np.dot(self.V,self.q_u_expectation[0].T)
        self.psi1V = np.dot(self.psi1, self.V)
        self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
        self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
        self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T)
        self.B = np.eye(self.M) + self.A
        self.Lambda = mdot(self.Lmi.T,self.B,self.Lmi)
        self.trace_K = self.psi0 - np.trace(self.A)/self.beta
        self.projected_mean = mdot(self.psi1.T,self.Kmmi,self.q_u_expectation[0])

        # Compute dL_dpsi
        self.dL_dpsi0 = - 0.5 * self.likelihood.D * self.beta * np.ones(self.N)
        self.dL_dpsi1 = np.dot(self.VmT,self.Kmmi).T # This is the correct term for E I think...
        self.dL_dpsi2 = 0.5 * self.beta * self.likelihood.D * (self.Kmmi - mdot(self.Kmmi,self.q_u_expectation[1],self.Kmmi))

        # Compute dL_dKmm
        tmp = self.beta*mdot(self.psi2,self.Kmmi,self.q_u_expectation[1]) -np.dot(self.q_u_expectation[0],self.psi1V.T)
        tmp += tmp.T
        tmp += self.likelihood.D*(-self.beta*self.psi2 - self.Kmm + self.q_u_expectation[1])
        self.dL_dKmm = 0.5*mdot(self.Kmmi,tmp,self.Kmmi)

        #Compute the gradient of the log likelihood wrt noise variance
        #TODO: support heteroscedastic noise
        dbeta = 0.5 * self.N*self.likelihood.D/self.beta
        dbeta += - 0.5 * self.likelihood.D * self.trace_K
        dbeta += - 0.5 * self.likelihood.D * np.sum(self.q_u_expectation[1]*mdot(self.Kmmi,self.psi2,self.Kmmi))
        dbeta += - 0.5 * self.trYYT
        dbeta += np.sum(np.dot(self.Y.T,self.projected_mean))
        self.partial_for_likelihood = -dbeta*self.likelihood.precision**2

    def log_likelihood(self):
        """
        Compute the (lower bound on the) log marginal likelihood
        """
        A = -0.5*self.N*self.likelihood.D*(np.log(2.*np.pi) - np.log(self.beta))
        B = -0.5*self.beta*self.likelihood.D*self.trace_K
        C = -0.5*self.likelihood.D *(self.Kmm_logdet-self.q_u_logdet + np.sum(self.Lambda * self.q_u_expectation[1]) - self.M)
        D = -0.5*self.beta*self.trYYT
        E = np.sum(np.dot(self.V.T,self.projected_mean))
        return A+B+C+D+E

    def _raw_predict(self, Xnew, slices, full_cov=False):
        """Internal helper function for making predictions, does not account for normalization"""
        Kx = self.kern.K(Xnew,self.Z)
        mu = mdot(Kx,self.Kmmi,self.q_u_expectation[0])

        tmp = self.Kmmi - mdot(self.Kmmi,self.q_u_cov,self.Kmmi)
        if full_cov:
            Kxx = self.kern.K(Xnew)
            var = Kxx - mdot(Kx,tmp,Kx.T)
        else:
            Kxx = self.kern.Kdiag(Xnew)
            var = (Kxx - np.sum(Kx*np.dot(Kx,tmp),1))[:,None]
        return mu,var

    def set_vb_param(self,vb_param):
        """set the distribution q(u) from the canonical parameters"""
        self.q_u_prec = -2.*vb_param[-self.M**2:].reshape(self.M, self.M)
        self.q_u_cov, q_u_Li, q_u_L, tmp = pdinv(self.q_u_prec)
        self.q_u_logdet = -tmp
        self.q_u_mean = np.dot(self.q_u_cov,vb_param[:self.M*self.likelihood.D].reshape(self.M,self.likelihood.D))

        self.q_u_expectation = (self.q_u_mean, np.dot(self.q_u_mean,self.q_u_mean.T)+self.q_u_cov*self.likelihood.D)

        self.q_u_canonical = (np.dot(self.q_u_prec, self.q_u_mean),-0.5*self.q_u_prec)
        #TODO: computations now?

    def get_vb_param(self):
        """
        Return the canonical parameters of the distribution q(u)
        """
        return np.hstack([e.flatten() for e in self.q_u_canonical])

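    # For reference (informal note): the two canonical parameters stored above are the
    # usual natural parameters of a Gaussian q(u) = N(m, S), namely
    #   theta_1 = S^-1 m      (stored as np.dot(self.q_u_prec, self.q_u_mean))
    #   theta_2 = -1/2 S^-1   (stored as -0.5*self.q_u_prec)
    # which is why set_vb_param recovers the precision as -2*theta_2 and the mean as S*theta_1.
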
    def vb_grad_natgrad(self):
        """
        Compute the gradients of the lower bound wrt the canonical and
        expectation parameters of u.

        Note that the natural gradient in either is given by the gradient in the other (see Hensman et al. 2012, Fast Variational Inference in the Conjugate Exponential Family)
        """
        dL_dmmT_S = -0.5*self.Lambda - self.q_u_canonical[1]
        dL_dm = np.dot(self.Kmmi,self.psi1V) - np.dot(self.Lambda,self.q_u_mean)

        #dL_dSim =
        #dL_dmhSi =

        return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) # natgrad only, grad TODO

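    # Informal note on the duality used above: for an exponential-family q(u) with natural
    # parameters theta and expectation parameters eta, the Fisher information satisfies
    # d eta / d theta = I(theta), so
    #   natgrad_theta L = I(theta)^-1 * dL/dtheta = dL/deta
    # i.e. the ordinary gradient in one parameterisation is the natural gradient in the other,
    # which is why the expectation-parameter gradients (dL_dm, dL_dmmT_S) returned here can be
    # used directly as the natural gradient for the canonical parameters.
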
    def plot(self, *args, **kwargs):
        """
        add the distribution q(u) to the plot from sparse_GP
        """
        sparse_GP.plot(self,*args,**kwargs)
        if self.Q==1:
            pb.errorbar(self.Z[:,0],self.q_u_expectation[0][:,0],yerr=2.*np.sqrt(np.diag(self.q_u_cov)),fmt=None,ecolor='b')

@@ -14,7 +14,7 @@ from .. import likelihoods
from .. import kern

class warpedGP(GP):
    def __init__(self, X, Y, kernel=None, warping_function = None, warping_terms = 3, normalize_X=False, normalize_Y=False, Xslices=None):
    def __init__(self, X, Y, kernel=None, warping_function = None, warping_terms = 3, normalize_X=False, normalize_Y=False):

        if kernel is None:
            kernel = kern.rbf(X.shape[1])

@@ -29,7 +29,7 @@ class warpedGP(GP):
        self.predict_in_warped_space = False
        likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y)

        GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X, Xslices=Xslices)
        GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)

    def _scale_data(self, Y):
        self._Ymax = Y.max()