Merged conflict of model.py

Neil Lawrence 2013-09-14 20:05:42 +01:00
commit dffa9541c2
42 changed files with 2297 additions and 453 deletions

View file

@ -140,7 +140,6 @@ class FITC(SparseGP):
dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
alpha = mdot(LBiLmipsi1,self.V_star)

View file

@ -19,9 +19,6 @@ class GP(GPBase):
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:rtype: model object
:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
:type powerep: list
.. Note:: Multiple independent outputs are allowed using columns of Y
@ -105,7 +102,12 @@ class GP(GPBase):
Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
"""
#return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
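# NOTE: the two branches below are currently identical; the EP case is kept as a placeholder for EP-specific gradients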
if not isinstance(self.likelihood,EP):
tmp = np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
else:
tmp = np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
return tmp
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
"""
@ -136,7 +138,7 @@ class GP(GPBase):
:type Xnew: np.ndarray, Nnew x self.input_dim
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.input_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
@ -153,5 +155,71 @@ class GP(GPBase):
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
return mean, var, _025pm, _975pm
def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
"""
For a specific output, predict the function at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param output: output to predict
:type output: integer in {0,..., num_outputs-1}
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.input_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
.. Note:: For multiple output models only
"""
assert hasattr(self,'multioutput')
index = np.ones_like(Xnew)*output
Xnew = np.hstack((Xnew,index))
# normalize X values
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output)
return mean, var, _025pm, _975pm
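A minimal usage sketch (hypothetical names: m is assumed to be a trained multioutput model and Xnew an Nnew x 1 array):
mean, var, lower, upper = m.predict_single_output(Xnew, output=1)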
def _raw_predict_single_output(self, _Xnew, output=0, which_parts='all', full_cov=False,stop=False):
"""
Internal helper function for making predictions for a specific output,
does not account for normalization or likelihood
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param output: output to predict
:type output: integer in {0,..., num_outputs-1}
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
.. Note:: For multiple output models only
"""
assert hasattr(self,'multioutput')
# creates an index column and appends it to _Xnew
index = np.ones_like(_Xnew)*output
_Xnew = np.hstack((_Xnew,index))
Kx = self.kern.K(_Xnew,self.X,which_parts=which_parts).T
KiKx, _ = dpotrs(self.L, np.asfortranarray(Kx), lower=1)
mu = np.dot(KiKx.T, self.likelihood.Y)
if full_cov:
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
var = Kxx - np.dot(KiKx.T, Kx)
else:
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
var = var[:, None]
if stop:
debug_this # @UndefinedVariable
return mu, var
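For reference, with K = L L^T the Cholesky factorisation stored in self.L, the code above evaluates the standard GP posterior moments (an editorial sketch, not part of the source):
.. math::
\mu_* = K_{*X} K^{-1} y, \qquad \mathrm{var}_* = K_{**} - K_{*X} K^{-1} K_{X*}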

View file

@ -57,34 +57,30 @@ class GPBase(Model):
self.X = state.pop()
Model.setstate(self, state)
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None, output=None):
"""
Plot the GP's view of the world, where the data is normalized and the likelihood is Gaussian.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted function
- In higher dimensions, this is not implemented yet !TODO!
Can plot only part of the data and part of the posterior functions
using which_data and which_functions
:param samples: the number of a posteriori samples to plot
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:param which_data: which of the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param output: which output to plot (for multiple output models only)
:type output: integer (first output is 0)
"""
if which_data == 'all':
which_data = slice(None)
@ -93,7 +89,7 @@ class GPBase(Model):
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if self.X.shape[1] == 1 and not hasattr(self,'multioutput'):
Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
if samples == 0:
m, v = self._raw_predict(Xnew, which_parts=which_parts)
@ -111,7 +107,7 @@ class GPBase(Model):
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 2 and not hasattr(self,'multioutput'):
resolution = resolution or 50
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
m, v = self._raw_predict(Xnew, which_parts=which_parts)
@ -120,17 +116,51 @@ class GPBase(Model):
ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])
elif self.X.shape[1] == 2 and hasattr(self,'multioutput'):
output -= 1
assert self.num_outputs >= output, 'The model has only %s outputs.' %self.num_outputs
Xu = self.X[self.X[:,-1]==output ,0:1]
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
if samples == 0:
m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts)
gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5)
else:
m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts, full_cov=True)
Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None], axes=ax)
for i in range(samples):
ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
ax.set_xlim(xmin, xmax)
ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_ylim(ymin, ymax)
if hasattr(self,'Z'):
Zu = self.Z * self._Xscale + self._Xoffset
Zu = Zu[self.Z[:,-1]==output, 0:1] # select this output's inducing inputs and drop the index column
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
elif self.X.shape[1] == 3 and hasattr(self,'multioutput'):
raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, output=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'], fillcol=Tango.colorsHex['lightBlue']):
"""
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted function
- In higher dimensions, this is not implemented yet !TODO!
Can plot only part of the data and part of the posterior functions
using which_data and which_functions
@ -151,15 +181,13 @@ class GPBase(Model):
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param output: which output to plot (for multiple output models only)
:type output: integer (first output is 0)
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param linecol: color of line to plot.
:type linecol:
:param fillcol: color of fill
:type fillcol:
:param levels: for 2D plotting, the number of contour levels to use
If ax is None, create a new figure
"""
# TODO include samples
if which_data == 'all':
@ -169,41 +197,81 @@ class GPBase(Model):
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if not hasattr(self,'multioutput'):
plotdims = self.input_dim - len(fixed_inputs)
if plotdims == 1:
resolution = resolution or 200
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
fixed_dims = np.array([i for i,v in fixed_inputs])
freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
Xnew, xmin, xmax = x_frame1D(Xu[:,freedim], plot_limits=plot_limits)
Xgrid = np.empty((Xnew.shape[0],self.input_dim))
Xgrid[:,freedim] = Xnew
for i,v in fixed_inputs:
Xgrid[:,i] = v
m, _, lower, upper = self.predict(Xgrid, which_parts=which_parts)
for d in range(m.shape[1]):
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(Xu[which_data,freedim], self.likelihood.data[which_data, d], 'kx', mew=1.5)
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 2:
resolution = resolution or 50
Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T
ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
Yf = self.likelihood.Y.flatten()
ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
else: # multioutput models
assert self.num_outputs > output, 'The model has only %s outputs.' %self.num_outputs
if self.X.shape[1] == 2:
resolution = resolution or 200
Xu = self.X * self._Xscale + self._Xoffset
Xu = Xu[self.X[:,-1]==output, 0:1] # keep the output of interest and drop the index column
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
m, _, lower, upper = self.predict_single_output(Xnew, which_parts=which_parts,output=output)
for d in range(m.shape[1]):
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5)
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 3:
raise NotImplementedError, "Plots not yet implemented for multioutput models with 2D inputs"
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

View file

@ -458,7 +458,7 @@ class Model(Parameterized):
numerical_gradient = (f1 - f2) / (2 * dx)
global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient==0, 1e-32, gradient)))
return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance)
else:
# check the gradient of each parameter individually, and do some pretty printing
try:
@ -549,7 +549,7 @@ class Model(Parameterized):
:type optimzer: string TODO: valid strings?
"""
assert isinstance(self.likelihood, likelihoods.EP) or isinstance(self.likelihood, likelihoods.EP_Mixed_Noise), "pseudo_EM is only available for EP likelihoods"
ll_change = epsilon + 1.
iteration = 0
last_ll = -np.inf
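For reference, the gradient check in the @ -458 hunk above compares the analytic gradient g with a central finite difference; schematically it tests
.. math::
r = \frac{f(x + \Delta x) - f(x - \Delta x)}{2\,\Delta x^\top g}, \qquad |1 - r| < \mathrm{tolerance}
which is what global_ratio computes, with zero entries of g replaced by 1e-32 to avoid division by zero.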

View file

@ -5,7 +5,7 @@ import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides, chol_inv, dtrtrs, dpotrs, dpotri
from scipy import linalg
from ..likelihoods import Gaussian, EP, EP_Mixed_Noise
from gp_base import GPBase
class SparseGP(GPBase):
@ -109,7 +109,6 @@ class SparseGP(GPBase):
tmp, _ = dtrtrs(self._Lm, np.asfortranarray(tmp.T), lower=1)
self._A = tdot(tmp)
# factor B
self.B = np.eye(self.num_inducing) + self._A
self.LB = jitchol(self.B)
@ -139,6 +138,7 @@ class SparseGP(GPBase):
dL_dpsi2_beta = 0.5 * backsub_both_sides(self._Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs:
self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
else:
@ -160,9 +160,23 @@ class SparseGP(GPBase):
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs:
raise NotImplementedError, "heteroscedastic derivatives with uncertain inputs not implemented"
else:
Lmi_psi1, nil = dtrtrs(self._Lm, np.asfortranarray(self.psi1.T), lower=1, trans=0)
_LBi_Lmi_psi1, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
_Bi_Lmi_psi1, _ = dtrtrs(self.LB.T, np.asfortranarray(_LBi_Lmi_psi1), lower=1, trans=0)
self.partial_for_likelihood = -0.5 * self.likelihood.precision + 0.5 * self.likelihood.V**2
self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0 - np.sum(Lmi_psi1**2,0))[:,None] * self.likelihood.precision**2
self.partial_for_likelihood += 0.5*np.sum(_Bi_Lmi_psi1*Lmi_psi1,0)[:,None]*self.likelihood.precision**2 #NOTE this term has numerical issues
self.partial_for_likelihood += -np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * self.likelihood.Y * self.likelihood.precision**2
self.partial_for_likelihood += 0.5*np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * self.likelihood.precision**2
else:
# likelihood is not heteroscedastic
self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self._A) * self.likelihood.precision)
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self._A * self.DBi_plus_BiPBi) - self.data_fit)
@ -298,7 +312,7 @@ class SparseGP(GPBase):
:type X_variance_new: np.ndarray, Nnew x self.input_dim
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.input_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
@ -322,18 +336,15 @@ class SparseGP(GPBase):
return mean, var, _025pm, _975pm
def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None, output=None):
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if which_data == 'all':
which_data = slice(None)
GPBase.plot(self, samples=0, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax, output=output)
# add the inducing inputs and some errorbars
if self.X.shape[1] == 1 and not hasattr(self,'multioutput'):
if self.has_uncertain_inputs:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
@ -342,6 +353,109 @@ class SparseGP(GPBase):
Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
elif self.X.shape[1] == 2 and not hasattr(self,'multioutput'):
Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
elif self.X.shape[1] == 2 and hasattr(self,'multioutput'):
if self.has_uncertain_inputs:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
Xu = Xu[self.X[:,-1]==output, 0:1] # select this output's rows and drop the index column
ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
Zu = self.Z * self._Xscale + self._Xoffset
Zu = Zu[self.Z[:,-1]==output, 0:1] # select this output's inducing inputs and drop the index column
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
#ax.set_ylim(ax.get_ylim()[0],)
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
"""
For a specific output, predict the function at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param output: output to predict
:type output: integer in {0,..., num_outputs-1}
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.input_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
.. Note:: For multiple output models only
"""
assert hasattr(self,'multioutput')
index = np.ones_like(Xnew)*output
Xnew = np.hstack((Xnew,index))
# normalize X values
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output)
return mean, var, _025pm, _975pm
def _raw_predict_single_output(self, _Xnew, output=0, X_variance_new=None, which_parts='all', full_cov=False,stop=False):
"""
Internal helper function for making predictions for a specific output,
does not account for normalization or likelihood
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param output: output to predict
:type output: integer in {0,..., num_outputs-1}
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
.. Note:: For multiple output models only
"""
Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work!
symmetrify(Bi)
Kmmi_LmiBLmi = backsub_both_sides(self._Lm, np.eye(self.num_inducing) - Bi)
if self.Cpsi1V is None:
psi1V = np.dot(self.psi1.T,self.likelihood.V)
tmp, _ = dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
tmp, _ = dpotrs(self.LB, tmp, lower=1)
self.Cpsi1V, _ = dtrtrs(self._Lm, tmp, lower=1, trans=1)
assert hasattr(self,'multioutput')
index = np.ones_like(_Xnew)*output
_Xnew = np.hstack((_Xnew,index))
if X_variance_new is None:
Kx = self.kern.K(self.Z, _Xnew, which_parts=which_parts)
mu = np.dot(Kx.T, self.Cpsi1V)
if full_cov:
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
else:
Kx = self.kern.psi1(self.Z, _Xnew, X_variance_new)
mu = np.dot(Kx, self.Cpsi1V)
if full_cov:
raise NotImplementedError, "TODO"
else:
Kxx = self.kern.psi0(self.Z, _Xnew, X_variance_new)
psi2 = self.kern.psi2(self.Z, _Xnew, X_variance_new)
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
return mu, var[:, None]
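Schematically, and ignoring uncertain inputs, the quantities assembled above give the sparse posterior (an editorial sketch in the code's notation, with L_m = chol(K_uu) and B = I + A as factored earlier):
.. math::
\mu_* = K_{*u}\, L_m^{-\top} B^{-1} L_m^{-1} \Psi_1^\top V, \qquad
\mathrm{var}_* = K_{**} - K_{*u} \big(K_{uu}^{-1} - L_m^{-\top} B^{-1} L_m^{-1}\big) K_{u*}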

View file

@ -298,7 +298,7 @@ def bgplvm_simulation(optimize='scg',
return m
def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw):
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
likelihood_list = [Gaussian(x, normalize=True) for x in Ylist]
@ -321,7 +321,7 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw):
if optimize:
print "Optimizing Model:"
m.optimize(messages=1, max_iters=8e3, gtol=.1)
if plot:
m.plot_X_1d("MRD Latent Space 1D")
m.plot_scales("MRD Scales")

View file

@ -22,8 +22,8 @@ def coregionalisation_toy2(max_iters=100):
Y = np.vstack((Y1, Y2))
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
k2 = GPy.kern.coregionalise(2, 1)
k = k1**k2 # k = k1.prod(k2, tensor=True)
m = GPy.models.GPRegression(X, Y, kernel=k)
m.constrain_fixed('.*rbf_var', 1.)
# m.constrain_positive('.*kappa')
@ -46,32 +46,23 @@ def coregionalisation_toy(max_iters=100):
"""
X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5
X = np.vstack((X1, X2))
Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
Y = np.vstack((Y1, Y2))
k1 = GPy.kern.rbf(1)
m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
m.constrain_fixed('.*rbf_var', 1.)
# m.constrain_positive('kappa')
m.optimize(max_iters=max_iters)
fig, axes = pb.subplots(2,1)
m.plot(output=0,ax=axes[0])
m.plot(output=1,ax=axes[1])
axes[0].set_title('Output 0')
axes[1].set_title('Output 1')
return m
def coregionalisation_sparse(max_iters=100):
"""
A simple demonstration of coregionalisation on two sinusoidal functions using sparse approximations.
@ -86,31 +77,39 @@ def coregionalisation_sparse(max_iters=100):
num_inducing = 40
Z = np.hstack((np.random.rand(num_inducing, 1) * 8, np.random.randint(0, 2, num_inducing)[:, None]))
k1 = GPy.kern.rbf(1)
m = GPy.models.SparseGPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1],num_inducing=20)
#k2 = GPy.kern.coregionalise(2, 2)
#k = k1**k2 #.prod(k2, tensor=True) # + GPy.kern.white(2,0.001)
#m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
m.constrain_fixed('.*rbf_var', 1.)
#m.constrain_fixed('iip')
#m.constrain_bounded('noise_variance', 1e-3, 1e-1)
# m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs')
m.optimize(max_iters=max_iters)
fig, axes = pb.subplots(2,1)
m.plot(output=0,ax=axes[0])
m.plot(output=1,ax=axes[1])
axes[0].set_title('Output 0')
axes[1].set_title('Output 1')
#pb.figure()
#Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
#Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
#mean, var, low, up = m.predict(Xtest1)
#GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
#mean, var, low, up = m.predict(Xtest2)
#GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
#pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
#pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
#y = pb.ylim()[0]
#pb.plot(Z[:, 0][Z[:, 1] == 0], np.zeros(np.sum(Z[:, 1] == 0)) + y, 'r|', mew=2)
#pb.plot(Z[:, 0][Z[:, 1] == 1], np.zeros(np.sum(Z[:, 1] == 1)) + y, 'g|', mew=2)
return m
def epomeo_gpx(max_iters=100):
@ -401,8 +400,6 @@ def silhouette(max_iters=100):
print(m)
return m
def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100):
"""Run a 1D example of a sparse GP regression."""
# sample inputs and outputs

View file

@ -130,7 +130,7 @@ class opt_lbfgsb(Optimizer):
opt_dict['pgtol'] = self.gtol
opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint=iprint,
maxfun=self.max_iters, **opt_dict)
self.x_opt = opt_result[0]
self.f_opt = f_fp(self.x_opt)[0]
self.funct_eval = opt_result[2]['funcalls']

View file

@ -73,9 +73,9 @@ def gibbs(input_dim,variance=1., mapping=None):
Gibbs and MacKay non-stationary covariance function.
.. math::
r = \sqrt{(x_i - x_j)^\top (x_i - x_j)}
k(x_i, x_j) = \sigma^2 Z \exp\left(-\frac{r^2}{l(x_i)^2 + l(x_j)^2}\right)
Z = \sqrt{\frac{2\,l(x_i)\,l(x_j)}{l(x_i)^2 + l(x_j)^2}}
@ -89,7 +89,7 @@ def gibbs(input_dim,variance=1., mapping=None):
The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x), which is a function that can be specified by the user; by default a multi-layer perceptron is used.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param mapping: the mapping that gives the lengthscale across the input space.
@ -346,29 +346,30 @@ def symmetric(k):
k_.parts = [symmetric.Symmetric(p) for p in k.parts]
return k_
def coregionalise(num_outputs, W_columns=1, W=None, kappa=None):
"""
Coregionalisation kernel.
An intrinsic/linear coregionalization kernel of the form
.. math::
k_2(x, y) = \mathbf{B} k(x, y)
It is obtained as the tensor product between a kernel k(x, y) and the coregionalization matrix
.. math::
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + \mathrm{diag}(\kappa)
:param num_outputs: the number of outputs to coregionalise
:type num_outputs: int
:param W_columns: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
:type W_columns: int
:param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B
:type W: numpy array of dimensionality (num_outputs, W_columns)
:param kappa: a vector which allows the outputs to behave independently
:type kappa: numpy array of dimensionality (num_outputs,)
:rtype: kernel object
.. Note: see coregionalisation examples in GPy.examples.regression for some usage.
"""
p = parts.coregionalise.Coregionalise(num_outputs, W_columns, W, kappa)
return kern(1,[p])
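A usage sketch, consistent with the coregionalisation demos in the examples above (two outputs sharing a 1-D rbf):
k1 = GPy.kern.rbf(1)
k2 = GPy.kern.coregionalise(2, 1)
k = k1**k2 # tensor product, equivalent to k1.prod(k2, tensor=True)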
@ -427,3 +428,31 @@ def hierarchical(k):
# assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
_parts = [parts.hierarchical.Hierarchical(k.parts)]
return kern(k.input_dim+len(k.parts),_parts)
def build_lcm(input_dim, num_outputs, kernel_list = [], W_columns=1,W=None,kappa=None):
"""
Builds a kernel of a linear coregionalization model
:param input_dim: Input dimensionality
:param num_outputs: Number of outputs
:param kernel_list: List of coregionalized kernels, each element in the list will be multiplied by a different coregionalization matrix
:type kernel_list: list of GPy kernels
:param W_columns: number of columns of the W matrix of each coregionalization term
:type W_columns: integer
.. Note:: each kernel's input dimensionality is overwritten to fit input_dim
"""
for k in kernel_list:
if k.input_dim != input_dim:
k.input_dim = input_dim
warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
k_coreg = coregionalise(num_outputs,W_columns,W,kappa)
kernel = kernel_list[0]**k_coreg.copy()
for k in kernel_list[1:]:
k_coreg = coregionalise(num_outputs,W_columns,W,kappa)
kernel += k**k_coreg.copy()
return kernel
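A construction sketch for build_lcm (an editorial example; assumes the function is exported alongside the other kernel constructors in this module):
k_list = [GPy.kern.rbf(1), GPy.kern.bias(1)]
k = build_lcm(input_dim=1, num_outputs=2, kernel_list=k_list, W_columns=1)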

View file

@ -421,19 +421,19 @@ class kern(Parameterized):
# TODO: input_slices needed
crossterms = 0
for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self.parts, self.input_slices), 2):
if i_s1 == i_s2:
# TODO psi1 this must be faster/better/precached/more nice
tmp1 = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1)
tmp2 = np.zeros((mu.shape[0], Z.shape[0]))
p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2)
prod = np.multiply(tmp1, tmp2)
crossterms += prod[:, :, None] + prod[:, None, :]
# target += crossterms
return target + crossterms
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
"""Gradient of the psi2 statistics with respect to the parameters."""

View file

@ -9,42 +9,45 @@ from scipy import weave
class Coregionalise(Kernpart):
"""
Kernel for intrinsic/linear coregionalization models.
This kernel has the form
.. math::
k_2(x, y) = \mathbf{B} k(x, y)
where the coregionalization matrix is
.. math::
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + \mathrm{diag}(\kappa)
It is obtained as the tensor product between a kernel k(x, y) and B.
:param num_outputs: number of outputs to coregionalize
:type num_outputs: int
:param W_columns: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
:type W_columns: int
:param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B
:type W: numpy array of dimensionality (num_outputs, W_columns)
:param kappa: a vector which allows the outputs to behave independently
:type kappa: numpy array of dimensionality (num_outputs,)
.. Note: see coregionalisation examples in GPy.examples.regression for some usage.
"""
def __init__(self, num_outputs, W_columns=1, W=None, kappa=None):
self.input_dim = 1
self.name = 'coregion'
self.num_outputs = num_outputs
self.W_columns = W_columns
if W is None:
self.W = 0.5*np.random.randn(self.num_outputs,self.W_columns)/np.sqrt(self.W_columns)
else:
assert W.shape==(self.num_outputs,self.W_columns)
self.W = W
if kappa is None:
kappa = 0.5*np.ones(self.num_outputs)
else:
assert kappa.shape==(self.num_outputs,)
self.kappa = kappa
self.num_params = self.num_outputs*(self.W_columns + 1)
self._set_params(np.hstack([self.W.flatten(),self.kappa]))
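As a worked illustration of the coregionalization matrix (numbers chosen purely for illustration): with num_outputs=2, W = (1, -1)^T and kappa = (0.1, 0.1),
.. math::
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + \mathrm{diag}(\kappa) = \begin{pmatrix} 1.1 & -1 \\ -1 & 1.1 \end{pmatrix}
so the two outputs are modelled as strongly anti-correlated, as in the sinusoid demos in GPy.examples.regression.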
def _get_params(self):
@ -52,12 +55,12 @@ class Coregionalise(Kernpart):
def _set_params(self,x):
assert x.size == self.num_params
self.kappa = x[-self.num_outputs:]
self.W = x[:-self.num_outputs].reshape(self.num_outputs,self.W_columns)
self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa)
def _get_param_names(self):
return sum([['W%i_%i'%(i,j) for j in range(self.W_columns)] for i in range(self.num_outputs)],[]) + ['kappa_%i'%i for i in range(self.num_outputs)]
def K(self,index,index2,target):
index = np.asarray(index,dtype=np.int)
@ -75,26 +78,26 @@ class Coregionalise(Kernpart):
if index2 is None:
code="""
for(int i=0;i<N; i++){
target[i+i*N] += B[index[i]+num_outputs*index[i]];
for(int j=0; j<i; j++){
target[j+i*N] += B[index[i]+num_outputs*index[j]];
target[i+j*N] += target[j+i*N];
}
}
"""
N,B,num_outputs = index.size, self.B, self.num_outputs
weave.inline(code,['target','index','N','B','num_outputs'])
else:
index2 = np.asarray(index2,dtype=np.int)
code="""
for(int i=0;i<num_inducing; i++){
for(int j=0; j<N; j++){
target[i+j*num_inducing] += B[num_outputs*index[j]+index2[i]];
}
}
"""
N,num_inducing,B,num_outputs = index.size,index2.size, self.B, self.num_outputs
weave.inline(code,['target','index','index2','N','num_inducing','B','num_outputs'])
def Kdiag(self,index,target):
@ -111,12 +114,12 @@ class Coregionalise(Kernpart):
code="""
for(int i=0; i<num_inducing; i++){
for(int j=0; j<N; j++){
dL_dK_small[index[j] + num_outputs*index2[i]] += dL_dK[i+j*num_inducing];
}
}
"""
N, num_inducing, num_outputs = index.size, index2.size, self.num_outputs
weave.inline(code, ['N','num_inducing','num_outputs','dL_dK','dL_dK_small','index','index2'])
dkappa = np.diag(dL_dK_small)
dL_dK_small += dL_dK_small.T
@ -133,8 +136,8 @@ class Coregionalise(Kernpart):
ii,jj = ii.T, jj.T
dL_dK_small = np.zeros_like(self.B)
for i in range(self.num_outputs):
for j in range(self.num_outputs):
tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
dL_dK_small[i,j] = tmp
@ -146,8 +149,8 @@ class Coregionalise(Kernpart):
def dKdiag_dtheta(self,dL_dKdiag,index,target):
index = np.asarray(index,dtype=np.int).flatten()
dL_dKdiag_small = np.zeros(self.num_outputs)
for i in range(self.num_outputs):
dL_dKdiag_small[i] += np.sum(dL_dKdiag[index==i])
dW = 2.*self.W*dL_dKdiag_small[:,None]
dkappa = dL_dKdiag_small
@ -155,6 +158,3 @@ class Coregionalise(Kernpart):
def dK_dX(self,dL_dK,X,X2,target):
pass

View file

@ -18,7 +18,7 @@ class Prod(Kernpart):
"""
def __init__(self,k1,k2,tensor=False):
self.num_params = k1.num_params + k2.num_params
self.name = '['+k1.name + '(x)' + k2.name +']'
self.k1 = k1
self.k2 = k2
if tensor:

View file

@ -242,13 +242,14 @@ class RBF(Kernpart):
def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2
Z_changed = not fast_array_equal(Z, self._Z)
if Z_changed:
# Z has changed, compute Z specific stuff
self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
self._psi2_Zdist_sq = np.square(self._psi2_Zdist / self.lengthscale) # M,M,Q
if Z_changed or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S):
# something's changed. recompute EVERYTHING
# psi1

View file

@ -1,4 +1,7 @@
from ep import EP
from ep_mixed_noise import EP_Mixed_Noise
from gaussian import Gaussian
from gaussian_mixed_noise import Gaussian_Mixed_Noise
from noise_model_constructors import *
# TODO: from Laplace import Laplace
import likelihood_functions as functions

View file

@ -4,23 +4,23 @@ from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
from likelihood import likelihood
class EP(likelihood):
def __init__(self,data,noise_model,epsilon=1e-3,power_ep=[1.,1.]):
"""
Expectation Propagation
Arguments
---------
epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
noise_model : a likelihood function (see likelihood_functions.py)
"""
self.noise_model = noise_model
self.epsilon = epsilon
self.eta, self.delta = power_ep
self.data = data
self.N, self.output_dim = self.data.shape
self.is_heteroscedastic = True
self.Nparams = 0
self._transf_data = self.noise_model._preprocess_values(data)
#Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde)
@ -52,16 +52,23 @@ class EP(likelihood):
def predictive_values(self,mu,var,full_cov):
if full_cov:
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
return self.noise_model.predictive_values(mu,var)
def _get_params(self):
#return np.zeros(0)
return self.noise_model._get_params()
def _get_param_names(self):
#return []
return self.noise_model._get_param_names()
def _set_params(self,p):
#pass # TODO: the EP likelihood might want to take some parameters...
self.noise_model._set_params(p)
def _gradients(self,partial):
#return np.zeros(0) # TODO: the EP likelihood might want to take some parameters...
return self.noise_model._gradients(partial)
def _compute_GP_variables(self):
#Variables to be called from GP
@ -116,7 +123,7 @@ class EP(likelihood):
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
@ -206,7 +213,7 @@ class EP(likelihood):
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
@ -301,7 +308,7 @@ class EP(likelihood):
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])

View file

@ -0,0 +1,385 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
from likelihood import likelihood
class EP_Mixed_Noise(likelihood):
def __init__(self,data_list,noise_model_list,epsilon=1e-3,power_ep=[1.,1.]):
"""
Expectation Propagation for a combination of different noise models
Arguments
---------
:param data_list: list of outputs
:param noise_model_list: a list of noise models
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations
:type epsilon: float
:param power_ep: list of power ep parameters
"""
assert len(data_list) == len(noise_model_list)
self.noise_model_list = noise_model_list
n_list = [data.size for data in data_list]
self.n_models = len(data_list)
self.n_params = [noise_model._get_params().size for noise_model in noise_model_list]
self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.n_models),n_list)])
self.epsilon = epsilon
self.eta, self.delta = power_ep
self.data = np.vstack(data_list)
self.N, self.output_dim = self.data.shape
self.is_heteroscedastic = True
self.Nparams = 0 # FIXME
self._transf_data = np.vstack([noise_model._preprocess_values(data) for noise_model,data in zip(noise_model_list,data_list)])
#TODO non-gaussian index
#Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde)
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
#initial values for the GP variables
self.Y = np.zeros((self.N,1))
self.covariance_matrix = np.eye(self.N)
self.precision = np.ones(self.N)[:,None]
self.Z = 0
self.YYT = None
self.V = self.precision * self.Y
self.VVT_factor = self.V
self.trYYT = 0.
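A construction sketch (hypothetical noise-model objects; the concrete constructors live in noise_model_constructors):
likelihood = EP_Mixed_Noise(data_list=[Y1, Y2], noise_model_list=[nm1, nm2])
where each noise model is expected to expose _get_params, _preprocess_values and moments_match, as used below.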
def restart(self):
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
self.Y = np.zeros((self.N,1))
self.covariance_matrix = np.eye(self.N)
self.precision = np.ones(self.N)[:,None]
self.Z = 0
self.YYT = None
self.V = self.precision * self.Y
self.VVT_factor = self.V
self.trYYT = 0.
def predictive_values(self,mu,var,full_cov,noise_model):
"""
Predicts the output given the GP
:param mu: GP's mean
:param var: GP's variance
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: False|True
:param noise_model: noise model to use
:type noise_model: integer
"""
if full_cov:
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
#_mu = []
#_var = []
#_q1 = []
#_q2 = []
#for m,v,o in zip(mu,var,output.flatten()):
# a,b,c,d = self.noise_model_list[int(o)].predictive_values(m,v)
# _mu.append(a)
# _var.append(b)
# _q1.append(c)
# _q2.append(d)
#return np.vstack(_mu),np.vstack(_var),np.vstack(_q1),np.vstack(_q2)
return self.noise_model_list[noise_model].predictive_values(mu,var)
def _get_params(self):
return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list])
def _get_param_names(self):
names = []
for noise_model in self.noise_model_list:
names += noise_model._get_param_names()
return names
def _set_params(self,p):
cs_params = np.cumsum([0]+self.n_params)
for i in range(len(self.n_params)):
self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]])
def _gradients(self,partial):
#NOTE this is not tested
return np.hstack([noise_model._gradients(partial) for noise_model in self.noise_model_list])
def _compute_GP_variables(self):
#Variables to be called from GP
mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
sigma_sum = 1./self.tau_ + 1./self.tau_tilde
mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
self.Y = mu_tilde[:,None]
self.YYT = np.dot(self.Y,self.Y.T)
self.covariance_matrix = np.diag(1./self.tau_tilde)
self.precision = self.tau_tilde[:,None]
self.V = self.precision * self.Y
self.VVT_factor = self.V
self.trYYT = np.trace(self.YYT)
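In the mapping above the site parameters define pseudo-observations for the GP; schematically:
.. math::
\tilde{\mu}_i = \tilde{v}_i / \tilde{\tau}_i, \qquad
\log Z_{EP} = \sum_i \log \hat{Z}_i + \tfrac{1}{2} \sum_i \log\big(\tau_{-i}^{-1} + \tilde{\tau}_i^{-1}\big) + \tfrac{1}{2} \sum_i \frac{(v_{-i}/\tau_{-i} - \tilde{\mu}_i)^2}{\tau_{-i}^{-1} + \tilde{\tau}_i^{-1}}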
def fit_full(self,K):
"""
The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006.
"""
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(self.N)
Sigma = K.copy()
"""
Initial values - Cavity distribution parameters:
q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = self.epsilon + 1.
epsilon_np2 = self.epsilon + 1.
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
mu = np.dot(Sigma,self.v_tilde)
self.iterations += 1
#Sigma recomputation with Cholesky decomposition
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
L = jitchol(B)
V,info = dtrtrs(L,Sroot_tilde_K,lower=1)
Sigma = K - np.dot(V.T,V)
mu = np.dot(Sigma,self.v_tilde)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
return self._compute_GP_variables()
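For reference, the inner loop of fit_full is the standard EP sweep (cf. Rasmussen & Williams, 2006): cavity parameters
.. math::
\tau_{-i} = \Sigma_{ii}^{-1} - \eta\,\tilde{\tau}_i, \qquad v_{-i} = \mu_i \Sigma_{ii}^{-1} - \eta\,\tilde{v}_i
power-EP site updates
.. math::
\Delta\tau = \frac{\delta}{\eta}\big(\hat{\sigma}_i^{-2} - \Sigma_{ii}^{-1}\big), \qquad \Delta v = \frac{\delta}{\eta}\big(\hat{\mu}_i \hat{\sigma}_i^{-2} - \mu_i \Sigma_{ii}^{-1}\big)
and the rank-1 posterior update implemented by DSYR:
.. math::
\Sigma \leftarrow \Sigma - \frac{\Delta\tau}{1 + \Delta\tau\,\Sigma_{ii}}\,\Sigma_{:,i}\Sigma_{:,i}^\top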
def fit_DTC(self, Kmm, Kmn):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see ... 2013.
"""
num_inducing = Kmm.shape[0]
#TODO: this doesn't work with uncertain inputs!
"""
Prior approximation parameters:
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
Sigma0 = Qnn = Knm*Kmmi*Kmn
"""
KmnKnm = np.dot(Kmn,Kmn.T)
Lm = jitchol(Kmm)
Lmi = chol_inv(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
KmmiKmn = np.dot(Kmmi,Kmn)
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
LLT0 = Kmm.copy()
#Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
#KmnKnm = np.dot(Kmn, Kmn.T)
#KmmiKmn = np.dot(Kmmi,Kmn)
#Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
#LLT0 = Kmm.copy()
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*Gamma
"""
mu = np.zeros(self.N)
LLT = Kmm.copy()
Sigma_diag = Qnn_diag.copy()
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
np1 = [self.tau_tilde.copy()]
np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
L = jitchol(LLT)
#cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
V,info = dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.sum(V*V,-2)
si = np.sum(V.T*V[:,i],-1)
mu += (Delta_v-Delta_tau*mu[i])*si
self.iterations += 1
#Sigma recomputation with Cholesky decomposition
LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
L = jitchol(LLT)
V,info = dtrtrs(L,Kmn,lower=1)
V2,info = dtrtrs(L.T,V,lower=0)
Sigma_diag = np.sum(V*V,-2)
Knmv_tilde = np.dot(Kmn,self.v_tilde)
mu = np.dot(V2.T,Knmv_tilde)
epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N
np1.append(self.tau_tilde.copy())
np2.append(self.v_tilde.copy())
return self._compute_GP_variables()
def fit_FITC(self, Kmm, Kmn, Knn_diag):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008.
"""
num_inducing = Kmm.shape[0]
"""
Prior approximation parameters:
q(f|X) = int N(f|Kfu*Kuui*u, diag(Kff-Qff)) N(u|0,Kuu) du = N(f|0,Sigma0)
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
"""
Lm = jitchol(Kmm)
Lmi = chol_inv(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
P0 = Kmn.T
KmnKnm = np.dot(P0.T, P0)
KmmiKmn = np.dot(Kmmi,P0.T)
Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
Diag0 = Knn_diag - Qnn_diag
R0 = jitchol(Kmmi).T
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*Gamma
"""
self.w = np.zeros(self.N)
self.Gamma = np.zeros(num_inducing)
mu = np.zeros(self.N)
P = P0.copy()
R = R0.copy()
Diag = Diag0.copy()
Sigma_diag = Knn_diag
RPT0 = np.dot(R0,P0.T)
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
dtd1 = Delta_tau*Diag[i] + 1.
dii = Diag[i]
Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = P[i,:].reshape(1,num_inducing)
P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(R,pi_.T)
RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
R = jitchol(RTR).T
self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
mu = self.w + np.dot(P,self.Gamma)
self.iterations += 1
#Sigma recomputation with Cholesky decomposition
Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
Diag = Diag0 * Iplus_Dprod_i
P = Iplus_Dprod_i[:,None] * P0
safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0)
L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T))
R,info = dtrtrs(L,R0,lower=1)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
self.w = Diag * self.v_tilde
self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
mu = self.w + np.dot(P,self.Gamma)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
return self._compute_GP_variables()
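A minimal usage sketch (mirroring the classification tests further down; data shapes illustrative) of how these EP fits are reached from user code; update_likelihood_approximation() is what triggers the site updates above:

import numpy as np
import GPy
N = 20
X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
m = GPy.models.GPClassification(X, Y, kernel=GPy.kern.rbf(1))
m.update_likelihood_approximation()  # runs the EP site updates until both epsilon criteria are met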

View file

@ -7,9 +7,9 @@ class Gaussian(likelihood):
"""
Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the likelihood_function used
:param variance :
:param data: observed output
:type data: Nx1 numpy.ndarray
:param variance: noise parameter
:param normalize: whether to normalize the data before computing (predictions will be in original scales)
:type normalize: False|True
"""

View file

@ -0,0 +1,106 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
from likelihood import likelihood
from . import Gaussian
class Gaussian_Mixed_Noise(likelihood):
"""
Gaussian Likelihood for multiple outputs
This is a wrapper around likelihood.Gaussian class
:param data_list: data observations
:type data_list: list of numpy arrays (num_data_output_i x 1), one array per output
:param noise_params: noise parameters of each output
:type noise_params: list of floats, one per output
:param normalize: whether to normalize the data before computing (predictions will be in original scales)
:type normalize: False|True
"""
def __init__(self, data_list, noise_params=None, normalize=True):
self.Nparams = len(data_list)
self.n_list = [data.size for data in data_list]
self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.Nparams),self.n_list)])
if noise_params is None:
noise_params = [1.] * self.Nparams
else:
assert self.Nparams == len(noise_params), 'Number of noise parameters does not match the number of noise models.'
self.noise_model_list = [Gaussian(Y,variance=v,normalize = normalize) for Y,v in zip(data_list,noise_params)]
self.n_params = [noise_model._get_params().size for noise_model in self.noise_model_list]
self.data = np.vstack(data_list)
self.N, self.output_dim = self.data.shape
self._offset = np.zeros((1, self.output_dim))
self._scale = np.ones((1, self.output_dim))
self.is_heteroscedastic = True
self.Z = 0. # a correction factor which accounts for the approximation made
self.set_data(data_list)
self._set_params(np.asarray(noise_params))
def set_data(self, data_list):
self.data = np.vstack(data_list)
self.N, D = self.data.shape
assert D == self.output_dim
self.Y = (self.data - self._offset) / self._scale
if D > self.N:
raise NotImplementedError
#self.YYT = np.dot(self.Y, self.Y.T)
#self.trYYT = np.trace(self.YYT)
#self.YYT_factor = jitchol(self.YYT)
else:
self.YYT = None
self.trYYT = np.sum(np.square(self.Y))
self.YYT_factor = self.Y
def predictive_values(self,mu,var,full_cov,noise_model):
"""
Predicts the output given the GP
:param mu: GP's mean
:param var: GP's variance
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: False|True
:param noise_model: noise model to use
:type noise_model: integer
"""
if full_cov:
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
return self.noise_model_list[noise_model].predictive_values(mu,var,full_cov)
def _get_params(self):
return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list])
def _get_param_names(self):
if len(self.noise_model_list) == 1:
names = self.noise_model_list[0]._get_param_names()
else:
names = []
for noise_model,i in zip(self.noise_model_list,range(len(self.n_list))):
names.append(''.join(noise_model._get_param_names() + ['_%s' %i]))
return names
def _set_params(self,p):
cs_params = np.cumsum([0]+self.n_params)
for i in range(len(self.n_params)):
self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]])
self.precision = np.hstack([np.repeat(noise_model.precision,n) for noise_model,n in zip(self.noise_model_list,self.n_list)])[:,None]
self.V = self.precision * self.Y
self.VVT_factor = self.precision * self.YYT_factor
self.covariance_matrix = np.eye(self.N) * 1./self.precision
def _gradients(self,partial):
gradients = []
aux = np.cumsum([0]+self.n_list)
for ai,af,noise_model in zip(aux[:-1],aux[1:],self.noise_model_list):
gradients += [noise_model._gradients(partial[ai:af])]
return np.hstack(gradients)
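A short construction sketch for the mixed-noise likelihood above (values illustrative; the class is assumed to be exported as GPy.likelihoods.Gaussian_Mixed_Noise, which is how the multioutput models further down reach it):

import numpy as np
import GPy
Y1 = np.random.randn(50, 1)
Y2 = np.random.randn(30, 1)
lik = GPy.likelihoods.Gaussian_Mixed_Noise([Y1, Y2], noise_params=[0.1, 1.0], normalize=False)
assert lik.N == 80 and lik.precision.shape == (80, 1)  # one precision entry per stacked observation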

View file

@ -1,166 +0,0 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import link_functions
class LikelihoodFunction(object):
"""
Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the LikelihoodFunction used
"""
def __init__(self,link):
if link == self._analytical:
self.moments_match = self._moments_match_analytical
else:
assert isinstance(link,link_functions.LinkFunction)
self.link = link
self.moments_match = self._moments_match_numerical
def _preprocess_values(self,Y):
return Y
def _product(self,gp,obs,mu,sigma):
return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._distribution(gp,obs)
def _nlog_product(self,gp,obs,mu,sigma):
return -(-.5*(gp-mu)**2/sigma**2 + self._log_distribution(gp,obs))
def _locate(self,obs,mu,sigma):
"""
Golden Search to find the mode in the _product function (cavity x exact likelihood) and define a grid around it for numerical integration
"""
golden_A = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit
golden_B = np.array([np.log(obs),mu]).max() #Upper limit
return sp.optimize.golden(self._nlog_product, args=(obs,mu,sigma), brack=(golden_A,golden_B)) #Better to work with _nlog_product than with _product
def _moments_match_numerical(self,obs,tau,v):
"""
Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input.
"""
mu = v/tau
sigma = np.sqrt(1./tau)
opt = self._locate(obs,mu,sigma)
width = 3./np.log(max(obs,2))
A = opt - width #Grid's lower limit
B = opt + width #Grid's Upper limit
K = 10*int(np.log(max(obs,150))) #Number of points in the grid
h = (B-A)/K # length of the intervals
grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
_aux1 = self._product(A,obs,mu,sigma)
_aux2 = self._product(B,obs,mu,sigma)
_aux3 = 4*self._product(grid_x[range(1,K,2)],obs,mu,sigma)
_aux4 = 2*self._product(grid_x[range(2,K-1,2)],obs,mu,sigma)
zeroth = np.hstack((_aux1,_aux2,_aux3,_aux4)) # grid of points (Y axis) rearranged
first = zeroth*x
second = first*x
Z_hat = sum(zeroth)*h/3 # Zero-th moment
mu_hat = sum(first)*h/(3*Z_hat) # First moment
m2 = sum(second)*h/(3*Z_hat) # Second moment
sigma2_hat = m2 - mu_hat**2 # Second central moment
return float(Z_hat), float(mu_hat), float(sigma2_hat)
class Binomial(LikelihoodFunction):
"""
Probit likelihood
Y is expected to take values in {-1,1}
-----
$$
L(x) = \\Phi (Y_i*f_i)
$$
"""
def __init__(self,link=None):
self._analytical = link_functions.Probit
if not link:
link = self._analytical
super(Binomial, self).__init__(link)
def _distribution(self,gp,obs):
pass
def _log_distribution(self,gp,obs):
pass
def _preprocess_values(self,Y):
"""
Check if the values of the observations correspond to the values
assumed by the likelihood function.
..Note:: Binary classification algorithm works better with classes {-1,1}
"""
Y_prep = Y.copy()
Y1 = Y[Y.flatten()==1].size
Y2 = Y[Y.flatten()==0].size
assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
Y_prep[Y.flatten() == 0] = -1
return Y_prep
def _moments_match_analytical(self,data_i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z)
phi = std_norm_pdf(z)
mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
return Z_hat, mu_hat, sigma2_hat
def predictive_values(self,mu,var):
"""
Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction
:param mu: mean of the latent variable
:param var: variance of the latent variable
"""
mu = mu.flatten()
var = var.flatten()
mean = stats.norm.cdf(mu/np.sqrt(1+var))
norm_025 = [stats.norm.ppf(.025,m,v) for m,v in zip(mu,var)]
norm_975 = [stats.norm.ppf(.975,m,v) for m,v in zip(mu,var)]
p_025 = stats.norm.cdf(norm_025/np.sqrt(1+var))
p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var))
return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var
class Poisson(LikelihoodFunction):
"""
Poisson likelihood
Y is expected to take values in {0,1,2,...}
-----
$$
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
$$
"""
def __init__(self,link=None):
self._analytical = None
if not link:
link = link_functions.Log()
super(Poisson, self).__init__(link)
def _distribution(self,gp,obs):
return stats.poisson.pmf(obs,self.link.inv_transf(gp))
def _log_distribution(self,gp,obs):
return - self.link.inv_transf(gp) + obs * self.link.log_inv_transf(gp)
def predictive_values(self,mu,var):
"""
Compute mean, and conficence interval (percentiles 5 and 95) of the prediction
"""
mean = self.link.transf(mu)#np.exp(mu*self.scale + self.location)
tmp = stats.poisson.ppf(np.array([.025,.975]),mean)
p_025 = tmp[:,0]
p_975 = tmp[:,1]
return mean,np.nan*mean,p_025,p_975 # better variance here TODO

View file

@ -1,33 +0,0 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
class LinkFunction(object):
"""
Link function class for doing non-Gaussian likelihoods approximation
:param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the likelihood_function used
"""
def __init__(self):
pass
class Probit(LinkFunction):
"""
Probit link function: Squashes a likelihood between 0 and 1
"""
def transf(self,mu):
pass
def inv_transf(self,f):
pass
def log_inv_transf(self,f):
pass

View file

@ -0,0 +1,88 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import noise_models
def binomial(gp_link=None):
"""
Construct a binomial likelihood
:param gp_link: a GPy gp_link function
"""
if gp_link is None:
gp_link = noise_models.gp_transformations.Probit()
#else:
# assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'
if isinstance(gp_link,noise_models.gp_transformations.Probit):
analytical_mean = True
analytical_variance = False
elif isinstance(gp_link,noise_models.gp_transformations.Step):
analytical_mean = True
analytical_variance = True
else:
analytical_mean = False
analytical_variance = False
return noise_models.binomial_noise.Binomial(gp_link,analytical_mean,analytical_variance)
def exponential(gp_link=None):
"""
Construct an exponential likelihood
:param gp_link: a GPy gp_link function
"""
if gp_link is None:
gp_link = noise_models.gp_transformations.Identity()
analytical_mean = False
analytical_variance = False
return noise_models.exponential_noise.Exponential(gp_link,analytical_mean,analytical_variance)
def gaussian(gp_link=None,variance=1.):
"""
Construct a Gaussian likelihood
:param gp_link: a GPy gp_link function
:param variance: scalar
"""
if gp_link is None:
gp_link = noise_models.gp_transformations.Identity()
#else:
# assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'
analytical_mean = False
analytical_variance = False
return noise_models.gaussian_noise.Gaussian(gp_link,analytical_mean,analytical_variance,variance)
def poisson(gp_link=None):
"""
Construct a Poisson likelihood
:param gp_link: a GPy gp_link function
"""
if gp_link is None:
gp_link = noise_models.gp_transformations.Log_ex_1()
#else:
# assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'
analytical_mean = False
analytical_variance = False
return noise_models.poisson_noise.Poisson(gp_link,analytical_mean,analytical_variance)
def gamma(gp_link=None,beta=1.):
"""
Construct a Gamma likelihood
:param gp_link: a GPy gp_link function
:param beta: scalar
"""
if gp_link is None:
gp_link = noise_models.gp_transformations.Log_ex_1()
analytical_mean = False
analytical_variance = False
return noise_models.gamma_noise.Gamma(gp_link,analytical_mean,analytical_variance,beta)
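A sketch of the intended call pattern for these constructors (labels illustrative), matching how the classification models further down build their likelihoods:

import numpy as np
import GPy
Y = np.where(np.random.rand(20, 1) > .5, 1., 0.)
noise_model = GPy.likelihoods.binomial()         # Probit transformation by default
likelihood = GPy.likelihoods.EP(Y, noise_model)  # EP wrapper around the noise model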

View file

@ -0,0 +1,7 @@
import noise_distributions
import binomial_noise
import exponential_noise
import gaussian_noise
import gamma_noise
import poisson_noise
import gp_transformations

View file

@ -0,0 +1,118 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
class Binomial(NoiseDistribution):
"""
Probit likelihood
Y is expected to take values in {-1,1}
-----
$$
L(x) = \\Phi (Y_i*f_i)
$$
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
super(Binomial, self).__init__(gp_link,analytical_mean,analytical_variance)
def _preprocess_values(self,Y):
"""
Check if the values of the observations correspond to the values
assumed by the likelihood function.
..Note:: Binary classification algorithm works better with classes {-1,1}
"""
Y_prep = Y.copy()
Y1 = Y[Y.flatten()==1].size
Y2 = Y[Y.flatten()==0].size
assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
Y_prep[Y.flatten() == 0] = -1
return Y_prep
def _moments_match_analytical(self,data_i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
if isinstance(self.gp_link,gp_transformations.Probit):
z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z)
phi = std_norm_pdf(z)
mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
elif isinstance(self.gp_link,gp_transformations.Step):
Z_hat = None
mu_hat = None
sigma2_hat = None
return Z_hat, mu_hat, sigma2_hat
def _predictive_mean_analytical(self,mu,sigma):
return stats.norm.cdf(mu/np.sqrt(1+sigma**2))
def _mass(self,gp,obs):
#NOTE obs must be in {0,1}
p = self.gp_link.transf(gp)
return p**obs * (1.-p)**(1.-obs)
def _nlog_mass(self,gp,obs):
#negative log of the Bernoulli mass, consistent with the other noise models
p = self.gp_link.transf(gp)
return -(obs*np.log(p) + (1.-obs)*np.log(1.-p))
def _dnlog_mass_dgp(self,gp,obs):
p = self.gp_link.transf(gp)
dp = self.gp_link.dtransf_df(gp)
return ((1.-obs)/(1.-p) - obs/p) * dp
def _d2nlog_mass_dgp2(self,gp,obs):
p = self.gp_link.transf(gp)
return ((1.-obs)/(1.-p) - obs/p)*self.gp_link.d2transf_df2(gp) + (obs/p**2 + (1.-obs)/(1.-p)**2)*self.gp_link.dtransf_df(gp)**2
def _mean(self,gp):
"""
Expected value of the output given the latent function: E(Y|gp)
"""
return self.gp_link.transf(gp)
def _dmean_dgp(self,gp):
return self.gp_link.dtransf_df(gp)
def _d2mean_dgp2(self,gp):
return self.gp_link.d2transf_df2(gp)
def _variance(self,gp):
"""
Variance of the output given the latent function: V(Y|gp)
"""
p = self.gp_link.transf(gp)
return p*(1.-p)
def _dvariance_dgp(self,gp):
return self.gp_link.dtransf_df(gp)*(1. - 2.*self.gp_link.transf(gp))
def _d2variance_dgp2(self,gp):
return self.gp_link.d2transf_df2(gp)*(1. - 2.*self.gp_link.transf(gp)) - 2*self.gp_link.dtransf_df(gp)**2
"""
def predictive_values(self,mu,var): #TODO remove
mu = mu.flatten()
var = var.flatten()
#mean = stats.norm.cdf(mu/np.sqrt(1+var))
mean = self._predictive_mean_analytical(mu,np.sqrt(var))
norm_025 = [stats.norm.ppf(.025,m,v) for m,v in zip(mu,var)]
norm_975 = [stats.norm.ppf(.975,m,v) for m,v in zip(mu,var)]
#p_025 = stats.norm.cdf(norm_025/np.sqrt(1+var))
#p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var))
p_025 = self._predictive_mean_analytical(norm_025,np.sqrt(var))
p_975 = self._predictive_mean_analytical(norm_975,np.sqrt(var))
return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var
"""

View file

@ -0,0 +1,68 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
class Exponential(NoiseDistribution):
"""
Exponential likelihood
Y is expected to take non-negative real values
-----
$$
L(Y_i) = \exp(-Y_i/\mu_i) / \mu_i, \quad \mu_i = g(f_i)
$$
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance)
def _preprocess_values(self,Y):
return Y
def _mass(self,gp,obs):
"""
Mass (or density) function
"""
return np.exp(-obs/self.gp_link.transf(gp))/self.gp_link.transf(gp)
def _nlog_mass(self,gp,obs):
"""
Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
"""
return obs/self.gp_link.transf(gp) + np.log(self.gp_link.transf(gp))
def _dnlog_mass_dgp(self,gp,obs):
return ( 1./self.gp_link.transf(gp) - obs/self.gp_link.transf(gp)**2) * self.gp_link.dtransf_df(gp)
def _d2nlog_mass_dgp2(self,gp,obs):
fgp = self.gp_link.transf(gp)
return (2*obs/fgp**3 - 1./fgp**2) * self.gp_link.dtransf_df(gp)**2 + ( 1./fgp - obs/fgp**2) * self.gp_link.d2transf_df2(gp)
def _mean(self,gp):
"""
Expected value of the output given the latent function: E(Y|gp)
"""
return self.gp_link.transf(gp)
def _dmean_dgp(self,gp):
return self.gp_link.dtransf_df(gp)
def _d2mean_dgp2(self,gp):
return self.gp_link.d2transf_df2(gp)
def _variance(self,gp):
"""
Variance of the output given the latent function: V(Y|gp)
"""
return self.gp_link.transf(gp)**2
def _dvariance_dgp(self,gp):
return 2*self.gp_link.transf(gp)*self.gp_link.dtransf_df(gp)
def _d2variance_dgp2(self,gp):
return 2 * (self.gp_link.dtransf_df(gp)**2 + self.gp_link.transf(gp)*self.gp_link.d2transf_df2(gp))

View file

@ -0,0 +1,71 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
class Gamma(NoiseDistribution):
"""
Gamma likelihood
Y is expected to take non-negative real values
-----
$$
L(Y_i) = \beta^{\alpha_i} Y_i^{\alpha_i - 1} \exp(-\beta Y_i) / \Gamma(\alpha_i), \quad \alpha_i = \beta g(f_i)
$$
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.):
self.beta = beta
super(Gamma, self).__init__(gp_link,analytical_mean,analytical_variance)
def _preprocess_values(self,Y):
return Y
def _mass(self,gp,obs):
"""
Mass (or density) function
"""
#return stats.gamma.pdf(obs,a = self.gp_link.transf(gp)/self.variance,scale=self.variance)
alpha = self.gp_link.transf(gp)*self.beta
return obs**(alpha - 1.) * np.exp(-self.beta*obs) * self.beta**alpha / special.gamma(alpha)
def _nlog_mass(self,gp,obs):
"""
Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
"""
alpha = self.gp_link.transf(gp)*self.beta
return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))
def _dnlog_mass_dgp(self,gp,obs):
return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
def _d2nlog_mass_dgp2(self,gp,obs):
return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
def _mean(self,gp):
"""
Expected value of the output given the latent function: E(Y|gp)
"""
return self.gp_link.transf(gp)
def _dmean_dgp(self,gp):
return self.gp_link.dtransf_df(gp)
def _d2mean_dgp2(self,gp):
return self.gp_link.d2transf_df2(gp)
def _variance(self,gp):
"""
Variance of the output given the latent function: V(Y|gp)
"""
return self.gp_link.transf(gp)/self.beta
def _dvariance_dgp(self,gp):
return self.gp_link.dtransf_df(gp)/self.beta
def _d2variance_dgp2(self,gp):
return self.gp_link.d2transf_df2(gp)/self.beta
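Since _nlog_mass above keeps every term of the Gamma density, it should equal -log(_mass) exactly; a quick check sketch (the gamma constructor path is taken from the likelihood constructors file above):

import numpy as np
import GPy
g = GPy.likelihoods.gamma(beta=2.)   # Log_ex_1 transformation by default
gp, obs = 0.5, 1.3
assert np.allclose(g._nlog_mass(gp, obs), -np.log(g._mass(gp, obs)))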

View file

@ -0,0 +1,98 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
class Gaussian(NoiseDistribution):
"""
Gaussian likelihood
:param gp_link: transformation function
:param variance: variance of the Gaussian distribution
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1.):
self.variance = variance
super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance)
def _get_params(self):
return np.array([self.variance])
def _get_param_names(self):
return ['noise_model_variance']
def _set_params(self,p):
self.variance = p
def _gradients(self,partial):
return np.zeros(1)
#return np.sum(partial)
def _preprocess_values(self,Y):
"""
Check if the values of the observations correspond to the values
assumed by the likelihood function.
"""
return Y
def _moments_match_analytical(self,data_i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
sigma2_hat = 1./(1./self.variance + tau_i)
mu_hat = sigma2_hat*(data_i/self.variance + v_i)
sum_var = self.variance + 1./tau_i
Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var)
return Z_hat, mu_hat, sigma2_hat
def _predictive_mean_analytical(self,mu,sigma):
new_sigma2 = self.predictive_variance(mu,sigma)
return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance)
def _predictive_variance_analytical(self,mu,sigma,*args): #TODO *args?
return 1./(1./self.variance + 1./sigma**2)
def _mass(self,gp,obs):
#return std_norm_pdf( (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) )
return stats.norm.pdf(obs,self.gp_link.transf(gp),np.sqrt(self.variance)) #FIXME
def _nlog_mass(self,gp,obs):
return .5*((self.gp_link.transf(gp)-obs)**2/self.variance + np.log(2.*np.pi*self.variance))
def _dnlog_mass_dgp(self,gp,obs):
return (self.gp_link.transf(gp)-obs)/self.variance * self.gp_link.dtransf_df(gp)
def _d2nlog_mass_dgp2(self,gp,obs):
return ((self.gp_link.transf(gp)-obs)*self.gp_link.d2transf_df2(gp) + self.gp_link.dtransf_df(gp)**2)/self.variance
def _mean(self,gp):
"""
Expected value of the output given the latent function: E(Y|gp)
"""
return self.gp_link.transf(gp)
def _dmean_dgp(self,gp):
return self.gp_link.dtransf_df(gp)
def _d2mean_dgp2(self,gp):
return self.gp_link.d2transf_df2(gp)
def _variance(self,gp):
"""
Variance of the output given the latent function: V(Y|gp)
"""
return self.variance
def _dvariance_dgp(self,gp):
return 0
def _d2variance_dgp2(self,gp):
return 0
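For Gaussian noise the EP moments are exact; the normalizer in _moments_match_analytical is just a Gaussian density in the data, which a short sketch can confirm (values illustrative):

import numpy as np
from scipy import stats
var, tau_i, v_i, y = 0.5, 2.0, 0.6, 1.2
sum_var = var + 1. / tau_i
Z_hat = 1. / np.sqrt(2. * np.pi * sum_var) * np.exp(-.5 * (y - v_i / tau_i) ** 2. / sum_var)
assert np.allclose(Z_hat, stats.norm.pdf(y, loc=v_i / tau_i, scale=np.sqrt(sum_var)))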

View file

@ -0,0 +1,125 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf
class GPTransformation(object):
"""
Link function class for non-Gaussian likelihood approximations
..Note:: A transformation maps the latent function f onto the domain of the output's parameter
"""
def __init__(self):
pass
class Identity(GPTransformation):
"""
$$
g(f) = f
$$
"""
#def transf(self,mu):
# return mu
def transf(self,f):
return f
def dtransf_df(self,f):
return 1.
def d2transf_df2(self,f):
return 0
class Probit(GPTransformation):
"""
$$
g(f) = \\Phi (f)
$$
"""
#def transf(self,mu):
# return inv_std_norm_cdf(mu)
def transf(self,f):
return std_norm_cdf(f)
def dtransf_df(self,f):
return std_norm_pdf(f)
def d2transf_df2(self,f):
return -f * std_norm_pdf(f)
class Log(GPTransformation):
"""
$$
g(f) = \exp(f)
$$
"""
#def transf(self,mu):
# return np.log(mu)
def transf(self,f):
return np.exp(f)
def dtransf_df(self,f):
return np.exp(f)
def d2transf_df2(self,f):
return np.exp(f)
class Log_ex_1(GPTransformation):
"""
$$
g(f) = \log(1 + \exp(f))
$$
"""
#def transf(self,mu):
# """
# function: output space -> latent space
# """
# return np.log(np.exp(mu) - 1)
def transf(self,f):
"""
function: latent space -> output space
"""
return np.log(1.+np.exp(f))
def dtransf_df(self,f):
return np.exp(f)/(1.+np.exp(f))
def d2transf_df2(self,f):
aux = np.exp(f)/(1.+np.exp(f))
return aux*(1.-aux)
class Reciprocal(GPTransformation):
def transf(self,f):
return 1./f
def dtransf_df(self,f):
return -1./f**2
def d2transf_df2(self,f):
return 2./f**3
class Step(GPTransformation):
"""
$$
g(f) = 2 \cdot I_{\{f > 0\}} - 1
$$
"""
def transf(self,f):
#transformation goes here
return np.where(f>0, 1, -1)
def dtransf_df(self,f):
pass
def d2transf_df2(self,f):
pass
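A finite-difference sketch checking the Log_ex_1 derivatives above (the module path is assumed from the noise_models package imports):

import numpy as np
from GPy.likelihoods.noise_models import gp_transformations
link = gp_transformations.Log_ex_1()
f, h = 0.3, 1e-6
fd1 = (link.transf(f + h) - link.transf(f - h)) / (2. * h)
fd2 = (link.dtransf_df(f + h) - link.dtransf_df(f - h)) / (2. * h)
assert np.allclose(fd1, link.dtransf_df(f), atol=1e-6)
assert np.allclose(fd2, link.d2transf_df2(f), atol=1e-6)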

View file

@ -0,0 +1,384 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
import pylab as pb
from GPy.util.plot import gpplot
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
class NoiseDistribution(object):
"""
Likelihood class for doing Expectation propagation
:param gp_link: transformation function (GPTransformation)
:param analytical_mean: whether the predictive mean is available in closed form
:param analytical_variance: whether the predictive variance is available in closed form
"""
def __init__(self,gp_link,analytical_mean=False,analytical_variance=False):
#assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."#FIXME
self.gp_link = gp_link
self.analytical_mean = analytical_mean
self.analytical_variance = analytical_variance
if self.analytical_mean:
self.moments_match = self._moments_match_analytical
self.predictive_mean = self._predictive_mean_analytical
else:
self.moments_match = self._moments_match_numerical
self.predictive_mean = self._predictive_mean_numerical
if self.analytical_variance:
self.predictive_variance = self._predictive_variance_analytical
else:
self.predictive_variance = self._predictive_variance_numerical
def _get_params(self):
return np.zeros(0)
def _get_param_names(self):
return []
def _set_params(self,p):
pass
def _gradients(self,partial):
return np.zeros(0)
def _preprocess_values(self,Y):
"""
In case it is needed, this function assesses the output values or applies any pertinent transformation to them.
:param Y: observed output (Nx1 numpy.darray)
"""
return Y
def _product(self,gp,obs,mu,sigma):
"""
Product between the cavity distribution and a likelihood factor.
:param gp: latent variable
:param obs: observed output
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._mass(gp,obs)
def _nlog_product_scaled(self,gp,obs,mu,sigma):
"""
Negative log-product between the cavity distribution and a likelihood factor.
..Note:: The constant term in the Gaussian distribution is ignored.
:param gp: latent variable
:param obs: observed output
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return .5*((gp-mu)/sigma)**2 + self._nlog_mass(gp,obs)
def _dnlog_product_dgp(self,gp,obs,mu,sigma):
"""
Derivative wrt latent variable of the log-product between the cavity distribution and a likelihood factor.
:param gp: latent variable
:param obs: observed output
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return (gp - mu)/sigma**2 + self._dnlog_mass_dgp(gp,obs)
def _d2nlog_product_dgp2(self,gp,obs,mu,sigma):
"""
Second derivative wrt latent variable of the log-product between the cavity distribution and a likelihood factor.
:param gp: latent variable
:param obs: observed output
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return 1./sigma**2 + self._d2nlog_mass_dgp2(gp,obs)
def _product_mode(self,obs,mu,sigma):
"""
Newton's CG method to find the mode in _product (cavity x likelihood factor).
:param obs: observed output
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma))
def _moments_match_analytical(self,obs,tau,v):
"""
If available, this function computes the moments analytically.
"""
pass
def _moments_match_numerical(self,obs,tau,v):
"""
Laplace approximation to calculate the moments.
:param obs: observed output
:param tau: cavity distribution 1st natural parameter (precision)
:param v: cavity distribution 2nd natural parameter (mu*precision)
"""
mu = v/tau
mu_hat = self._product_mode(obs,mu,np.sqrt(1./tau))
sigma2_hat = 1./(tau + self._d2nlog_mass_dgp2(mu_hat,obs))
Z_hat = np.exp(-.5*tau*(mu_hat-mu)**2) * self._mass(mu_hat,obs)*np.sqrt(tau*sigma2_hat)
return Z_hat,mu_hat,sigma2_hat
def _nlog_conditional_mean_scaled(self,gp,mu,sigma):
"""
Negative logarithm of the l.v.'s predictive distribution times the output's mean given the l.v.
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
..Note:: This function helps computing E(Y_star) = E(E(Y_star|f_star))
"""
return .5*((gp - mu)/sigma)**2 - np.log(self._mean(gp))
def _dnlog_conditional_mean_dgp(self,gp,mu,sigma):
"""
Derivative of _nlog_conditional_mean_scaled wrt. l.v.
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return (gp - mu)/sigma**2 - self._dmean_dgp(gp)/self._mean(gp)
def _d2nlog_conditional_mean_dgp2(self,gp,mu,sigma):
"""
Second derivative of _nlog_conditional_mean_scaled wrt. l.v.
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return 1./sigma**2 - self._d2mean_dgp2(gp)/self._mean(gp) + (self._dmean_dgp(gp)/self._mean(gp))**2
def _nlog_exp_conditional_variance_scaled(self,gp,mu,sigma):
"""
Negative logarithm of the l.v.'s predictive distribution times the output's variance given the l.v.
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
..Note:: This function helps computing E(V(Y_star|f_star))
"""
return .5*((gp - mu)/sigma)**2 - np.log(self._variance(gp))
def _dnlog_exp_conditional_variance_dgp(self,gp,mu,sigma):
"""
Derivative of _nlog_exp_conditional_variance_scaled wrt. l.v.
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return (gp - mu)/sigma**2 - self._dvariance_dgp(gp)/self._variance(gp)
def _d2nlog_exp_conditional_variance_dgp2(self,gp,mu,sigma):
"""
Second derivative of _nlog_exp_conditional_variance_scaled wrt. l.v.
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return 1./sigma**2 - self._d2variance_dgp2(gp)/self._variance(gp) + (self._dvariance_dgp(gp)/self._variance(gp))**2
def _nlog_exp_conditional_mean_sq_scaled(self,gp,mu,sigma):
"""
Negative logarithm of the l.v.'s predictive distribution times the output's mean squared given the l.v.
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
..Note:: This function helps computing E( E(Y_star|f_star)**2 )
"""
return .5*((gp - mu)/sigma)**2 - 2*np.log(self._mean(gp))
def _dnlog_exp_conditional_mean_sq_dgp(self,gp,mu,sigma):
"""
Derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v.
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return (gp - mu)/sigma**2 - 2*self._dmean_dgp(gp)/self._mean(gp)
def _d2nlog_exp_conditional_mean_sq_dgp2(self,gp,mu,sigma):
"""
Second derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v.
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return 1./sigma**2 - 2*( self._d2mean_dgp2(gp)/self._mean(gp) - (self._dmean_dgp(gp)/self._mean(gp))**2 )
def _predictive_mean_analytical(self,mu,sigma):
"""
If available, this function computes the predictive mean analytically.
"""
pass
def _predictive_variance_analytical(self,mu,sigma):
"""
If available, this function computes the predictive variance analytically.
"""
pass
def _predictive_mean_numerical(self,mu,sigma):
"""
Laplace approximation to the predictive mean: E(Y_star) = E( E(Y_star|f_star) )
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma))
mean = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma))*sigma)
"""
pb.figure()
x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)])
f = np.array([np.exp(-self._nlog_conditional_mean_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x])
pb.plot(x,f,'b-')
sigma2 = 1./self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma)
f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2)
k = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2)
pb.plot(x,f2*mean,'r-')
pb.vlines(maximum,0,f.max())
"""
return mean
def _predictive_mean_sq(self,mu,sigma):
"""
Laplace approximation to the predictive mean squared: E(Y_star**2) = E( E(Y_star|f_star)**2 )
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_mean_sq_scaled,x0=self._mean(mu),fprime=self._dnlog_exp_conditional_mean_sq_dgp,fhess=self._d2nlog_exp_conditional_mean_sq_dgp2,args=(mu,sigma))
mean_squared = np.exp(-self._nlog_exp_conditional_mean_sq_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_mean_sq_dgp2(maximum,mu,sigma))*sigma)
return mean_squared
def _predictive_variance_numerical(self,mu,sigma,predictive_mean=None):
"""
Laplace approximation to the predictive variance: V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
:param predictive_mean: output's predictive mean; if None, the predictive_mean function will be called.
"""
# E( V(Y_star|f_star) )
maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma))
exp_var = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma))*sigma)
"""
pb.figure()
x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)])
f = np.array([np.exp(-self._nlog_exp_conditional_variance_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x])
pb.plot(x,f,'b-')
sigma2 = 1./self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma)
f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2)
k = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2)
pb.plot(x,f2*exp_var,'r--')
pb.vlines(maximum,0,f.max())
"""
#V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
exp_exp2 = self._predictive_mean_sq(mu,sigma)
if predictive_mean is None:
predictive_mean = self.predictive_mean(mu,sigma)
var_exp = exp_exp2 - predictive_mean**2
return exp_var + var_exp
def _predictive_percentiles(self,p,mu,sigma):
"""
Percentiles of the predictive distribution
:param p: lower tail probability
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
qf = stats.norm.ppf(p,mu,sigma)
return self.gp_link.transf(qf)
def _nlog_joint_predictive_scaled(self,x,mu,sigma):
"""
Negative logarithm of the joint predictive distribution (latent variable and output).
:param x: tuple (latent variable,output)
:param mu: latent variable's predictive mean
:param sigma: latent variable's predictive standard deviation
"""
return self._nlog_product_scaled(x[0],x[1],mu,sigma)
def _gradient_nlog_joint_predictive(self,x,mu,sigma):
"""
Gradient of _nlog_joint_predictive_scaled.
:param x: tuple (latent variable,output)
:param mu: latent variable's predictive mean
:param sigma: latent variable's predictive standard deviation
..Note:: Only available when the output is continuous
"""
assert not self.discrete, "Gradient not available for discrete outputs."
return np.array((self._dnlog_product_dgp(gp=x[0],obs=x[1],mu=mu,sigma=sigma),self._dnlog_mass_dobs(obs=x[1],gp=x[0])))
def _hessian_nlog_joint_predictive(self,x,mu,sigma):
"""
Hessian of _nlog_joint_predictive_scaled.
:param x: tuple (latent variable,output)
:param mu: latent variable's predictive mean
:param sigma: latent variable's predictive standard deviation
..Note:: Only available when the output is continuous
"""
assert not self.discrete, "Hessian not available for discrete outputs."
cross_derivative = self._d2nlog_mass_dcross(gp=x[0],obs=x[1])
return np.array((self._d2nlog_product_dgp2(gp=x[0],obs=x[1],mu=mu,sigma=sigma),cross_derivative,cross_derivative,self._d2nlog_mass_dobs2(obs=x[1],gp=x[0]))).reshape(2,2)
def _joint_predictive_mode(self,mu,sigma):
"""
Mode of the joint predictive distribution (latent variable and output), found with Newton-CG.
:param mu: latent variable's predictive mean
:param sigma: latent variable's predictive standard deviation
"""
return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.gp_link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma))
def predictive_values(self,mu,var):
"""
Compute mean, variance and confidence interval (percentiles 2.5 and 97.5) of the prediction
:param mu: mean of the latent variable
:param var: variance of the latent variable
"""
if isinstance(mu,float) or isinstance(mu,int):
mu = [mu]
var = [var]
pred_mean = []
pred_var = []
q1 = []
q3 = []
for m,s in zip(mu,np.sqrt(var)):
pred_mean.append(self.predictive_mean(m,s))
pred_var.append(self.predictive_variance(m,s,pred_mean[-1]))
q1.append(self._predictive_percentiles(.025,m,s))
q3.append(self._predictive_percentiles(.975,m,s))
pred_mean = np.vstack(pred_mean)
pred_var = np.vstack(pred_var)
q1 = np.vstack(q1)
q3 = np.vstack(q3)
return pred_mean, pred_var, q1, q3
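A sketch of how the Laplace moment matching above is driven: moments_match dispatches to _moments_match_numerical whenever no analytical form exists, as for the Poisson noise model (constructor from the likelihood constructors file above; cavity values illustrative):

import GPy
noise_model = GPy.likelihoods.poisson()   # no analytical moments, so the Laplace path is used
# cavity with precision tau = 1.5 and v = tau * mu_cavity
Z_hat, mu_hat, sigma2_hat = noise_model.moments_match(3., 1.5, 0.6)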

View file

@ -0,0 +1,85 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
class Poisson(NoiseDistribution):
"""
Poisson likelihood
Y is expected to take values in {0,1,2,...}
-----
$$
L(x) = \exp(-\lambda) * \lambda**Y_i / Y_i!
$$
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
#self.discrete = True
#self.support_limits = (0,np.inf)
#self.analytical_mean = False
super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance)
def _preprocess_values(self,Y): #TODO
#self.scale = .5*Y.max()
#self.shift = Y.mean()
return Y #(Y - self.shift)/self.scale
def _mass(self,gp,obs):
"""
Mass (or density) function
"""
#obs = obs*self.scale + self.shift
return stats.poisson.pmf(obs,self.gp_link.transf(gp))
def _nlog_mass(self,gp,obs):
"""
Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
"""
return self.gp_link.transf(gp) - obs * np.log(self.gp_link.transf(gp)) + np.log(special.gamma(obs+1))
def _dnlog_mass_dgp(self,gp,obs):
return self.gp_link.dtransf_df(gp) * (1. - obs/self.gp_link.transf(gp))
def _d2nlog_mass_dgp2(self,gp,obs):
d2_df = self.gp_link.d2transf_df2(gp)
transf = self.gp_link.transf(gp)
return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
def _dnlog_mass_dobs(self,obs,gp): #TODO not needed
return special.psi(obs+1) - np.log(self.gp_link.transf(gp))
def _d2nlog_mass_dobs2(self,obs,gp=None): #TODO not needed
return special.polygamma(1,obs)
def _d2nlog_mass_dcross(self,obs,gp): #TODO not needed
return -self.gp_link.dtransf_df(gp)/self.gp_link.transf(gp)
def _mean(self,gp):
"""
Expected value of the output given the latent function: E(Y|gp)
"""
return self.gp_link.transf(gp)
def _dmean_dgp(self,gp):
return self.gp_link.dtransf_df(gp)
def _d2mean_dgp2(self,gp):
return self.gp_link.d2transf_df2(gp)
def _variance(self,gp):
"""
Variance of the output given the latent function: V(Y|gp)
"""
return self.gp_link.transf(gp)
def _dvariance_dgp(self,gp):
return self.gp_link.dtransf_df(gp)
def _d2variance_dgp2(self,gp):
return self.gp_link.d2transf_df2(gp)
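A finite-difference sketch checking _dnlog_mass_dgp above against _nlog_mass (the special.gamma(obs+1) constant cancels in the difference):

import numpy as np
import GPy
p = GPy.likelihoods.poisson()
gp, obs, h = 0.2, 4., 1e-6
fd = (p._nlog_mass(gp + h, obs) - p._nlog_mass(gp - h, obs)) / (2. * h)
assert np.allclose(fd, p._dnlog_mass_dgp(gp, obs), atol=1e-5)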

View file

@ -14,3 +14,5 @@ from warped_gp import WarpedGP
from bayesian_gplvm import BayesianGPLVM
from mrd import MRD
from gradient_checker import GradientChecker
from gp_multioutput_regression import GPMultioutputRegression
from sparse_gp_multioutput_regression import SparseGPMultioutputRegression

View file

@ -31,8 +31,8 @@ class FITCClassification(FITC):
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
if likelihood is None:
distribution = likelihoods.likelihood_functions.Binomial()
likelihood = likelihoods.EP(Y, distribution)
noise_model = likelihoods.binomial()
likelihood = likelihoods.EP(Y, noise_model)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):
raise Warning, 'likelihood.data and Y are different.'

View file

@ -31,8 +31,8 @@ class GPClassification(GP):
kernel = kern.rbf(X.shape[1])
if likelihood is None:
distribution = likelihoods.likelihood_functions.Binomial()
likelihood = likelihoods.EP(Y, distribution)
noise_model = likelihoods.binomial()
likelihood = likelihoods.EP(Y, noise_model)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):
raise Warning, 'likelihood.data and Y are different.'

View file

@ -0,0 +1,59 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import GP
from .. import likelihoods
from .. import kern
#from ..util import multioutput
class GPMultioutputRegression(GP):
"""
Multiple output Gaussian process with Gaussian noise
This is a wrapper around the models.GP class, with a set of sensible defaults
:param X_list: input observations
:type X_list: list of numpy arrays (num_data_output_i x input_dim), one array per output
:param Y_list: observed values
:type Y_list: list of numpy arrays (num_data_output_i x 1), one array per output
:param kernel_list: GPy kernels, defaults to rbf
:type kernel_list: list of GPy kernels
:param noise_variance_list: noise parameters per output, defaults to 1.0 for every output
:type noise_variance_list: list of floats
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:param W_columns: number of columns of the coregionalization matrix W (see the coregionalize kernel documentation)
:type W_columns: integer
"""
def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,W_columns=1):
self.num_outputs = len(Y_list)
assert len(X_list) == self.num_outputs, 'Number of outputs does not match length of inputs list.'
#Inputs indexing
i = 0
index = []
for x,y in zip(X_list,Y_list):
assert x.shape[0] == y.shape[0]
index.append(np.repeat(i,x.shape[0])[:,None]) #one index entry per data row
i += 1
index = np.vstack(index)
X = np.hstack([np.vstack(X_list),index])
original_dim = X.shape[1] - 1
#Mixed noise likelihood definition
likelihood = likelihoods.Gaussian_Mixed_Noise(Y_list,noise_params=noise_variance_list,normalize=normalize_Y)
#Coregionalization kernel definition
if kernel_list is None:
kernel_list = [kern.rbf(original_dim)]
mkernel = kern.build_lcm(input_dim=original_dim, num_outputs=self.num_outputs, kernel_list = kernel_list, W_columns=W_columns)
self.multioutput = True
GP.__init__(self, X, likelihood, mkernel, normalize_X=normalize_X)
self.ensure_default_constraints()
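A usage sketch for the wrapper above, mirroring the multioutput gradient test added further down (data illustrative):

import numpy as np
import GPy
X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5
Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
m = GPy.models.GPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel_list=[GPy.kern.rbf(1)])
m.optimize()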

View file

@ -28,11 +28,11 @@ class SparseGPClassification(SparseGP):
def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10):
if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1], 1e-3)
kernel = kern.rbf(X.shape[1])# + kern.white(X.shape[1],1e-3)
if likelihood is None:
distribution = likelihoods.likelihood_functions.Binomial()
likelihood = likelihoods.EP(Y, distribution)
noise_model = likelihoods.binomial()
likelihood = likelihoods.EP(Y, noise_model)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):
raise Warning, 'likelihood.data and Y are different.'

View file

@ -0,0 +1,80 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import SparseGP
from .. import likelihoods
from .. import kern
from ..util import multioutput
class SparseGPMultioutputRegression(SparseGP):
"""
Sparse multiple output Gaussian process with Gaussian noise
This is a wrapper around the models.SparseGP class, with a set of sensible defaults
:param X_list: input observations
:type X_list: list of numpy arrays (num_data_output_i x input_dim), one array per output
:param Y_list: observed values
:type Y_list: list of numpy arrays (num_data_output_i x 1), one array per output
:param kernel_list: GPy kernels, defaults to rbf
:type kernel_list: list of GPy kernels
:param noise_variance_list: noise parameters per output, defaults to 1.0 for every output
:type noise_variance_list: list of floats
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:param Z_list: inducing inputs (optional)
:type Z_list: list of numpy arrays (num_inducing_output_i x input_dim), one array per output | empty list
:param num_inducing: number of inducing inputs per output, defaults to 10 (ignored if Z_list is not empty)
:type num_inducing: integer
:param W_columns: number of columns of the coregionalization matrix W (see the coregionalize kernel documentation)
:type W_columns: integer
"""
#NOTE not tested with uncertain inputs
def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,Z_list=None,num_inducing=10,W_columns=1):
self.num_outputs = len(Y_list)
assert len(X_list) == self.num_outputs, 'Number of outputs does not match length of inputs list.'
#Inducing inputs list
if Z_list: #inducing inputs given by the user
assert len(Z_list) == self.num_outputs, 'Number of outputs does not match length of inducing inputs list.'
else: #otherwise, sample them from the inputs (avoids the mutable default argument)
Z_list = []
if isinstance(num_inducing,np.int):
num_inducing = [num_inducing] * self.num_outputs
num_inducing = np.asarray(num_inducing)
assert num_inducing.size == self.num_outputs, 'Length of num_inducing does not match the number of outputs.'
for ni,X in zip(num_inducing,X_list):
i = np.random.permutation(X.shape[0])[:ni]
Z_list.append(X[i].copy())
#Inputs and inducing inputs indexing
i = 0
index = []
index_z = []
for x,y,z in zip(X_list,Y_list,Z_list):
assert x.shape[0] == y.shape[0]
index.append(np.repeat(i,x.shape[0])[:,None])
index_z.append(np.repeat(i,z.shape[0])[:,None])
i += 1
index = np.vstack(index)
index_z = np.vstack(index_z)
X = np.hstack([np.vstack(X_list),index])
Z = np.hstack([np.vstack(Z_list),index_z])
original_dim = X.shape[1] - 1
#Mixed noise likelihood definition
likelihood = likelihoods.Gaussian_Mixed_Noise(Y_list,noise_params=noise_variance_list,normalize=normalize_Y)
#Coregionalization kernel definition
if kernel_list is None:
kernel_list = [kern.rbf(original_dim)]
mkernel = kern.build_lcm(input_dim=original_dim, num_outputs=self.num_outputs, kernel_list = kernel_list, W_columns=W_columns)
self.multioutput = True
SparseGP.__init__(self, X, likelihood, mkernel, Z=Z, normalize_X=normalize_X)
self.constrain_fixed('.*iip_\d+_1')
self.ensure_default_constraints()
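The sparse wrapper is built the same way, with the inducing inputs sampled per output when Z_list is not given (a sketch; data illustrative):

import numpy as np
import GPy
X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5
Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
m = GPy.models.SparseGPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel_list=[GPy.kern.rbf(1)], num_inducing=8)
m.optimize()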

View file

@ -20,7 +20,11 @@ class SparseGPRegression(SparseGP):
:type normalize_X: False|True
:param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (num_inducing x input_dim) | None
:rtype: model object
:param X_variance: The uncertainty in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (num_data x input_dim) | None
.. Note:: Multiple independent outputs are allowed using columns of Y

View file

@ -4,15 +4,18 @@
import unittest
import numpy as np
import GPy
verbose = False
class KernelTests(unittest.TestCase):
def test_kerneltie(self):
K = GPy.kern.rbf(5, ARD=True)
K.tie_params('.*[01]')
K.constrain_fixed('2')
X = np.random.rand(5,5)
Y = np.ones((5,1))
m = GPy.models.GPRegression(X,Y,K)
@ -95,7 +98,6 @@ class KernelTests(unittest.TestCase):
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."
unittest.main()

View file

@ -7,9 +7,14 @@ import unittest
import GPy
import numpy as np
from GPy import testing
import sys
import numpy
from GPy.kern.parts.rbf import RBF
from GPy.kern.parts.linear import Linear
from copy import deepcopy
__test__ = False
np.random.seed(0)
__test__ = lambda: 'deep' in sys.argv
# np.random.seed(0)
def ard(p):
try:
@ -19,24 +24,37 @@ def ard(p):
pass
return ""
@testing.deepTest(__test__)
@testing.deepTest(__test__())
class Test(unittest.TestCase):
input_dim = 9
num_inducing = 4
N = 3
Nsamples = 6e6
Nsamples = 5e6
def setUp(self):
i_s_dim_list = [2,4,3]
indices = numpy.cumsum(i_s_dim_list).tolist()
input_slices = [slice(a,b) for a,b in zip([None]+indices, indices)]
#input_slices[2] = deepcopy(input_slices[1])
input_slice_kern = GPy.kern.kern(9,
[
RBF(i_s_dim_list[0], np.random.rand(), np.random.rand(i_s_dim_list[0]), ARD=True),
RBF(i_s_dim_list[1], np.random.rand(), np.random.rand(i_s_dim_list[1]), ARD=True),
Linear(i_s_dim_list[2], np.random.rand(i_s_dim_list[2]), ARD=True)
],
input_slices = input_slices
)
self.kerns = (
input_slice_kern,
# (GPy.kern.rbf(self.input_dim, ARD=True) +
# GPy.kern.linear(self.input_dim, ARD=True) +
# GPy.kern.bias(self.input_dim) +
# GPy.kern.white(self.input_dim)),
(GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) +
GPy.kern.bias(self.input_dim) +
GPy.kern.white(self.input_dim)),
# (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
# GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
# GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) +
# GPy.kern.bias(self.input_dim) +
# GPy.kern.white(self.input_dim)),
# GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True),
# GPy.kern.linear(self.input_dim, ARD=False), GPy.kern.linear(self.input_dim, ARD=True),
# GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim),
@ -61,22 +79,22 @@ class Test(unittest.TestCase):
def test_psi1(self):
for kern in self.kerns:
Nsamples = 100
Nsamples = np.floor(self.Nsamples/300.)
psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance)
K_ = np.zeros((Nsamples, self.num_inducing))
diffs = []
for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
K = kern.K(q_x_sample_stripe, self.Z)
K = kern.K(q_x_sample_stripe[:Nsamples], self.Z)
K_ += K
diffs.append(((psi1 - (K_ / (i + 1)))).mean())
diffs.append((np.abs(psi1 - (K_ / (i + 1)))**2).mean())
K_ /= self.Nsamples / Nsamples
msg = "psi1: " + "+".join([p.name + ard(p) for p in kern.parts])
try:
import pylab
pylab.figure(msg)
pylab.plot(diffs)
self.assertTrue(np.allclose(psi1.squeeze(), K_,
rtol=1e-1, atol=.1),
# print msg, ((psi1.squeeze() - K_)**2).mean() < .01
self.assertTrue(((psi1.squeeze() - K_)**2).mean() < .01,
msg=msg + ": not matching")
# sys.stdout.write(".")
except:
@ -87,7 +105,7 @@ class Test(unittest.TestCase):
def test_psi2(self):
for kern in self.kerns:
Nsamples = 100
Nsamples = int(self.Nsamples/300.)
psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
K_ = np.zeros((self.num_inducing, self.num_inducing))
diffs = []
@ -95,13 +113,14 @@ class Test(unittest.TestCase):
K = kern.K(q_x_sample_stripe, self.Z)
K = (K[:, :, None] * K[:, None, :]).mean(0)
K_ += K
diffs.append(((psi2 - (K_ / (i + 1)))).mean())
diffs.append(((psi2 - (K_ / (i + 1)))**2).mean())
K_ /= self.Nsamples / Nsamples
msg = "psi2: {}".format("+".join([p.name + ard(p) for p in kern.parts]))
try:
import pylab
pylab.figure(msg)
pylab.plot(diffs)
# print msg, np.allclose(psi2.squeeze(), K_, rtol=1e-1, atol=.1)
self.assertTrue(np.allclose(psi2.squeeze(), K_,
rtol=1e-1, atol=.1),
msg=msg + ": not matching")
@ -114,10 +133,8 @@ class Test(unittest.TestCase):
pass
if __name__ == "__main__":
import sys
__test__ = 'deep' in sys.argv
sys.argv = ['',
'Test.test_psi0',
#'Test.test_psi0',
'Test.test_psi1',
'Test.test_psi2',
]
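The quantity these tests estimate is psi1[n, m] = E_{x ~ N(mu_n, diag(S_n))}[k(x, Z_m)], i.e. the kernel averaged over the variational input distribution; psi2 is the analogous second moment E[k(x, Z_m) k(x, Z_m')], computed in the loop above as (K[:, :, None] * K[:, None, :]).mean(0). A minimal standalone numpy sketch of the psi1 estimator (mc_psi1 is an illustrative name, not part of GPy):

import numpy as np

def mc_psi1(kern_K, Z, q_mean, q_var, n_samples=100000, seed=0):
    # kern_K(X, Z): any callable returning the cross-covariance matrix
    # Z: (M x Q) inducing inputs; q_mean, q_var: (N x Q) variational moments
    rng = np.random.RandomState(seed)
    N, Q = q_mean.shape
    psi1 = np.zeros((N, Z.shape[0]))
    for n in range(N):
        # draw from the diagonal Gaussian q(x_n) = N(mu_n, diag(S_n))
        x = q_mean[n] + rng.randn(n_samples, Q) * np.sqrt(q_var[n])
        psi1[n] = kern_K(x, Z).mean(axis=0)  # Monte Carlo average of kernel rows
    return psi1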

View file

@ -5,7 +5,6 @@
import unittest
import numpy as np
import GPy
from GPy.likelihoods.likelihood_functions import Binomial
class GradientTests(unittest.TestCase):
def setUp(self):
@ -199,10 +198,7 @@ class GradientTests(unittest.TestCase):
X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.Binomial()
likelihood = GPy.likelihoods.EP(Y, distribution)
m = GPy.core.GP(X, likelihood, kernel)
m.ensure_default_constraints()
m = GPy.models.GPClassification(X,Y,kernel=kernel)
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
@ -212,10 +208,11 @@ class GradientTests(unittest.TestCase):
Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
Z = np.linspace(0, 15, 4)[:, None]
kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.Binomial()
likelihood = GPy.likelihoods.EP(Y, distribution)
m = GPy.core.SparseGP(X, likelihood, kernel, Z)
m.ensure_default_constraints()
m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z)
#distribution = GPy.likelihoods.likelihood_functions.Binomial()
#likelihood = GPy.likelihoods.EP(Y, distribution)
#m = GPy.core.SparseGP(X, likelihood, kernel, Z)
#m.ensure_default_constraints()
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
@ -224,10 +221,24 @@ class GradientTests(unittest.TestCase):
X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
k = GPy.kern.rbf(1) + GPy.kern.white(1)
Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
m = GPy.models.FITCClassification(X, Y=Y)
m = GPy.models.FITCClassification(X, Y, kernel = k)
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
def multioutput_regression_1D(self):
X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5
X = np.vstack((X1, X2))
Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
Y = np.vstack((Y1, Y2))
k1 = GPy.kern.rbf(1)
m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
m.constrain_fixed('.*rbf_var', 1.)
self.assertTrue(m.checkgrad())
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."
unittest.main()

View file

@ -14,3 +14,4 @@ import visualize
import decorators
import classification
import latent_space_visualizations
#import multioutput

GPy/util/multioutput.py Normal file (35 additions)
View file

@ -0,0 +1,35 @@
import numpy as np
import warnings
from .. import kern
def build_lcm(input_dim, num_outputs, CK = [], NC = [], W_columns=1,W=None,kappa=None):
#TODO build_icm or build_lcm
"""
Builds a kernel for a linear coregionalization model
:param input_dim: input dimensionality
:param num_outputs: number of outputs
:param CK: list of coregionalized kernels (i.e., each will be multiplied by a coregionalise kernel)
:param NC: list of kernels added together with CK, but not multiplied by a coregionalise kernel
:param W_columns: number of columns of the coregionalization matrix W ('coregion_W')
:type W_columns: integer
"""
assert len(CK) > 0, "build_lcm requires at least one kernel in CK"
for k in CK:
if k.input_dim != input_dim:
k.input_dim = input_dim
warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
for k in NC:
if k.input_dim != input_dim + 1:
k.input_dim = input_dim + 1
warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
kernel = CK[0].prod(kern.coregionalise(num_outputs,W_columns,W,kappa),tensor=True)
for k in CK[1:]:
k_coreg = kern.coregionalise(num_outputs,W_columns,W,kappa)
kernel += k.prod(k_coreg,tensor=True)
for k in NC:
kernel += k
return kernel
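For orientation, the covariance build_lcm assembles is K((x, i), (x', j)) = sum_q B_q[i, j] k_q(x, x') over the kernels in CK, where each B_q = W W^T + diag(kappa) comes from the coregionalise part, plus the NC kernels added directly on the augmented input. A minimal usage sketch under the 2013-era GPy API in this diff (the kernel choices are illustrative, not from the source):

import GPy
from GPy.util.multioutput import build_lcm

# one RBF kernel shared across two outputs through a coregionalise kernel,
# plus an independent white-noise kernel on the augmented input
# (input_dim + 1, because the output index is carried as an extra column)
k = build_lcm(input_dim=1, num_outputs=2,
              CK=[GPy.kern.rbf(1)],
              NC=[GPy.kern.white(2)],
              W_columns=1)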

View file

@ -32,4 +32,15 @@ def std_norm_cdf(x):
x = float(x)
return weave.inline(code,arg_names=['x'],support_code=support_code)
def inv_std_norm_cdf(x):
"""
Inverse cumulative standard Gaussian distribution
Based on Winitzki, S. (2008)
"""
# map x in (0, 1) to z in (-1, 1); Phi^{-1}(x) = sqrt(2) * erfinv(2x - 1)
z = 2*x - 1
ln1z2 = np.log(1 - z**2)
# Winitzki's constant a = 8(pi - 3)/(3 pi (4 - pi))
a = 8*(np.pi - 3)/(3*np.pi*(4 - np.pi))
b = 2/(np.pi*a) + ln1z2/2
inv_erf = np.sign(z) * np.sqrt(np.sqrt(b**2 - ln1z2/a) - b)
return np.sqrt(2) * inv_erf
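For reference, the closed form implemented above is Winitzki's (2008) uniform approximation to the inverse error function, combined with $\Phi^{-1}(x) = \sqrt{2}\,\operatorname{erf}^{-1}(2x - 1)$:

$$\operatorname{erf}^{-1}(z) \approx \operatorname{sign}(z)\sqrt{\sqrt{\left(\frac{2}{\pi a} + \frac{\ln(1 - z^2)}{2}\right)^2 - \frac{\ln(1 - z^2)}{a}} - \left(\frac{2}{\pi a} + \frac{\ln(1 - z^2)}{2}\right)}, \qquad a = \frac{8(\pi - 3)}{3\pi(4 - \pi)},$$

which is exactly the b and ln1z2 bookkeeping in the code.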