mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-02 08:12:39 +02:00
Merge branch 'devel' of github.com:SheffieldML/GPy into devel
This commit is contained in:
commit
df43f59dbf
78 changed files with 4930 additions and 1652 deletions
|
|
@ -126,7 +126,7 @@ class FITC(SparseGP):
|
||||||
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
|
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
|
||||||
|
|
||||||
# the partial derivative vector for the likelihood
|
# the partial derivative vector for the likelihood
|
||||||
if self.likelihood.Nparams == 0:
|
if self.likelihood.num_params == 0:
|
||||||
# save computation here.
|
# save computation here.
|
||||||
self.partial_for_likelihood = None
|
self.partial_for_likelihood = None
|
||||||
elif self.likelihood.is_heteroscedastic:
|
elif self.likelihood.is_heteroscedastic:
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import numpy as np
|
||||||
import pylab as pb
|
import pylab as pb
|
||||||
from .. import kern
|
from .. import kern
|
||||||
from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs
|
from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs
|
||||||
from ..likelihoods import EP
|
from ..likelihoods import EP, Laplace
|
||||||
from gp_base import GPBase
|
from gp_base import GPBase
|
||||||
|
|
||||||
class GP(GPBase):
|
class GP(GPBase):
|
||||||
|
|
@ -25,20 +25,23 @@ class GP(GPBase):
|
||||||
"""
|
"""
|
||||||
def __init__(self, X, likelihood, kernel, normalize_X=False):
|
def __init__(self, X, likelihood, kernel, normalize_X=False):
|
||||||
GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
||||||
self._set_params(self._get_params())
|
self.update_likelihood_approximation()
|
||||||
|
|
||||||
def getstate(self):
|
|
||||||
return GPBase.getstate(self)
|
|
||||||
|
|
||||||
def setstate(self, state):
|
|
||||||
GPBase.setstate(self, state)
|
|
||||||
self._set_params(self._get_params())
|
|
||||||
|
|
||||||
def _set_params(self, p):
|
def _set_params(self, p):
|
||||||
self.kern._set_params_transformed(p[:self.kern.num_params_transformed()])
|
new_kern_params = p[:self.kern.num_params_transformed()]
|
||||||
self.likelihood._set_params(p[self.kern.num_params_transformed():])
|
new_likelihood_params = p[self.kern.num_params_transformed():]
|
||||||
|
old_likelihood_params = self.likelihood._get_params()
|
||||||
|
|
||||||
|
self.kern._set_params_transformed(new_kern_params)
|
||||||
|
self.likelihood._set_params_transformed(new_likelihood_params)
|
||||||
|
|
||||||
self.K = self.kern.K(self.X)
|
self.K = self.kern.K(self.X)
|
||||||
|
|
||||||
|
#Re fit likelihood approximation (if it is an approx), as parameters have changed
|
||||||
|
if isinstance(self.likelihood, Laplace):
|
||||||
|
self.likelihood.fit_full(self.K)
|
||||||
|
|
||||||
self.K += self.likelihood.covariance_matrix
|
self.K += self.likelihood.covariance_matrix
|
||||||
|
|
||||||
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
|
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
|
||||||
|
|
@ -55,6 +58,10 @@ class GP(GPBase):
|
||||||
tmp, _ = dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
|
tmp, _ = dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
|
||||||
self.dL_dK = 0.5 * (tmp - self.output_dim * self.Ki)
|
self.dL_dK = 0.5 * (tmp - self.output_dim * self.Ki)
|
||||||
|
|
||||||
|
#Adding dZ_dK (0 for a non-approximate likelihood, compensates for
|
||||||
|
#additional gradients of K when log-likelihood has non-zero Z term)
|
||||||
|
self.dL_dK += self.likelihood.dZ_dK
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
|
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
|
||||||
|
|
||||||
|
|
@ -94,19 +101,13 @@ class GP(GPBase):
|
||||||
return (-0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) -
|
return (-0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) -
|
||||||
0.5 * self.output_dim * self.K_logdet + self._model_fit_term() + self.likelihood.Z)
|
0.5 * self.output_dim * self.K_logdet + self._model_fit_term() + self.likelihood.Z)
|
||||||
|
|
||||||
|
|
||||||
def _log_likelihood_gradients(self):
|
def _log_likelihood_gradients(self):
|
||||||
"""
|
"""
|
||||||
The gradient of all parameters.
|
The gradient of all parameters.
|
||||||
|
|
||||||
Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
|
Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
|
||||||
"""
|
"""
|
||||||
#return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
|
return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
|
||||||
if not isinstance(self.likelihood,EP):
|
|
||||||
tmp = np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
|
|
||||||
else:
|
|
||||||
tmp = np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
|
|
||||||
return tmp
|
|
||||||
|
|
||||||
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
|
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
|
||||||
"""
|
"""
|
||||||
|
|
@ -193,3 +194,11 @@ class GP(GPBase):
|
||||||
"""
|
"""
|
||||||
Xnew = self._add_output_index(Xnew, output)
|
Xnew = self._add_output_index(Xnew, output)
|
||||||
return self.predict(Xnew, which_parts=which_parts, full_cov=full_cov, likelihood_args=likelihood_args)
|
return self.predict(Xnew, which_parts=which_parts, full_cov=full_cov, likelihood_args=likelihood_args)
|
||||||
|
|
||||||
|
def getstate(self):
|
||||||
|
return GPBase.getstate(self)
|
||||||
|
|
||||||
|
def setstate(self, state):
|
||||||
|
GPBase.setstate(self, state)
|
||||||
|
self._set_params(self._get_params())
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,9 @@ from ..likelihoods import Gaussian, Gaussian_Mixed_Noise
|
||||||
class GPBase(Model):
|
class GPBase(Model):
|
||||||
"""
|
"""
|
||||||
Gaussian process base model for holding shared behaviour between
|
Gaussian process base model for holding shared behaviour between
|
||||||
sparse_GP and GP models.
|
sparse_GP and GP models, and potentially other models in the future.
|
||||||
|
|
||||||
|
Here we define some functions that are used by these models.
|
||||||
"""
|
"""
|
||||||
def __init__(self, X, likelihood, kernel, normalize_X=False):
|
def __init__(self, X, likelihood, kernel, normalize_X=False):
|
||||||
self.X = X
|
self.X = X
|
||||||
|
|
@ -34,29 +36,6 @@ class GPBase(Model):
|
||||||
# All leaf nodes should call self._set_params(self._get_params()) at
|
# All leaf nodes should call self._set_params(self._get_params()) at
|
||||||
# the end
|
# the end
|
||||||
|
|
||||||
def getstate(self):
|
|
||||||
"""
|
|
||||||
Get the current state of the class, here we return everything that is needed to recompute the model.
|
|
||||||
"""
|
|
||||||
return Model.getstate(self) + [self.X,
|
|
||||||
self.num_data,
|
|
||||||
self.input_dim,
|
|
||||||
self.kern,
|
|
||||||
self.likelihood,
|
|
||||||
self.output_dim,
|
|
||||||
self._Xoffset,
|
|
||||||
self._Xscale]
|
|
||||||
|
|
||||||
def setstate(self, state):
|
|
||||||
self._Xscale = state.pop()
|
|
||||||
self._Xoffset = state.pop()
|
|
||||||
self.output_dim = state.pop()
|
|
||||||
self.likelihood = state.pop()
|
|
||||||
self.kern = state.pop()
|
|
||||||
self.input_dim = state.pop()
|
|
||||||
self.num_data = state.pop()
|
|
||||||
self.X = state.pop()
|
|
||||||
Model.setstate(self, state)
|
|
||||||
|
|
||||||
def posterior_samples_f(self,X,size=10,which_parts='all',full_cov=True):
|
def posterior_samples_f(self,X,size=10,which_parts='all',full_cov=True):
|
||||||
"""
|
"""
|
||||||
|
|
@ -110,90 +89,43 @@ class GPBase(Model):
|
||||||
|
|
||||||
return Ysim
|
return Ysim
|
||||||
|
|
||||||
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
|
def plot_f(self, *args, **kwargs):
|
||||||
"""
|
"""
|
||||||
Plot the GP's view of the world, where the data is normalized and the
|
Plot the GP's view of the world, where the data is normalized and before applying a likelihood.
|
||||||
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
|
|
||||||
- In two dimsensions, a contour-plot shows the mean predicted function
|
|
||||||
- Not implemented in higher dimensions
|
|
||||||
|
|
||||||
:param samples: the number of a posteriori samples to plot
|
This is a convenience function: we simply call self.plot with the
|
||||||
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
|
argument plot_raw set to True. All args and kwargs are passed on to
|
||||||
:param which_data: which if the training data to plot (default all)
|
plot.
|
||||||
:type which_data: 'all' or a slice object to slice self.X, self.Y
|
|
||||||
:param which_parts: which of the kernel functions to plot (additively)
|
|
||||||
:type which_parts: 'all', or list of bools
|
|
||||||
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
|
|
||||||
:type resolution: int
|
|
||||||
:param full_cov:
|
|
||||||
:type full_cov: bool
|
|
||||||
:param fignum: figure to plot on.
|
|
||||||
:type fignum: figure number
|
|
||||||
:param ax: axes to plot on.
|
|
||||||
:type ax: axes handle
|
|
||||||
|
|
||||||
:param output: which output to plot (for multiple output models only)
|
see also: gp_base.plot
|
||||||
:type output: integer (first output is 0)
|
|
||||||
"""
|
"""
|
||||||
if which_data == 'all':
|
kwargs['plot_raw'] = True
|
||||||
which_data = slice(None)
|
self.plot(*args, **kwargs)
|
||||||
|
|
||||||
if ax is None:
|
def plot(self, plot_limits=None, which_data_rows='all',
|
||||||
fig = pb.figure(num=fignum)
|
which_data_ycols='all', which_parts='all', fixed_inputs=[],
|
||||||
ax = fig.add_subplot(111)
|
levels=20, samples=0, fignum=None, ax=None, resolution=None,
|
||||||
|
plot_raw=False,
|
||||||
if self.X.shape[1] == 1:
|
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
|
||||||
resolution = resolution or 200
|
|
||||||
Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
|
|
||||||
|
|
||||||
m, v = self._raw_predict(Xnew, which_parts=which_parts)
|
|
||||||
if samples:
|
|
||||||
Ysim = self.posterior_samples_f(Xnew, samples, which_parts=which_parts, full_cov=True)
|
|
||||||
for yi in Ysim.T:
|
|
||||||
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
|
|
||||||
gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
|
|
||||||
|
|
||||||
ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
|
|
||||||
ax.set_xlim(xmin, xmax)
|
|
||||||
ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
|
|
||||||
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
|
|
||||||
ax.set_ylim(ymin, ymax)
|
|
||||||
|
|
||||||
elif self.X.shape[1] == 2:
|
|
||||||
|
|
||||||
resolution = resolution or 50
|
|
||||||
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
|
|
||||||
m, v = self._raw_predict(Xnew, which_parts=which_parts)
|
|
||||||
m = m.reshape(resolution, resolution).T
|
|
||||||
ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
|
|
||||||
ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
|
|
||||||
ax.set_xlim(xmin[0], xmax[0])
|
|
||||||
ax.set_ylim(xmin[1], xmax[1])
|
|
||||||
|
|
||||||
if samples:
|
|
||||||
warnings.warn("Samples only implemented for 1 dimensional inputs.")
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
|
||||||
|
|
||||||
def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
|
|
||||||
"""
|
"""
|
||||||
Plot the GP with noise where the likelihood is Gaussian.
|
|
||||||
|
|
||||||
Plot the posterior of the GP.
|
Plot the posterior of the GP.
|
||||||
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
|
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
|
||||||
- In two dimensions, a contour-plot shows the mean predicted function
|
- In two dimensions, a contour-plot shows the mean predicted function
|
||||||
- Not implemented in higher dimensions
|
- In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.
|
||||||
|
|
||||||
Can plot only part of the data and part of the posterior functions
|
Can plot only part of the data and part of the posterior functions
|
||||||
using which_data and which_functions
|
using which_data_rows, which_data_ycols and which_parts
|
||||||
|
|
||||||
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
|
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
|
||||||
:type plot_limits: np.array
|
:type plot_limits: np.array
|
||||||
:param which_data: which if the training data to plot (default all)
|
:param which_data_rows: which of the training data to plot (default all)
|
||||||
:type which_data: 'all' or a slice object to slice self.X, self.Y
|
:type which_data_rows: 'all' or a slice object to slice self.X, self.Y
|
||||||
|
:param which_data_ycols: when the data has several columns (independent outputs), only plot these
|
||||||
|
:type which_data_ycols: 'all' or a list of integers
|
||||||
:param which_parts: which of the kernel functions to plot (additively)
|
:param which_parts: which of the kernel functions to plot (additively)
|
||||||
:type which_parts: 'all', or list of bools
|
:type which_parts: 'all', or list of bools
|
||||||
|
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
|
||||||
|
:type fixed_inputs: a list of tuples
|
||||||
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
|
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
|
||||||
:type resolution: int
|
:type resolution: int
|
||||||
:param levels: number of levels to plot in a contour plot.
|
:param levels: number of levels to plot in a contour plot.
|
||||||
|
|
@ -205,216 +137,138 @@ class GPBase(Model):
|
||||||
:param ax: axes to plot on.
|
:param ax: axes to plot on.
|
||||||
:type ax: axes handle
|
:type ax: axes handle
|
||||||
:type output: integer (first output is 0)
|
:type output: integer (first output is 0)
|
||||||
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
|
|
||||||
:type fixed_inputs: a list of tuples
|
|
||||||
:param linecol: color of line to plot.
|
:param linecol: color of line to plot.
|
||||||
:type linecol:
|
:type linecol:
|
||||||
:param fillcol: color of fill
|
:param fillcol: color of fill
|
||||||
:param levels: for 2D plotting, the number of contour levels to use. If ax is None, a new figure is created
|
:param levels: for 2D plotting, the number of contour levels to use. If ax is None, a new figure is created
|
||||||
"""
|
"""
|
||||||
if which_data == 'all':
|
#deal with optional arguments
|
||||||
which_data = slice(None)
|
if which_data_rows == 'all':
|
||||||
|
which_data_rows = slice(None)
|
||||||
|
if which_data_ycols == 'all':
|
||||||
|
which_data_ycols = np.arange(self.output_dim)
|
||||||
|
if len(which_data_ycols)==0:
|
||||||
|
raise ValueError('No data selected for plotting')
|
||||||
if ax is None:
|
if ax is None:
|
||||||
fig = pb.figure(num=fignum)
|
fig = pb.figure(num=fignum)
|
||||||
ax = fig.add_subplot(111)
|
ax = fig.add_subplot(111)
|
||||||
|
|
||||||
plotdims = self.input_dim - len(fixed_inputs)
|
#work out what the inputs are for plotting (1D or 2D)
|
||||||
if plotdims == 1:
|
fixed_dims = np.array([i for i,v in fixed_inputs])
|
||||||
|
free_dims = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
|
||||||
|
|
||||||
|
#one dimensional plotting
|
||||||
|
if len(free_dims) == 1:
|
||||||
|
|
||||||
|
#define the frame on which to plot
|
||||||
resolution = resolution or 200
|
resolution = resolution or 200
|
||||||
|
|
||||||
Xu = self.X * self._Xscale + self._Xoffset #NOTE self.X are the normalized values now
|
Xu = self.X * self._Xscale + self._Xoffset #NOTE self.X are the normalized values now
|
||||||
|
Xnew, xmin, xmax = x_frame1D(Xu[:,free_dims], plot_limits=plot_limits)
|
||||||
fixed_dims = np.array([i for i,v in fixed_inputs])
|
|
||||||
freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
|
|
||||||
|
|
||||||
Xnew, xmin, xmax = x_frame1D(Xu[:,freedim], plot_limits=plot_limits)
|
|
||||||
Xgrid = np.empty((Xnew.shape[0],self.input_dim))
|
Xgrid = np.empty((Xnew.shape[0],self.input_dim))
|
||||||
Xgrid[:,freedim] = Xnew
|
Xgrid[:,free_dims] = Xnew
|
||||||
for i,v in fixed_inputs:
|
for i,v in fixed_inputs:
|
||||||
Xgrid[:,i] = v
|
Xgrid[:,i] = v
|
||||||
|
|
||||||
m, v, lower, upper = self.predict(Xgrid, which_parts=which_parts)
|
#make a prediction on the frame and plot it
|
||||||
|
if plot_raw:
|
||||||
|
m, v = self._raw_predict(Xgrid, which_parts=which_parts)
|
||||||
|
lower = m - 2*np.sqrt(v)
|
||||||
|
upper = m + 2*np.sqrt(v)
|
||||||
|
Y = self.likelihood.Y
|
||||||
|
else:
|
||||||
|
m, v, lower, upper = self.predict(Xgrid, which_parts=which_parts)
|
||||||
|
Y = self.likelihood.data
|
||||||
|
for d in which_data_ycols:
|
||||||
|
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
|
||||||
|
ax.plot(Xu[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5)
|
||||||
|
|
||||||
|
#optionally plot some samples
|
||||||
if samples: #NOTE not tested with fixed_inputs
|
if samples: #NOTE not tested with fixed_inputs
|
||||||
Ysim = self.posterior_samples(Xgrid, samples, which_parts=which_parts, full_cov=True)
|
Ysim = self.posterior_samples(Xgrid, samples, which_parts=which_parts, full_cov=True)
|
||||||
for yi in Ysim.T:
|
for yi in Ysim.T:
|
||||||
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
|
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
|
||||||
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
|
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
|
||||||
|
|
||||||
for d in range(m.shape[1]):
|
#set the limits of the plot to some sensible values
|
||||||
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
|
ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper))
|
||||||
ax.plot(Xu[which_data,freedim], self.likelihood.data[which_data, d], 'kx', mew=1.5)
|
|
||||||
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
|
|
||||||
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
|
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
|
||||||
ax.set_xlim(xmin, xmax)
|
ax.set_xlim(xmin, xmax)
|
||||||
ax.set_ylim(ymin, ymax)
|
ax.set_ylim(ymin, ymax)
|
||||||
|
|
||||||
elif self.X.shape[1] == 2:
|
#2D plotting
|
||||||
|
elif len(free_dims) == 2:
|
||||||
|
|
||||||
|
#define the frame for plotting on
|
||||||
resolution = resolution or 50
|
resolution = resolution or 50
|
||||||
Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
|
Xu = self.X * self._Xscale + self._Xoffset #NOTE self.X are the normalized values now
|
||||||
|
Xnew, _, _, xmin, xmax = x_frame2D(Xu[:,free_dims], plot_limits, resolution)
|
||||||
|
Xgrid = np.empty((Xnew.shape[0],self.input_dim))
|
||||||
|
Xgrid[:,free_dims] = Xnew
|
||||||
|
for i,v in fixed_inputs:
|
||||||
|
Xgrid[:,i] = v
|
||||||
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
|
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
|
||||||
m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
|
|
||||||
m = m.reshape(resolution, resolution).T
|
#predict on the frame and plot
|
||||||
ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
|
if use_raw_predict:
|
||||||
Yf = self.likelihood.Y.flatten()
|
m, _ = self._raw_predict(Xgrid, which_parts=which_parts)
|
||||||
ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
|
Y = self.likelihood.Y
|
||||||
|
else:
|
||||||
|
m, _, _, _ = self.predict(Xgrid, which_parts=which_parts)
|
||||||
|
Y = self.likelihood.data
|
||||||
|
for d in which_data_ycols:
|
||||||
|
m_d = m[:,d].reshape(resolution, resolution).T
|
||||||
|
ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
|
||||||
|
ax.scatter(self.X[which_data_rows, free_dims[0]], self.X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
|
||||||
|
|
||||||
|
#set the limits of the plot to some sensible values
|
||||||
ax.set_xlim(xmin[0], xmax[0])
|
ax.set_xlim(xmin[0], xmax[0])
|
||||||
ax.set_ylim(xmin[1], xmax[1])
|
ax.set_ylim(xmin[1], xmax[1])
|
||||||
|
|
||||||
if samples:
|
if samples:
|
||||||
warnings.warn("Samples only implemented for 1 dimensional inputs.")
|
warnings.warn("Samples are rather difficult to plot for 2D inputs...")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
||||||
|
|
||||||
def plot_single_output_f(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
|
def getstate(self):
|
||||||
"""
|
"""
|
||||||
For a specific output, in a multioutput model, this function works just as plot_f on single output models.
|
Get the current state of the class. This is only used to efficiently
|
||||||
|
pickle the model. See also self.setstate
|
||||||
:param output: which output to plot (for multiple output models only)
|
|
||||||
:type output: integer (first output is 0)
|
|
||||||
:param samples: the number of a posteriori samples to plot
|
|
||||||
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
|
|
||||||
:param which_data: which if the training data to plot (default all)
|
|
||||||
:type which_data: 'all' or a slice object to slice self.X, self.Y
|
|
||||||
:param which_parts: which of the kernel functions to plot (additively)
|
|
||||||
:type which_parts: 'all', or list of bools
|
|
||||||
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
|
|
||||||
:type resolution: int
|
|
||||||
:param full_cov:
|
|
||||||
:type full_cov: bool
|
|
||||||
:param fignum: figure to plot on.
|
|
||||||
:type fignum: figure number
|
|
||||||
:param ax: axes to plot on.
|
|
||||||
:type ax: axes handle
|
|
||||||
"""
|
"""
|
||||||
assert output is not None, "An output must be specified."
|
return Model.getstate(self) + [self.X,
|
||||||
assert len(self.likelihood.noise_model_list) > output, "The model has only %s outputs." %(self.output_dim + 1)
|
self.num_data,
|
||||||
|
self.input_dim,
|
||||||
|
self.kern,
|
||||||
|
self.likelihood,
|
||||||
|
self.output_dim,
|
||||||
|
self._Xoffset,
|
||||||
|
self._Xscale]
|
||||||
|
|
||||||
if which_data == 'all':
|
def setstate(self, state):
|
||||||
which_data = slice(None)
|
|
||||||
|
|
||||||
if ax is None:
|
|
||||||
fig = pb.figure(num=fignum)
|
|
||||||
ax = fig.add_subplot(111)
|
|
||||||
|
|
||||||
if self.X.shape[1] == 2:
|
|
||||||
Xu = self.X[self.X[:,-1]==output ,0:1]
|
|
||||||
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
|
|
||||||
Xnew_indexed = self._add_output_index(Xnew,output)
|
|
||||||
|
|
||||||
m, v = self._raw_predict(Xnew_indexed, which_parts=which_parts)
|
|
||||||
|
|
||||||
if samples:
|
|
||||||
Ysim = self.posterior_samples_f(Xnew_indexed, samples, which_parts=which_parts, full_cov=True)
|
|
||||||
for yi in Ysim.T:
|
|
||||||
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
|
|
||||||
|
|
||||||
gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
|
|
||||||
ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5)
|
|
||||||
ax.set_xlim(xmin, xmax)
|
|
||||||
ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
|
|
||||||
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
|
|
||||||
ax.set_ylim(ymin, ymax)
|
|
||||||
|
|
||||||
elif self.X.shape[1] == 3:
|
|
||||||
raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
|
|
||||||
#if samples:
|
|
||||||
# warnings.warn("Samples only implemented for 1 dimensional inputs.")
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
|
||||||
|
|
||||||
|
|
||||||
def plot_single_output(self, output=None, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
|
|
||||||
"""
|
"""
|
||||||
For a specific output, in a multioutput model, this function works just as plot_f on single output models.
|
Set the state of the model. Used for efficient pickling
|
||||||
|
|
||||||
:param output: which output to plot (for multiple output models only)
|
|
||||||
:type output: integer (first output is 0)
|
|
||||||
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
|
|
||||||
:type plot_limits: np.array
|
|
||||||
:param which_data: which if the training data to plot (default all)
|
|
||||||
:type which_data: 'all' or a slice object to slice self.X, self.Y
|
|
||||||
:param which_parts: which of the kernel functions to plot (additively)
|
|
||||||
:type which_parts: 'all', or list of bools
|
|
||||||
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
|
|
||||||
:type resolution: int
|
|
||||||
:param levels: number of levels to plot in a contour plot.
|
|
||||||
:type levels: int
|
|
||||||
:param samples: the number of a posteriori samples to plot
|
|
||||||
:type samples: int
|
|
||||||
:param fignum: figure to plot on.
|
|
||||||
:type fignum: figure number
|
|
||||||
:param ax: axes to plot on.
|
|
||||||
:type ax: axes handle
|
|
||||||
:type output: integer (first output is 0)
|
|
||||||
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
|
|
||||||
:type fixed_inputs: a list of tuples
|
|
||||||
:param linecol: color of line to plot.
|
|
||||||
:type linecol:
|
|
||||||
:param fillcol: color of fill
|
|
||||||
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
|
|
||||||
"""
|
"""
|
||||||
assert output is not None, "An output must be specified."
|
self._Xscale = state.pop()
|
||||||
assert len(self.likelihood.noise_model_list) > output, "The model has only %s outputs." %(self.output_dim + 1)
|
self._Xoffset = state.pop()
|
||||||
if which_data == 'all':
|
self.output_dim = state.pop()
|
||||||
which_data = slice(None)
|
self.likelihood = state.pop()
|
||||||
|
self.kern = state.pop()
|
||||||
|
self.input_dim = state.pop()
|
||||||
|
self.num_data = state.pop()
|
||||||
|
self.X = state.pop()
|
||||||
|
Model.setstate(self, state)
|
||||||
|
|
||||||
if ax is None:
|
def log_predictive_density(self, x_test, y_test):
|
||||||
fig = pb.figure(num=fignum)
|
|
||||||
ax = fig.add_subplot(111)
|
|
||||||
|
|
||||||
if self.X.shape[1] == 2:
|
|
||||||
resolution = resolution or 200
|
|
||||||
|
|
||||||
Xu = self.X[self.X[:,-1]==output,:] #keep the output of interest
|
|
||||||
Xu = self.X * self._Xscale + self._Xoffset
|
|
||||||
Xu = self.X[self.X[:,-1]==output ,0:1] #get rid of the index column
|
|
||||||
|
|
||||||
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
|
|
||||||
Xnew_indexed = self._add_output_index(Xnew,output)
|
|
||||||
|
|
||||||
|
|
||||||
m, v, lower, upper = self.predict(Xnew_indexed, which_parts=which_parts,noise_model=output)
|
|
||||||
|
|
||||||
if samples: #NOTE not tested with fixed_inputs
|
|
||||||
Ysim = self.posterior_samples(Xnew_indexed, samples, which_parts=which_parts, full_cov=True,noise_model=output)
|
|
||||||
for yi in Ysim.T:
|
|
||||||
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
|
|
||||||
|
|
||||||
for d in range(m.shape[1]):
|
|
||||||
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
|
|
||||||
ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5)
|
|
||||||
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
|
|
||||||
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
|
|
||||||
ax.set_xlim(xmin, xmax)
|
|
||||||
ax.set_ylim(ymin, ymax)
|
|
||||||
|
|
||||||
elif self.X.shape[1] == 3:
|
|
||||||
raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
|
|
||||||
#if samples:
|
|
||||||
# warnings.warn("Samples only implemented for 1 dimensional inputs.")
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
|
||||||
|
|
||||||
|
|
||||||
def _add_output_index(self,X,output):
|
|
||||||
"""
|
"""
|
||||||
In a multioutput model, appends an index column to X to specify the output it is related to.
|
Calculation of the log predictive density
|
||||||
|
|
||||||
:param X: Input data
|
.. math::
|
||||||
:type X: np.ndarray, N x self.input_dim
|
p(y_{*}|D) = \int p(y_{*}|f_{*}) p(f_{*}|\mu_{*}, \\sigma^{2}_{*}) df_{*}
|
||||||
:param output: output X is related to
|
|
||||||
:type output: integer in {0,..., output_dim-1}
|
|
||||||
|
|
||||||
.. Note:: For multiple non-independent outputs models only.
|
:param x_test: test observations (x_{*})
|
||||||
|
:type x_test: (Nx1) array
|
||||||
|
:param y_test: test observations (y_{*})
|
||||||
|
:type y_test: (Nx1) array
|
||||||
"""
|
"""
|
||||||
|
mu_star, var_star = self._raw_predict(x_test)
|
||||||
assert hasattr(self,'multioutput'), 'This function is for multiple output models only.'
|
return self.likelihood.log_predictive_density(y_test, mu_star, var_star)
|
||||||
|
|
||||||
index = np.ones((X.shape[0],1))*output
|
|
||||||
return np.hstack((X,index))
|
|
||||||
|
|
|
||||||
|
|
@ -259,7 +259,7 @@ class Model(Parameterized):
|
||||||
these terms are present in the name the parameter is
|
these terms are present in the name the parameter is
|
||||||
constrained positive.
|
constrained positive.
|
||||||
"""
|
"""
|
||||||
positive_strings = ['variance', 'lengthscale', 'precision', 'kappa']
|
positive_strings = ['variance', 'lengthscale', 'precision', 'decay', 'kappa']
|
||||||
# param_names = self._get_param_names()
|
# param_names = self._get_param_names()
|
||||||
currently_constrained = self.all_constrained_indices()
|
currently_constrained = self.all_constrained_indices()
|
||||||
to_make_positive = []
|
to_make_positive = []
|
||||||
|
|
@ -549,7 +549,7 @@ class Model(Parameterized):
|
||||||
|
|
||||||
.. Note:: kwargs are passed to update_likelihood and optimize functions.
|
.. Note:: kwargs are passed to update_likelihood and optimize functions.
|
||||||
"""
|
"""
|
||||||
assert isinstance(self.likelihood, likelihoods.EP) or isinstance(self.likelihood, likelihoods.EP_Mixed_Noise), "pseudo_EM is only available for EP likelihoods"
|
assert isinstance(self.likelihood, (likelihoods.EP, likelihoods.EP_Mixed_Noise, likelihoods.Laplace)), "pseudo_EM is only available for approximate likelihoods"
|
||||||
ll_change = stop_crit + 1.
|
ll_change = stop_crit + 1.
|
||||||
iteration = 0
|
iteration = 0
|
||||||
last_ll = -np.inf
|
last_ll = -np.inf
|
||||||
|
|
|
||||||
|
|
@ -52,23 +52,6 @@ class SparseGP(GPBase):
|
||||||
|
|
||||||
self._const_jitter = None
|
self._const_jitter = None
|
||||||
|
|
||||||
def getstate(self):
|
|
||||||
"""
|
|
||||||
Get the current state of the class,
|
|
||||||
here just all the indices, rest can get recomputed
|
|
||||||
"""
|
|
||||||
return GPBase.getstate(self) + [self.Z,
|
|
||||||
self.num_inducing,
|
|
||||||
self.has_uncertain_inputs,
|
|
||||||
self.X_variance]
|
|
||||||
|
|
||||||
def setstate(self, state):
|
|
||||||
self.X_variance = state.pop()
|
|
||||||
self.has_uncertain_inputs = state.pop()
|
|
||||||
self.num_inducing = state.pop()
|
|
||||||
self.Z = state.pop()
|
|
||||||
GPBase.setstate(self, state)
|
|
||||||
|
|
||||||
def _compute_kernel_matrices(self):
|
def _compute_kernel_matrices(self):
|
||||||
# kernel computations, using BGPLVM notation
|
# kernel computations, using BGPLVM notation
|
||||||
self.Kmm = self.kern.K(self.Z)
|
self.Kmm = self.kern.K(self.Z)
|
||||||
|
|
@ -87,7 +70,6 @@ class SparseGP(GPBase):
|
||||||
|
|
||||||
# factor Kmm
|
# factor Kmm
|
||||||
self._Lm = jitchol(self.Kmm + self._const_jitter)
|
self._Lm = jitchol(self.Kmm + self._const_jitter)
|
||||||
# TODO: no white kernel needed anymore, all noise in likelihood --------
|
|
||||||
|
|
||||||
# The rather complex computations of self._A
|
# The rather complex computations of self._A
|
||||||
if self.has_uncertain_inputs:
|
if self.has_uncertain_inputs:
|
||||||
|
|
@ -156,7 +138,7 @@ class SparseGP(GPBase):
|
||||||
|
|
||||||
|
|
||||||
# the partial derivative vector for the likelihood
|
# the partial derivative vector for the likelihood
|
||||||
if self.likelihood.Nparams == 0:
|
if self.likelihood.num_params == 0:
|
||||||
# save computation here.
|
# save computation here.
|
||||||
self.partial_for_likelihood = None
|
self.partial_for_likelihood = None
|
||||||
elif self.likelihood.is_heteroscedastic:
|
elif self.likelihood.is_heteroscedastic:
|
||||||
|
|
@ -341,7 +323,10 @@ class SparseGP(GPBase):
|
||||||
return mean, var, _025pm, _975pm
|
return mean, var, _025pm, _975pm
|
||||||
|
|
||||||
|
|
||||||
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
|
def plot_f(self, samples=0, plot_limits=None, which_data_rows='all',
|
||||||
|
which_data_ycols='all', which_parts='all', resolution=None,
|
||||||
|
full_cov=False, fignum=None, ax=None):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Plot the GP's view of the world, where the data is normalized and the
|
Plot the GP's view of the world, where the data is normalized and the
|
||||||
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
|
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
|
||||||
|
|
@ -350,8 +335,8 @@ class SparseGP(GPBase):
|
||||||
|
|
||||||
:param samples: the number of a posteriori samples to plot
|
:param samples: the number of a posteriori samples to plot
|
||||||
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
|
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
|
||||||
:param which_data: which if the training data to plot (default all)
|
:param which_data_rows: which of the training data to plot (default all)
|
||||||
:type which_data: 'all' or a slice object to slice self.X, self.Y
|
:type which_data_rows: 'all' or a slice object to slice self.X, self.Y
|
||||||
:param which_parts: which of the kernel functions to plot (additively)
|
:param which_parts: which of the kernel functions to plot (additively)
|
||||||
:type which_parts: 'all', or list of bools
|
:type which_parts: 'all', or list of bools
|
||||||
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
|
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
|
||||||
|
|
@ -371,10 +356,10 @@ class SparseGP(GPBase):
|
||||||
ax = fig.add_subplot(111)
|
ax = fig.add_subplot(111)
|
||||||
if fignum is None and ax is None:
|
if fignum is None and ax is None:
|
||||||
fignum = fig.num
|
fignum = fig.num
|
||||||
if which_data is 'all':
|
if which_data_rows is 'all':
|
||||||
which_data = slice(None)
|
which_data_rows = slice(None)
|
||||||
|
|
||||||
GPBase.plot_f(self, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, full_cov=full_cov, fignum=fignum, ax=ax)
|
GPBase.plot_f(self, samples=samples, plot_limits=plot_limits, which_data_rows=which_data_rows, which_data_ycols=which_data_ycols, which_parts=which_parts, resolution=resolution, fignum=fignum, ax=ax)
|
||||||
|
|
||||||
if self.X.shape[1] == 1:
|
if self.X.shape[1] == 1:
|
||||||
if self.has_uncertain_inputs:
|
if self.has_uncertain_inputs:
|
||||||
|
|
@ -389,177 +374,98 @@ class SparseGP(GPBase):
|
||||||
Zu = self.Z * self._Xscale + self._Xoffset
|
Zu = self.Z * self._Xscale + self._Xoffset
|
||||||
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
|
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
||||||
|
|
||||||
def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None):
|
def plot(self, plot_limits=None, which_data_rows='all',
|
||||||
|
which_data_ycols='all', which_parts='all', fixed_inputs=[],
|
||||||
|
plot_raw=False,
|
||||||
|
levels=20, samples=0, fignum=None, ax=None, resolution=None):
|
||||||
|
"""
|
||||||
|
Plot the posterior of the sparse GP.
|
||||||
|
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
|
||||||
|
- In two dimensions, a contour-plot shows the mean predicted function
|
||||||
|
- In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.
|
||||||
|
|
||||||
|
Can plot only part of the data and part of the posterior functions
|
||||||
|
using which_data_rows, which_data_ycols and which_parts
|
||||||
|
|
||||||
|
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
|
||||||
|
:type plot_limits: np.array
|
||||||
|
:param which_data_rows: which of the training data to plot (default all)
|
||||||
|
:type which_data_rows: 'all' or a slice object to slice self.X, self.Y
|
||||||
|
:param which_data_ycols: when the data has several columns (independent outputs), only plot these
|
||||||
|
:type which_data_ycols: 'all' or a list of integers
|
||||||
|
:param which_parts: which of the kernel functions to plot (additively)
|
||||||
|
:type which_parts: 'all', or list of bools
|
||||||
|
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
|
||||||
|
:type fixed_inputs: a list of tuples
|
||||||
|
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
|
||||||
|
:type resolution: int
|
||||||
|
:param levels: number of levels to plot in a contour plot.
|
||||||
|
:type levels: int
|
||||||
|
:param samples: the number of a posteriori samples to plot
|
||||||
|
:type samples: int
|
||||||
|
:param fignum: figure to plot on.
|
||||||
|
:type fignum: figure number
|
||||||
|
:param ax: axes to plot on.
|
||||||
|
:type ax: axes handle
|
||||||
|
:type output: integer (first output is 0)
|
||||||
|
:param linecol: color of line to plot.
|
||||||
|
:type linecol:
|
||||||
|
:param fillcol: color of fill
|
||||||
|
:param levels: for 2D plotting, the number of contour levels to use. If ax is None, a new figure is created
|
||||||
|
"""
|
||||||
|
#work out which ax to plot on
|
||||||
if ax is None:
|
if ax is None:
|
||||||
fig = pb.figure(num=fignum)
|
fig = pb.figure(num=fignum)
|
||||||
ax = fig.add_subplot(111)
|
ax = fig.add_subplot(111)
|
||||||
if fignum is None and ax is None:
|
|
||||||
fignum = fig.num
|
|
||||||
if which_data is 'all':
|
|
||||||
which_data = slice(None)
|
|
||||||
|
|
||||||
GPBase.plot(self, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, levels=20, fignum=fignum, ax=ax)
|
#work out what the inputs are for plotting (1D or 2D)
|
||||||
|
fixed_dims = np.array([i for i,v in fixed_inputs])
|
||||||
|
free_dims = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
|
||||||
|
|
||||||
if self.X.shape[1] == 1:
|
#call the base plotting
|
||||||
|
GPBase.plot(self, samples=samples, plot_limits=plot_limits,
|
||||||
|
which_data_rows=which_data_rows,
|
||||||
|
which_data_ycols=which_data_ycols, fixed_inputs=fixed_inputs,
|
||||||
|
which_parts=which_parts, resolution=resolution, levels=20,
|
||||||
|
fignum=fignum, ax=ax)
|
||||||
|
|
||||||
|
if len(free_dims) == 1:
|
||||||
|
#plot errorbars for the uncertain inputs
|
||||||
if self.has_uncertain_inputs:
|
if self.has_uncertain_inputs:
|
||||||
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
|
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
|
||||||
ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
|
ax.errorbar(Xu[which_data_rows, 0], self.likelihood.data[which_data_rows, 0],
|
||||||
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
|
xerr=2 * np.sqrt(self.X_variance[which_data_rows, 0]),
|
||||||
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
|
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
|
||||||
|
|
||||||
|
#plot the inducing inputs
|
||||||
Zu = self.Z * self._Xscale + self._Xoffset
|
Zu = self.Z * self._Xscale + self._Xoffset
|
||||||
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
|
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
|
||||||
|
|
||||||
elif self.X.shape[1] == 2:
|
elif len(free_dims) == 2:
|
||||||
Zu = self.Z * self._Xscale + self._Xoffset
|
Zu = self.Z * self._Xscale + self._Xoffset
|
||||||
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
|
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
||||||
|
|
||||||
def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
|
def getstate(self):
|
||||||
"""
|
"""
|
||||||
For a specific output, predict the function at the new point(s) Xnew.
|
Get the current state of the class,
|
||||||
|
here just all the indices, rest can get recomputed
|
||||||
:param Xnew: The points at which to make a prediction
|
|
||||||
:type Xnew: np.ndarray, Nnew x self.input_dim
|
|
||||||
:param output: output to predict
|
|
||||||
:type output: integer in {0,..., num_outputs-1}
|
|
||||||
:param which_parts: specifies which outputs kernel(s) to use in prediction
|
|
||||||
:type which_parts: ('all', list of bools)
|
|
||||||
:param full_cov: whether to return the full covariance matrix, or just the diagonal
|
|
||||||
:type full_cov: bool
|
|
||||||
:rtype: posterior mean, a Numpy array, Nnew x self.input_dim
|
|
||||||
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
|
|
||||||
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
|
|
||||||
|
|
||||||
.. Note:: For multiple output models only
|
|
||||||
"""
|
"""
|
||||||
|
return GPBase.getstate(self) + [self.Z,
|
||||||
|
self.num_inducing,
|
||||||
|
self.has_uncertain_inputs,
|
||||||
|
self.X_variance]
|
||||||
|
|
||||||
assert hasattr(self,'multioutput')
|
def setstate(self, state):
|
||||||
index = np.ones_like(Xnew)*output
|
self.X_variance = state.pop()
|
||||||
Xnew = np.hstack((Xnew,index))
|
self.has_uncertain_inputs = state.pop()
|
||||||
|
self.num_inducing = state.pop()
|
||||||
# normalize X values
|
self.Z = state.pop()
|
||||||
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
|
GPBase.setstate(self, state)
|
||||||
mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
|
|
||||||
|
|
||||||
# now push through likelihood
|
|
||||||
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output)
|
|
||||||
return mean, var, _025pm, _975pm
|
|
||||||
|
|
||||||
def _raw_predict_single_output(self, _Xnew, output=0, X_variance_new=None, which_parts='all', full_cov=False,stop=False):
|
|
||||||
"""
|
|
||||||
Internal helper function for making predictions for a specific output,
|
|
||||||
does not account for normalization or likelihood
|
|
||||||
---------
|
|
||||||
|
|
||||||
:param Xnew: The points at which to make a prediction
|
|
||||||
:type Xnew: np.ndarray, Nnew x self.input_dim
|
|
||||||
:param output: output to predict
|
|
||||||
:type output: integer in {0,..., num_outputs-1}
|
|
||||||
:param which_parts: specifies which outputs kernel(s) to use in prediction
|
|
||||||
:type which_parts: ('all', list of bools)
|
|
||||||
:param full_cov: whether to return the full covariance matrix, or just the diagonal
|
|
||||||
|
|
||||||
.. Note:: For multiple output models only
|
|
||||||
"""
|
|
||||||
Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work!
|
|
||||||
symmetrify(Bi)
|
|
||||||
Kmmi_LmiBLmi = backsub_both_sides(self._Lm, np.eye(self.num_inducing) - Bi)
|
|
||||||
|
|
||||||
if self.Cpsi1V is None:
|
|
||||||
psi1V = np.dot(self.psi1.T,self.likelihood.V)
|
|
||||||
tmp, _ = dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
|
|
||||||
tmp, _ = dpotrs(self.LB, tmp, lower=1)
|
|
||||||
self.Cpsi1V, _ = dtrtrs(self._Lm, tmp, lower=1, trans=1)
|
|
||||||
|
|
||||||
assert hasattr(self,'multioutput')
|
|
||||||
index = np.ones_like(_Xnew)*output
|
|
||||||
_Xnew = np.hstack((_Xnew,index))
|
|
||||||
|
|
||||||
if X_variance_new is None:
|
|
||||||
Kx = self.kern.K(self.Z, _Xnew, which_parts=which_parts)
|
|
||||||
mu = np.dot(Kx.T, self.Cpsi1V)
|
|
||||||
if full_cov:
|
|
||||||
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
|
|
||||||
var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting
|
|
||||||
else:
|
|
||||||
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
|
|
||||||
var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
|
|
||||||
else:
|
|
||||||
Kx = self.kern.psi1(self.Z, _Xnew, X_variance_new)
|
|
||||||
mu = np.dot(Kx, self.Cpsi1V)
|
|
||||||
if full_cov:
|
|
||||||
raise NotImplementedError, "TODO"
|
|
||||||
else:
|
|
||||||
Kxx = self.kern.psi0(self.Z, _Xnew, X_variance_new)
|
|
||||||
psi2 = self.kern.psi2(self.Z, _Xnew, X_variance_new)
|
|
||||||
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
|
|
||||||
|
|
||||||
return mu, var[:, None]
|
|
||||||
|
|
||||||
|
|
||||||
def plot_single_output_f(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
|
|
||||||
|
|
||||||
if ax is None:
|
|
||||||
fig = pb.figure(num=fignum)
|
|
||||||
ax = fig.add_subplot(111)
|
|
||||||
if fignum is None and ax is None:
|
|
||||||
fignum = fig.num
|
|
||||||
if which_data is 'all':
|
|
||||||
which_data = slice(None)
|
|
||||||
|
|
||||||
GPBase.plot_single_output_f(self, output=output, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, full_cov=full_cov, fignum=fignum, ax=ax)
|
|
||||||
|
|
||||||
if self.X.shape[1] == 2:
|
|
||||||
if self.has_uncertain_inputs:
|
|
||||||
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
|
|
||||||
ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
|
|
||||||
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
|
|
||||||
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
|
|
||||||
Zu = self.Z * self._Xscale + self._Xoffset
|
|
||||||
Zu = Zu[Zu[:,1]==output,0:1]
|
|
||||||
ax.plot(Zu[:,0], np.zeros_like(Zu[:,0]) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
|
|
||||||
|
|
||||||
elif self.X.shape[1] == 2:
|
|
||||||
Zu = self.Z * self._Xscale + self._Xoffset
|
|
||||||
Zu = Zu[Zu[:,1]==output,0:2]
|
|
||||||
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
|
|
||||||
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
|
||||||
|
|
||||||
def plot_single_output(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None):
|
|
||||||
if ax is None:
|
|
||||||
fig = pb.figure(num=fignum)
|
|
||||||
ax = fig.add_subplot(111)
|
|
||||||
if fignum is None and ax is None:
|
|
||||||
fignum = fig.num
|
|
||||||
if which_data is 'all':
|
|
||||||
which_data = slice(None)
|
|
||||||
|
|
||||||
GPBase.plot_single_output(self, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, levels=20, fignum=fignum, ax=ax, output=output)
|
|
||||||
|
|
||||||
if self.X.shape[1] == 2:
|
|
||||||
if self.has_uncertain_inputs:
|
|
||||||
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
|
|
||||||
ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
|
|
||||||
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
|
|
||||||
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
|
|
||||||
Zu = self.Z * self._Xscale + self._Xoffset
|
|
||||||
Zu = Zu[Zu[:,1]==output,0:1]
|
|
||||||
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
|
|
||||||
|
|
||||||
elif self.X.shape[1] == 3:
|
|
||||||
Zu = self.Z * self._Xscale + self._Xoffset
|
|
||||||
Zu = Zu[Zu[:,1]==output,0:1]
|
|
||||||
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
|
||||||
|
|
|
||||||
|
|
@ -18,30 +18,16 @@ class SVIGP(GPBase):
|
||||||
Stochastic Variational inference in a Gaussian Process
|
Stochastic Variational inference in a Gaussian Process
|
||||||
|
|
||||||
:param X: inputs
|
:param X: inputs
|
||||||
:type X: np.ndarray (N x Q)
|
:type X: np.ndarray (num_data x num_inputs)
|
||||||
:param Y: observed data
|
:param Y: observed data
|
||||||
:type Y: np.ndarray of observations (N x D)
|
:type Y: np.ndarray of observations (num_data x output_dim)
|
||||||
:param batchsize: the size of a h
|
:param batchsize: the size of a minibatch
|
||||||
|
|
||||||
Additional kwargs are used as for a sparse GP. They include:
|
|
||||||
|
|
||||||
:param q_u: canonical parameters of the distribution squashed into a 1D array
|
:param q_u: canonical parameters of the distribution squashed into a 1D array
|
||||||
:type q_u: np.ndarray
|
:type q_u: np.ndarray
|
||||||
:param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
|
|
||||||
:type M: int
|
|
||||||
:param kernel: the kernel/covariance function. See link kernels
|
:param kernel: the kernel/covariance function. See link kernels
|
||||||
:type kernel: a GPy kernel
|
:type kernel: a GPy kernel
|
||||||
:param Z: inducing inputs (optional, see note)
|
:param Z: inducing inputs
|
||||||
:type Z: np.ndarray (M x Q) | None
|
:type Z: np.ndarray (num_inducing x num_inputs)
|
||||||
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
|
|
||||||
:type X_uncertainty: np.ndarray (N x Q) | None
|
|
||||||
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
|
|
||||||
:param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
|
|
||||||
:type M: int
|
|
||||||
:param beta: noise precision. TODO: ignore beta if doing EP
|
|
||||||
:type beta: float
|
|
||||||
:param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
|
|
||||||
:type normalize_(X|Y): bool
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
@ -350,8 +336,8 @@ class SVIGP(GPBase):
|
||||||
|
|
||||||
#callback
|
#callback
|
||||||
if i and not i%callback_interval:
|
if i and not i%callback_interval:
|
||||||
callback()
|
callback(self) # Change this to callback()
|
||||||
time.sleep(0.1)
|
time.sleep(0.01)
|
||||||
|
|
||||||
if self.epochs > 10:
|
if self.epochs > 10:
|
||||||
self._adapt_steplength()
|
self._adapt_steplength()
|
||||||
|
|
@ -367,13 +353,13 @@ class SVIGP(GPBase):
|
||||||
assert self.vb_steplength > 0
|
assert self.vb_steplength > 0
|
||||||
|
|
||||||
if self.adapt_param_steplength:
|
if self.adapt_param_steplength:
|
||||||
# self._adaptive_param_steplength()
|
self._adaptive_param_steplength()
|
||||||
# self._adaptive_param_steplength_log()
|
# self._adaptive_param_steplength_log()
|
||||||
self._adaptive_param_steplength_from_vb()
|
# self._adaptive_param_steplength_from_vb()
|
||||||
self._param_steplength_trace.append(self.param_steplength)
|
self._param_steplength_trace.append(self.param_steplength)
|
||||||
|
|
||||||
def _adaptive_param_steplength(self):
|
def _adaptive_param_steplength(self):
|
||||||
decr_factor = 0.1
|
decr_factor = 0.02
|
||||||
g_tp = self._transform_gradients(self._log_likelihood_gradients())
|
g_tp = self._transform_gradients(self._log_likelihood_gradients())
|
||||||
self.gbar_tp = (1-1/self.tau_tp)*self.gbar_tp + 1/self.tau_tp * g_tp
|
self.gbar_tp = (1-1/self.tau_tp)*self.gbar_tp + 1/self.tau_tp * g_tp
|
||||||
self.hbar_tp = (1-1/self.tau_tp)*self.hbar_tp + 1/self.tau_tp * np.dot(g_tp.T, g_tp)
|
self.hbar_tp = (1-1/self.tau_tp)*self.hbar_tp + 1/self.tau_tp * np.dot(g_tp.T, g_tp)
|
||||||
|
|
@ -407,7 +393,7 @@ class SVIGP(GPBase):
|
||||||
self.tau_t = self.tau_t*(1-self.vb_steplength) + 1
|
self.tau_t = self.tau_t*(1-self.vb_steplength) + 1
|
||||||
|
|
||||||
def _adaptive_vb_steplength_KL(self):
|
def _adaptive_vb_steplength_KL(self):
|
||||||
decr_factor = 1 #0.1
|
decr_factor = 0.1
|
||||||
natgrad = self.vb_grad_natgrad()
|
natgrad = self.vb_grad_natgrad()
|
||||||
g_t1 = natgrad[0]
|
g_t1 = natgrad[0]
|
||||||
g_t2 = natgrad[1]
|
g_t2 = natgrad[1]
|
||||||
|
|
|
||||||
19
GPy/core/variational.py
Normal file
19
GPy/core/variational.py
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
'''
|
||||||
|
Created on 6 Nov 2013
|
||||||
|
|
||||||
|
@author: maxz
|
||||||
|
'''
|
||||||
|
from parameterized import Parameterized
|
||||||
|
from parameter import Param
|
||||||
|
|
||||||
|
class Normal(Parameterized):
|
||||||
|
'''
|
||||||
|
Normal distribution for variational approximations.
|
||||||
|
|
||||||
|
holds the means and variances for a factorizing multivariate normal distribution
|
||||||
|
'''
|
||||||
|
def __init__(self, name, means, variances):
|
||||||
|
Parameterized.__init__(self, name=name)
|
||||||
|
self.means = Param("mean", means)
|
||||||
|
self.variances = Param('variance', variances)
|
||||||
|
self.add_parameters(self.means, self.variances)
|
||||||
|
|
@ -43,7 +43,7 @@ def oil(num_inducing=50, max_iters=100, kernel=None):
|
||||||
|
|
||||||
def toy_linear_1d_classification(seed=default_seed):
|
def toy_linear_1d_classification(seed=default_seed):
|
||||||
"""
|
"""
|
||||||
Simple 1D classification example
|
Simple 1D classification example using EP approximation
|
||||||
|
|
||||||
:param seed: seed value for data generation (default is 4).
|
:param seed: seed value for data generation (default is 4).
|
||||||
:type seed: int
|
:type seed: int
|
||||||
|
|
@ -61,6 +61,7 @@ def toy_linear_1d_classification(seed=default_seed):
|
||||||
#m.update_likelihood_approximation()
|
#m.update_likelihood_approximation()
|
||||||
# Parameters optimization:
|
# Parameters optimization:
|
||||||
#m.optimize()
|
#m.optimize()
|
||||||
|
#m.update_likelihood_approximation()
|
||||||
m.pseudo_EM()
|
m.pseudo_EM()
|
||||||
|
|
||||||
# Plot
|
# Plot
|
||||||
|
|
@ -71,6 +72,41 @@ def toy_linear_1d_classification(seed=default_seed):
|
||||||
|
|
||||||
return m
|
return m
|
||||||
|
|
||||||
|
def toy_linear_1d_classification_laplace(seed=default_seed):
|
||||||
|
"""
|
||||||
|
Simple 1D classification example using Laplace approximation
|
||||||
|
|
||||||
|
:param seed: seed value for data generation (default is 4).
|
||||||
|
:type seed: int
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
|
||||||
|
Y = data['Y'][:, 0:1]
|
||||||
|
Y[Y.flatten() == -1] = 0
|
||||||
|
|
||||||
|
bern_noise_model = GPy.likelihoods.bernoulli()
|
||||||
|
laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), bern_noise_model)
|
||||||
|
|
||||||
|
# Model definition
|
||||||
|
m = GPy.models.GPClassification(data['X'], Y, likelihood=laplace_likelihood)
|
||||||
|
|
||||||
|
print m
|
||||||
|
# Optimize
|
||||||
|
#m.update_likelihood_approximation()
|
||||||
|
# Parameters optimization:
|
||||||
|
m.optimize('bfgs', messages=1)
|
||||||
|
#m.pseudo_EM()
|
||||||
|
|
||||||
|
# Plot
|
||||||
|
fig, axes = pb.subplots(2,1)
|
||||||
|
m.plot_f(ax=axes[0])
|
||||||
|
m.plot(ax=axes[1])
|
||||||
|
print(m)
|
||||||
|
|
||||||
|
return m
|
||||||
|
|
||||||
|
|
||||||
def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
|
def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
|
||||||
"""
|
"""
|
||||||
Sparse 1D classification example
|
Sparse 1D classification example
|
||||||
|
|
@ -116,7 +152,7 @@ def toy_heaviside(seed=default_seed):
|
||||||
Y[Y.flatten() == -1] = 0
|
Y[Y.flatten() == -1] = 0
|
||||||
|
|
||||||
# Model definition
|
# Model definition
|
||||||
noise_model = GPy.likelihoods.binomial(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
|
noise_model = GPy.likelihoods.bernoulli(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
|
||||||
likelihood = GPy.likelihoods.EP(Y,noise_model)
|
likelihood = GPy.likelihoods.EP(Y,noise_model)
|
||||||
m = GPy.models.GPClassification(data['X'], likelihood=likelihood)
|
m = GPy.models.GPClassification(data['X'], likelihood=likelihood)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,10 +12,10 @@ from GPy.likelihoods.gaussian import Gaussian
|
||||||
default_seed = np.random.seed(123344)
|
default_seed = np.random.seed(123344)
|
||||||
|
|
||||||
def BGPLVM(seed=default_seed):
|
def BGPLVM(seed=default_seed):
|
||||||
N = 5
|
N = 13
|
||||||
num_inducing = 4
|
num_inducing = 5
|
||||||
Q = 3
|
Q = 6
|
||||||
D = 2
|
D = 25
|
||||||
# generate GPLVM-like data
|
# generate GPLVM-like data
|
||||||
X = np.random.rand(N, Q)
|
X = np.random.rand(N, Q)
|
||||||
lengthscales = np.random.rand(Q)
|
lengthscales = np.random.rand(Q)
|
||||||
|
|
@ -25,9 +25,12 @@ def BGPLVM(seed=default_seed):
|
||||||
Y = np.random.multivariate_normal(np.zeros(N), K, D).T
|
Y = np.random.multivariate_normal(np.zeros(N), K, D).T
|
||||||
lik = Gaussian(Y, normalize=True)
|
lik = Gaussian(Y, normalize=True)
|
||||||
|
|
||||||
k = GPy.kern.rbf_inv(Q, .5, np.ones(Q) * 2., ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
|
# k = GPy.kern.rbf_inv(Q, .5, np.ones(Q) * 2., ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
|
||||||
# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
|
# k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
|
||||||
# k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
|
# k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
|
||||||
|
# k = GPy.kern.rbf(Q, .5, np.ones(Q) * 2., ARD=True) + GPy.kern.rbf(Q, .3, np.ones(Q) * .2, ARD=True)
|
||||||
|
k = GPy.kern.rbf(Q, .5, np.ones(Q) * 2., ARD=True) + GPy.kern.linear(Q, np.ones(Q) * .2, ARD=True)
|
||||||
|
# k = GPy.kern.rbf(Q, .5, 2., ARD=0) + GPy.kern.rbf(Q, .3, .2, ARD=0)
|
||||||
|
|
||||||
m = GPy.models.BayesianGPLVM(lik, Q, kernel=k, num_inducing=num_inducing)
|
m = GPy.models.BayesianGPLVM(lik, Q, kernel=k, num_inducing=num_inducing)
|
||||||
m.lengthscales = lengthscales
|
m.lengthscales = lengthscales
|
||||||
|
|
@ -331,27 +334,46 @@ def brendan_faces():
|
||||||
from GPy import kern
|
from GPy import kern
|
||||||
data = GPy.util.datasets.brendan_faces()
|
data = GPy.util.datasets.brendan_faces()
|
||||||
Q = 2
|
Q = 2
|
||||||
Y = data['Y'][0:-1:10, :]
|
Y = data['Y']
|
||||||
# Y = data['Y']
|
|
||||||
Yn = Y - Y.mean()
|
Yn = Y - Y.mean()
|
||||||
Yn /= Yn.std()
|
Yn /= Yn.std()
|
||||||
|
|
||||||
m = GPy.models.GPLVM(Yn, Q)
|
m = GPy.models.GPLVM(Yn, Q)
|
||||||
# m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=100)
|
|
||||||
|
|
||||||
# optimize
|
# optimize
|
||||||
m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
|
m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
|
||||||
|
|
||||||
m.optimize('scg', messages=1, max_f_eval=10000)
|
m.optimize('scg', messages=1, max_iters=1000)
|
||||||
|
|
||||||
ax = m.plot_latent(which_indices=(0, 1))
|
ax = m.plot_latent(which_indices=(0, 1))
|
||||||
y = m.likelihood.Y[0, :]
|
y = m.likelihood.Y[0, :]
|
||||||
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
|
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False)
|
||||||
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
||||||
raw_input('Press enter to finish')
|
raw_input('Press enter to finish')
|
||||||
|
|
||||||
return m
|
return m
|
||||||
|
|
||||||
|
def olivetti_faces():
|
||||||
|
from GPy import kern
|
||||||
|
data = GPy.util.datasets.olivetti_faces()
|
||||||
|
Q = 2
|
||||||
|
Y = data['Y']
|
||||||
|
Yn = Y - Y.mean()
|
||||||
|
Yn /= Yn.std()
|
||||||
|
|
||||||
|
m = GPy.models.GPLVM(Yn, Q)
|
||||||
|
m.optimize('scg', messages=1, max_iters=1000)
|
||||||
|
|
||||||
|
ax = m.plot_latent(which_indices=(0, 1))
|
||||||
|
y = m.likelihood.Y[0, :]
|
||||||
|
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False)
|
||||||
|
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
||||||
|
raw_input('Press enter to finish')
|
||||||
|
|
||||||
|
return m
|
||||||
|
|
||||||
def stick_play(range=None, frame_rate=15):
|
def stick_play(range=None, frame_rate=15):
|
||||||
|
|
||||||
data = GPy.util.datasets.osu_run1()
|
data = GPy.util.datasets.osu_run1()
|
||||||
# optimize
|
# optimize
|
||||||
if range == None:
|
if range == None:
|
||||||
|
|
|
||||||
296
GPy/examples/laplace_approximations.py
Normal file
296
GPy/examples/laplace_approximations.py
Normal file
|
|
@ -0,0 +1,296 @@
|
||||||
|
import GPy
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from GPy.util import datasets
|
||||||
|
np.random.seed(1)
|
||||||
|
|
||||||
|
def student_t_approx():
|
||||||
|
"""
|
||||||
|
Example of regressing with a student t likelihood
|
||||||
|
"""
|
||||||
|
real_std = 0.1
|
||||||
|
#Start a function, any function
|
||||||
|
X = np.linspace(0.0, np.pi*2, 100)[:, None]
|
||||||
|
Y = np.sin(X) + np.random.randn(*X.shape)*real_std
|
||||||
|
Yc = Y.copy()
|
||||||
|
|
||||||
|
X_full = np.linspace(0.0, np.pi*2, 500)[:, None]
|
||||||
|
Y_full = np.sin(X_full)
|
||||||
|
|
||||||
|
Y = Y/Y.max()
|
||||||
|
|
||||||
|
#Slightly noisy data
|
||||||
|
Yc[75:80] += 1
|
||||||
|
|
||||||
|
#Very noisy data
|
||||||
|
#Yc[10] += 100
|
||||||
|
#Yc[25] += 10
|
||||||
|
#Yc[23] += 10
|
||||||
|
#Yc[26] += 1000
|
||||||
|
#Yc[24] += 10
|
||||||
|
#Yc = Yc/Yc.max()
|
||||||
|
|
||||||
|
#Add student t random noise to datapoints
|
||||||
|
deg_free = 5
|
||||||
|
print "Real noise: ", real_std
|
||||||
|
initial_var_guess = 0.5
|
||||||
|
|
||||||
|
#t_rv = t(deg_free, loc=0, scale=real_var)
|
||||||
|
#noise = t_rvrvs(size=Y.shape)
|
||||||
|
#Y += noise
|
||||||
|
|
||||||
|
plt.figure(1)
|
||||||
|
plt.suptitle('Gaussian likelihood')
|
||||||
|
# Kernel object
|
||||||
|
kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||||
|
kernel2 = kernel1.copy()
|
||||||
|
kernel3 = kernel1.copy()
|
||||||
|
kernel4 = kernel1.copy()
|
||||||
|
kernel5 = kernel1.copy()
|
||||||
|
kernel6 = kernel1.copy()
|
||||||
|
|
||||||
|
print "Clean Gaussian"
|
||||||
|
#A GP should completely break down due to the points as they get a lot of weight
|
||||||
|
# create simple GP model
|
||||||
|
m = GPy.models.GPRegression(X, Y, kernel=kernel1)
|
||||||
|
# optimize
|
||||||
|
m.ensure_default_constraints()
|
||||||
|
m.constrain_fixed('white', 1e-4)
|
||||||
|
m.randomize()
|
||||||
|
m.optimize()
|
||||||
|
# plot
|
||||||
|
ax = plt.subplot(211)
|
||||||
|
m.plot(ax=ax)
|
||||||
|
plt.plot(X_full, Y_full)
|
||||||
|
plt.ylim(-1.5, 1.5)
|
||||||
|
plt.title('Gaussian clean')
|
||||||
|
print m
|
||||||
|
|
||||||
|
#Corrupt
|
||||||
|
print "Corrupt Gaussian"
|
||||||
|
m = GPy.models.GPRegression(X, Yc, kernel=kernel2)
|
||||||
|
m.ensure_default_constraints()
|
||||||
|
m.constrain_fixed('white', 1e-4)
|
||||||
|
m.randomize()
|
||||||
|
m.optimize()
|
||||||
|
ax = plt.subplot(212)
|
||||||
|
m.plot(ax=ax)
|
||||||
|
plt.plot(X_full, Y_full)
|
||||||
|
plt.ylim(-1.5, 1.5)
|
||||||
|
plt.title('Gaussian corrupt')
|
||||||
|
print m
|
||||||
|
|
||||||
|
plt.figure(2)
|
||||||
|
plt.suptitle('Student-t likelihood')
|
||||||
|
edited_real_sd = initial_var_guess
|
||||||
|
|
||||||
|
print "Clean student t, rasm"
|
||||||
|
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
|
||||||
|
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution)
|
||||||
|
m = GPy.models.GPRegression(X, Y.copy(), kernel6, likelihood=stu_t_likelihood)
|
||||||
|
m.ensure_default_constraints()
|
||||||
|
m.constrain_positive('t_noise')
|
||||||
|
m.constrain_fixed('white', 1e-4)
|
||||||
|
m.randomize()
|
||||||
|
#m.update_likelihood_approximation()
|
||||||
|
m.optimize()
|
||||||
|
print(m)
|
||||||
|
ax = plt.subplot(211)
|
||||||
|
m.plot(ax=ax)
|
||||||
|
plt.plot(X_full, Y_full)
|
||||||
|
plt.ylim(-1.5, 1.5)
|
||||||
|
plt.title('Student-t rasm clean')
|
||||||
|
|
||||||
|
print "Corrupt student t, rasm"
|
||||||
|
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
|
||||||
|
corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution)
|
||||||
|
m = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood)
|
||||||
|
m.ensure_default_constraints()
|
||||||
|
m.constrain_positive('t_noise')
|
||||||
|
m.constrain_fixed('white', 1e-4)
|
||||||
|
m.randomize()
|
||||||
|
for a in range(1):
|
||||||
|
m.randomize()
|
||||||
|
m_start = m.copy()
|
||||||
|
print m
|
||||||
|
m.optimize('scg', messages=1)
|
||||||
|
print(m)
|
||||||
|
ax = plt.subplot(212)
|
||||||
|
m.plot(ax=ax)
|
||||||
|
plt.plot(X_full, Y_full)
|
||||||
|
plt.ylim(-1.5, 1.5)
|
||||||
|
plt.title('Student-t rasm corrupt')
|
||||||
|
|
||||||
|
return m
|
||||||
|
|
||||||
|
def boston_example():
    """
    Cross-validated likelihood comparison on the Boston housing data.

    For each of 10 folds the following are scored on the held-out part:
    a baseline that predicts the training mean, a GPRegression model with its
    default likelihood, a Laplace-approximated Gaussian likelihood, and
    Laplace-approximated Student-t likelihoods for several degrees of
    freedom.  RMSE and mean log predictive density are stored per fold,
    their averages are printed, and both are shown as box plots.

    :returns: the last Student-t model fitted
    """
    import sklearn
    from sklearn.cross_validation import KFold

    optimizer = 'bfgs'
    messages = 0
    degrees_freedoms = [3, 5, 8, 10]
    num_folds = 10

    # Initial parameter values and plotting switches, identical for every fold.
    noise = 1e-1
    rbf_len = 0.5
    data_axis_plot = 4
    plot = False

    # Standardise inputs per column and targets globally.
    data = datasets.boston_housing()
    X = data['X'].copy()
    Y = data['Y'].copy()
    X = X - X.mean(axis=0)
    X = X/X.std(axis=0)
    Y = Y - Y.mean()
    Y = Y/Y.std()

    kf = KFold(len(Y), n_folds=num_folds, indices=True)
    # Rows: baseline, gaussian, gaussian laplace approx, then one per df.
    num_models = len(degrees_freedoms) + 3
    score_folds = np.zeros((num_models, num_folds))
    # Row 0 (baseline) of pred_density is never filled and is skipped when plotting.
    pred_density = score_folds.copy()

    def rmse(Y, Ystar):
        return np.sqrt(np.mean((Y-Ystar)**2))

    def record_fold(model, row, fold, inputs, targets):
        # Predict on the held-out data and store both scores for this model/fold.
        prediction = model.predict(inputs)
        score_folds[row, fold] = rmse(targets, prediction[0])
        pred_density[row, fold] = np.mean(model.log_predictive_density(inputs, targets))
        return prediction

    def optimize_or_report(model):
        # Best effort: a failed optimisation is reported and the model is
        # scored with whatever parameters it currently holds.
        try:
            model.optimize(optimizer=optimizer, messages=messages)
        except Exception:
            print("Blew up")

    def scatter_predictions(inputs, targets, prediction, title):
        plt.figure()
        plt.scatter(inputs[:, data_axis_plot], prediction[0])
        plt.scatter(inputs[:, data_axis_plot], targets, c='r', marker='x')
        plt.title(title)

    def boxplot(values, labels, title, ylabel):
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.title(title)
        bp = ax.boxplot(values, notch=0, sym='+', vert=1, whis=1.5)
        plt.setp(bp['boxes'], color='black')
        plt.setp(bp['whiskers'], color='black')
        plt.setp(bp['fliers'], color='red', marker='+')
        xtickNames = plt.setp(ax, xticklabels=labels)
        plt.setp(xtickNames, rotation=45, fontsize=8)
        ax.set_ylabel(ylabel)
        ax.set_xlabel('Distribution')
        # Make grid and put it below boxes
        ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
                      alpha=0.5)
        ax.set_axisbelow(True)

    for n, (train, test) in enumerate(kf):
        X_train, X_test = X[train], X[test]
        Y_train, Y_test = Y[train], Y[test]
        print("Fold {}".format(n))

        kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
        kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])

        # Baseline: predict the training mean for every test point.
        score_folds[0, n] = rmse(Y_test, np.mean(Y_train))

        # GPRegression with its default likelihood.
        print("Gauss GP")
        mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
        mgp.ensure_default_constraints()
        mgp.constrain_fixed('white', 1e-5)
        mgp['rbf_len'] = rbf_len
        mgp['noise'] = noise
        print(mgp)
        mgp.optimize(optimizer=optimizer, messages=messages)
        Y_test_pred = record_fold(mgp, 1, n, X_test, Y_test)
        print(mgp)
        print(pred_density)
        if plot:
            scatter_predictions(X_test, Y_test, Y_test_pred, 'GP gauss')

        # Gaussian noise model handled through the Laplace approximation.
        print("Gaussian Laplace GP")
        N, D = Y_train.shape
        g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D)
        g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution)
        mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood)
        mg.ensure_default_constraints()
        mg.constrain_positive('noise_variance')
        mg.constrain_fixed('white', 1e-5)
        mg['rbf_len'] = rbf_len
        mg['noise'] = noise
        print(mg)
        optimize_or_report(mg)
        Y_test_pred = record_fold(mg, 2, n, X_test, Y_test)
        print(pred_density)
        print(mg)
        if plot:
            scatter_predictions(X_test, Y_test, Y_test_pred, 'Lap gauss')

        # Student-t noise models, one per degrees-of-freedom setting.
        for stu_num, df in enumerate(degrees_freedoms):
            print("Student-T GP {}df".format(df))
            t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise)
            stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
            mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
            mstu_t.ensure_default_constraints()
            mstu_t.constrain_fixed('white', 1e-5)
            mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
            mstu_t['rbf_len'] = rbf_len
            mstu_t['t_noise'] = noise
            print(mstu_t)
            optimize_or_report(mstu_t)
            Y_test_pred = record_fold(mstu_t, 3+stu_num, n, X_test, Y_test)
            print(pred_density)
            print(mstu_t)
            if plot:
                scatter_predictions(X_test, Y_test, Y_test_pred, 'Stu t {}df'.format(df))

    print("Average scores: {}".format(np.mean(score_folds, 1)))
    print("Average pred density: {}".format(np.mean(pred_density, 1)))

    # Box plots over folds, one box per model.
    stu_t_legends = ['Student T, df={}'.format(df) for df in degrees_freedoms]
    legends = ['Baseline', 'Gaussian', 'Laplace Approx Gaussian'] + stu_t_legends

    boxplot(score_folds.T, legends, 'RMSE', 'RMSE')
    boxplot(pred_density[1:,:].T, legends[1:], 'Predictive density',
            'Mean Log probability P(Y*|Y)')
    return mstu_t
|
||||||
|
|
||||||
|
def precipitation_example():
|
||||||
|
import sklearn
|
||||||
|
from sklearn.cross_validation import KFold
|
||||||
|
data = datasets.boston_housing()
|
||||||
|
X = data['X'].copy()
|
||||||
|
Y = data['Y'].copy()
|
||||||
|
X = X-X.mean(axis=0)
|
||||||
|
X = X/X.std(axis=0)
|
||||||
|
Y = Y-Y.mean()
|
||||||
|
Y = Y/Y.std()
|
||||||
|
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
|
||||||
|
num_folds = 10
|
||||||
|
kf = KFold(len(Y), n_folds=num_folds, indices=True)
|
||||||
|
score_folds = np.zeros((4, num_folds))
|
||||||
|
def rmse(Y, Ystar):
|
||||||
|
return np.sqrt(np.mean((Y-Ystar)**2))
|
||||||
|
#for train, test in kf:
|
||||||
|
for n, (train, test) in enumerate(kf):
|
||||||
|
X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
|
||||||
|
print "Fold {}".format(n)
|
||||||
|
|
@ -57,8 +57,8 @@ def coregionalization_toy(max_iters=100):
|
||||||
m.optimize(max_iters=max_iters)
|
m.optimize(max_iters=max_iters)
|
||||||
|
|
||||||
fig, axes = pb.subplots(2,1)
|
fig, axes = pb.subplots(2,1)
|
||||||
m.plot_single_output(output=0,ax=axes[0])
|
m.plot(fixed_inputs=[(1,0)],ax=axes[0])
|
||||||
m.plot_single_output(output=1,ax=axes[1])
|
m.plot(fixed_inputs=[(1,1)],ax=axes[1])
|
||||||
axes[0].set_title('Output 0')
|
axes[0].set_title('Output 0')
|
||||||
axes[1].set_title('Output 1')
|
axes[1].set_title('Output 1')
|
||||||
return m
|
return m
|
||||||
|
|
@ -270,6 +270,50 @@ def toy_rbf_1d_50(max_iters=100):
|
||||||
print(m)
|
print(m)
|
||||||
return m
|
return m
|
||||||
|
|
||||||
|
def toy_poisson_rbf_1d(optimizer='bfgs', max_nb_eval_optim=100):
    """
    Fit a GP with an EP-approximated Poisson likelihood to simulated counts.

    A latent function is drawn from a zero-mean Gaussian whose covariance is
    an RBF kernel evaluated on 400 inputs in [0, 10]; each observation is a
    Poisson sample with rate exp(f).

    :param optimizer: optimizer passed as first argument to ``m.optimize``
    :param max_nb_eval_optim: forwarded to ``m.optimize`` as ``max_f_eval``
    :returns: the optimised model
    """
    num_points = 400
    X = np.linspace(0, 10, num_points)[:, None]

    # Latent draw and the Poisson counts generated from it.
    prior_mean = np.zeros(num_points)
    f_true = np.random.multivariate_normal(prior_mean, GPy.kern.rbf(1).K(X))
    counts = [np.random.poisson(np.exp(latent)) for latent in f_true]
    Y = np.array(counts)[:, None]

    likelihood = GPy.likelihoods.EP(Y, GPy.likelihoods.poisson())
    m = GPy.models.GPRegression(X, Y, likelihood=likelihood)

    m.optimize(optimizer, max_f_eval=max_nb_eval_optim)
    m.plot()
    print(m)
    return m
|
||||||
|
|
||||||
|
def toy_poisson_rbf_1d_laplace(optimizer='bfgs', max_nb_eval_optim=100):
    """
    Fit a GP with a Laplace-approximated Poisson likelihood to simulated counts.

    A latent function is drawn from a zero-mean Gaussian whose covariance is
    an RBF kernel evaluated on 30 inputs in [0, 10]; each observation is a
    Poisson sample with rate exp(f).  The rate exp(f) is drawn as a dashed
    line after the model plot.

    :param optimizer: optimizer passed as first argument to ``m.optimize``
    :param max_nb_eval_optim: forwarded to ``m.optimize`` as ``max_f_eval``
    :returns: the optimised model
    """
    num_points = 30
    X = np.linspace(0, 10, num_points)[:, None]

    # Latent draw and the Poisson counts generated from it.
    prior_mean = np.zeros(num_points)
    f_true = np.random.multivariate_normal(prior_mean, GPy.kern.rbf(1).K(X))
    counts = [np.random.poisson(np.exp(latent)) for latent in f_true]
    Y = np.array(counts)[:, None]

    likelihood = GPy.likelihoods.Laplace(Y, GPy.likelihoods.poisson())
    m = GPy.models.GPRegression(X, Y, likelihood=likelihood)

    m.optimize(optimizer, max_f_eval=max_nb_eval_optim)
    m.plot()
    # plot the real underlying rate function
    pb.plot(X, np.exp(f_true), '--k', linewidth=2)
    print(m)
    return m
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4):
|
def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4):
|
||||||
# Create an artificial dataset where the values in the targets (Y)
|
# Create an artificial dataset where the values in the targets (Y)
|
||||||
# only depend in dimensions 1 and 3 of the inputs (X). Run ARD to
|
# only depend in dimensions 1 and 3 of the inputs (X). Run ARD to
|
||||||
|
|
|
||||||
7
GPy/gpy_config.cfg
Normal file
7
GPy/gpy_config.cfg
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
# This is the configuration file for GPy
|
||||||
|
|
||||||
|
[parallel]
|
||||||
|
# Enable openmp support. This speeds up some computations, depending on the number
|
||||||
|
# of cores available. Setting up a compiler with openmp support can be difficult on
|
||||||
|
# some platforms, hence this option.
|
||||||
|
openmp=False
|
||||||
|
|
@ -62,7 +62,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
|
||||||
fnow = fold
|
fnow = fold
|
||||||
gradnew = gradf(x, *optargs) # Initial gradient.
|
gradnew = gradf(x, *optargs) # Initial gradient.
|
||||||
if any(np.isnan(gradnew)):
|
if any(np.isnan(gradnew)):
|
||||||
raise UnexpectedInfOrNan
|
raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
|
||||||
current_grad = np.dot(gradnew, gradnew)
|
current_grad = np.dot(gradnew, gradnew)
|
||||||
gradold = gradnew.copy()
|
gradold = gradnew.copy()
|
||||||
d = -gradnew # Initial search direction.
|
d = -gradnew # Initial search direction.
|
||||||
|
|
|
||||||
|
|
@ -298,43 +298,67 @@ if sympy_available:
|
||||||
"""
|
"""
|
||||||
Radial Basis Function covariance.
|
Radial Basis Function covariance.
|
||||||
"""
|
"""
|
||||||
X = [sp.var('x%i' % i) for i in range(input_dim)]
|
X = sp.symbols('x_:' + str(input_dim))
|
||||||
Z = [sp.var('z%i' % i) for i in range(input_dim)]
|
Z = sp.symbols('z_:' + str(input_dim))
|
||||||
variance = sp.var('variance',positive=True)
|
variance = sp.var('variance',positive=True)
|
||||||
if ARD:
|
if ARD:
|
||||||
lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)]
|
lengthscales = sp.symbols('lengthscale_:' + str(input_dim))
|
||||||
dist_string = ' + '.join(['(x%i-z%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
|
dist_string = ' + '.join(['(x_%i-z_%i)**2/lengthscale%i**2' % (i, i, i) for i in range(input_dim)])
|
||||||
dist = parse_expr(dist_string)
|
dist = parse_expr(dist_string)
|
||||||
f = variance*sp.exp(-dist/2.)
|
f = variance*sp.exp(-dist/2.)
|
||||||
else:
|
else:
|
||||||
lengthscale = sp.var('lengthscale',positive=True)
|
lengthscale = sp.var('lengthscale',positive=True)
|
||||||
dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)])
|
dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(input_dim)])
|
||||||
dist = parse_expr(dist_string)
|
dist = parse_expr(dist_string)
|
||||||
f = variance*sp.exp(-dist/(2*lengthscale**2))
|
f = variance*sp.exp(-dist/(2*lengthscale**2))
|
||||||
return kern(input_dim, [spkern(input_dim, f, name='rbf_sympy')])
|
return kern(input_dim, [spkern(input_dim, f, name='rbf_sympy')])
|
||||||
|
|
||||||
|
def eq_sympy(input_dim, output_dim, ARD=False, variance=1., lengthscale=1.):
    """
    Exponentiated quadratic with multiple outputs.

    Builds a sympy expression for the covariance and wraps it in a ``kern``
    holding a single ``spkern`` part named 'eq_sympy'.

    :param input_dim: input dimension handed to ``kern`` and ``spkern``; when
        ``output_dim > 1`` one dimension is subtracted before the x/z symbols
        are created
    :param output_dim: number of outputs, forwarded to ``spkern``
    :param ARD: if True use one set of lengthscale symbols per input dimension
    :param variance: multiplies the exponential in the ARD branch only
    :param lengthscale: not used by this function
    :returns: a ``kern`` object wrapping the sympy covariance
    """
    real_input_dim = input_dim
    if output_dim > 1:
        real_input_dim -= 1
    X = sp.symbols('x_:' + str(real_input_dim))
    Z = sp.symbols('z_:' + str(real_input_dim))
    # Bind the symbols locally instead of relying on the names sp.var injects
    # into the module namespace.
    scale_i, scale_j = sp.var('scale_i scale_j', positive=True)
    if ARD:
        # Two specifiers need two values; a bare ``% i`` raised TypeError.
        lengthscales = [sp.var('lengthscale%i_i lengthscale%i_j' % (i, i), positive=True)
                        for i in range(real_input_dim)]
        shared_lengthscales = [sp.var('shared_lengthscale%i' % i, positive=True)
                               for i in range(real_input_dim)]
        # The template has five specifiers, so the index is supplied five
        # times (the previous three-tuple raised TypeError).
        dist_string = ' + '.join(
            ['(x_%i-z_%i)**2/(shared_lengthscale%i**2 + lengthscale%i_i*lengthscale%i_j)'
             % (i, i, i, i, i) for i in range(real_input_dim)])
        dist = parse_expr(dist_string)
        # NOTE(review): this branch scales by the numeric ``variance`` argument
        # while the non-ARD branch uses scale_i*scale_j -- confirm which is intended.
        f = variance*sp.exp(-dist/2.)
    else:
        lengthscale_i, lengthscale_j = sp.var('lengthscale_i lengthscale_j', positive=True)
        shared_lengthscale = sp.var('shared_lengthscale', positive=True)
        dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(real_input_dim)])
        dist = parse_expr(dist_string)
        f = scale_i*scale_j*sp.exp(-dist/(2*(lengthscale_i**2 + lengthscale_j**2 + shared_lengthscale**2)))
    return kern(input_dim, [spkern(input_dim, f, output_dim=output_dim, name='eq_sympy')])
|
||||||
|
|
||||||
def sinc(input_dim, ARD=False, variance=1., lengthscale=1.):
|
def sinc(input_dim, ARD=False, variance=1., lengthscale=1.):
|
||||||
"""
|
"""
|
||||||
TODO: Not clear why this isn't working, suggests argument of sinc is not a number.
|
TODO: Not clear why this isn't working, suggests argument of sinc is not a number.
|
||||||
sinc covariance function
|
sinc covariance function
|
||||||
"""
|
"""
|
||||||
X = [sp.var('x%i' % i) for i in range(input_dim)]
|
X = sp.symbols('x_:' + str(input_dim))
|
||||||
Z = [sp.var('z%i' % i) for i in range(input_dim)]
|
Z = sp.symbols('z_:' + str(input_dim))
|
||||||
variance = sp.var('variance',positive=True)
|
variance = sp.var('variance',positive=True)
|
||||||
if ARD:
|
if ARD:
|
||||||
lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)]
|
lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)]
|
||||||
dist_string = ' + '.join(['(x%i-z%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
|
dist_string = ' + '.join(['(x_%i-z_%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
|
||||||
dist = parse_expr(dist_string)
|
dist = parse_expr(dist_string)
|
||||||
f = variance*sinc(sp.pi*sp.sqrt(dist))
|
f = variance*sinc(sp.pi*sp.sqrt(dist))
|
||||||
else:
|
else:
|
||||||
lengthscale = sp.var('lengthscale',positive=True)
|
lengthscale = sp.var('lengthscale',positive=True)
|
||||||
dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)])
|
dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(input_dim)])
|
||||||
dist = parse_expr(dist_string)
|
dist = parse_expr(dist_string)
|
||||||
f = variance*sinc(sp.pi*sp.sqrt(dist)/lengthscale)
|
f = variance*sinc(sp.pi*sp.sqrt(dist)/lengthscale)
|
||||||
|
|
||||||
return kern(input_dim, [spkern(input_dim, f, name='sinc')])
|
return kern(input_dim, [spkern(input_dim, f, name='sinc')])
|
||||||
|
|
||||||
def sympykern(input_dim, k,name=None):
|
def sympykern(input_dim, k=None, output_dim=1, name=None, param=None):
|
||||||
"""
|
"""
|
||||||
A base kernel object, where all the hard work is done by sympy.
|
A base kernel object, where all the hard work is done by sympy.
|
||||||
|
|
||||||
|
|
@ -349,7 +373,7 @@ if sympy_available:
|
||||||
- to handle multiple inputs, call them x1, z1, etc
|
- to handle multiple inputs, call them x1, z1, etc
|
||||||
- to handle multiple correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
|
- to handle multiple correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
|
||||||
"""
|
"""
|
||||||
return kern(input_dim, [spkern(input_dim, k,name)])
|
return kern(input_dim, [spkern(input_dim, k=k, output_dim=output_dim, name=name, param=param)])
|
||||||
del sympy_available
|
del sympy_available
|
||||||
|
|
||||||
def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
|
def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
|
||||||
|
|
|
||||||
307
GPy/kern/kern.py
307
GPy/kern/kern.py
|
|
@ -18,37 +18,37 @@ class kern(Parameterized):
|
||||||
like which parameters live where.
|
like which parameters live where.
|
||||||
|
|
||||||
The technical code for kernels is divided into _parts_ (see
|
The technical code for kernels is divided into _parts_ (see
|
||||||
e.g. rbf.py). This object contains a list of parts, which are
|
e.g. rbf.py). This object contains a list of _parameters_, which are
|
||||||
computed additively. For multiplication, special _prod_ parts
|
computed additively. For multiplication, special _prod_ _parameters_
|
||||||
are used.
|
are used.
|
||||||
|
|
||||||
:param input_dim: The dimensionality of the kernel's input space
|
:param input_dim: The dimensionality of the kernel's input space
|
||||||
:type input_dim: int
|
:type input_dim: int
|
||||||
:param parts: the 'parts' (PD functions) of the kernel
|
:param _parameters_: the '_parameters_' (PD functions) of the kernel
|
||||||
:type parts: list of Kernpart objects
|
:type _parameters_: list of Kernpart objects
|
||||||
:param input_slices: the slices on the inputs which apply to each kernel
|
:param input_slices: the slices on the inputs which apply to each kernel
|
||||||
:type input_slices: list of slice objects, or list of bools
|
:type input_slices: list of slice objects, or list of bools
|
||||||
|
|
||||||
"""
|
"""
|
||||||
self.parts = parts
|
self._parameters_ = parts
|
||||||
self.Nparts = len(parts)
|
self.num_parts = len(parts)
|
||||||
self.num_params = sum([p.num_params for p in self.parts])
|
self.num_params = sum([p.num_params for p in self._parameters_])
|
||||||
|
|
||||||
self.input_dim = input_dim
|
self.input_dim = input_dim
|
||||||
|
|
||||||
part_names = [k.name for k in self.parts]
|
part_names = [k.name for k in self._parameters_]
|
||||||
self.name=''
|
self.name=''
|
||||||
for name in part_names:
|
for name in part_names:
|
||||||
self.name += name + '+'
|
self.name += name + '+'
|
||||||
self.name = self.name[:-1]
|
self.name = self.name[:-1]
|
||||||
# deal with input_slices
|
# deal with input_slices
|
||||||
if input_slices is None:
|
if input_slices is None:
|
||||||
self.input_slices = [slice(None) for p in self.parts]
|
self.input_slices = [slice(None) for p in self._parameters_]
|
||||||
else:
|
else:
|
||||||
assert len(input_slices) == len(self.parts)
|
assert len(input_slices) == len(self._parameters_)
|
||||||
self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices]
|
self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices]
|
||||||
|
|
||||||
for p in self.parts:
|
for p in self._parameters_:
|
||||||
assert isinstance(p, Kernpart), "bad kernel part"
|
assert isinstance(p, Kernpart), "bad kernel part"
|
||||||
|
|
||||||
self.compute_param_slices()
|
self.compute_param_slices()
|
||||||
|
|
@ -60,8 +60,8 @@ class kern(Parameterized):
|
||||||
Get the current state of the class,
|
Get the current state of the class,
|
||||||
here just all the indices, rest can get recomputed
|
here just all the indices, rest can get recomputed
|
||||||
"""
|
"""
|
||||||
return Parameterized.getstate(self) + [self.parts,
|
return Parameterized.getstate(self) + [self._parameters_,
|
||||||
self.Nparts,
|
self.num_parts,
|
||||||
self.num_params,
|
self.num_params,
|
||||||
self.input_dim,
|
self.input_dim,
|
||||||
self.input_slices,
|
self.input_slices,
|
||||||
|
|
@ -73,13 +73,13 @@ class kern(Parameterized):
|
||||||
self.input_slices = state.pop()
|
self.input_slices = state.pop()
|
||||||
self.input_dim = state.pop()
|
self.input_dim = state.pop()
|
||||||
self.num_params = state.pop()
|
self.num_params = state.pop()
|
||||||
self.Nparts = state.pop()
|
self.num_parts = state.pop()
|
||||||
self.parts = state.pop()
|
self._parameters_ = state.pop()
|
||||||
Parameterized.setstate(self, state)
|
Parameterized.setstate(self, state)
|
||||||
|
|
||||||
|
|
||||||
def plot_ARD(self, fignum=None, ax=None, title='', legend=False):
|
def plot_ARD(self, fignum=None, ax=None, title='', legend=False):
|
||||||
"""If an ARD kernel is present, it bar-plots the ARD parameters.
|
"""If an ARD kernel is present, plot a bar representation using matplotlib
|
||||||
|
|
||||||
:param fignum: figure number of the plot
|
:param fignum: figure number of the plot
|
||||||
:param ax: matplotlib axis to plot on
|
:param ax: matplotlib axis to plot on
|
||||||
|
|
@ -87,7 +87,6 @@ class kern(Parameterized):
|
||||||
title of the plot,
|
title of the plot,
|
||||||
pass '' to not print a title
|
pass '' to not print a title
|
||||||
pass None for a generic title
|
pass None for a generic title
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if ax is None:
|
if ax is None:
|
||||||
fig = pb.figure(fignum)
|
fig = pb.figure(fignum)
|
||||||
|
|
@ -100,7 +99,7 @@ class kern(Parameterized):
|
||||||
xticklabels = []
|
xticklabels = []
|
||||||
bars = []
|
bars = []
|
||||||
x0 = 0
|
x0 = 0
|
||||||
for p in self.parts:
|
for p in self._parameters_:
|
||||||
c = Tango.nextMedium()
|
c = Tango.nextMedium()
|
||||||
if hasattr(p, 'ARD') and p.ARD:
|
if hasattr(p, 'ARD') and p.ARD:
|
||||||
if title is None:
|
if title is None:
|
||||||
|
|
@ -152,6 +151,13 @@ class kern(Parameterized):
|
||||||
return ax
|
return ax
|
||||||
|
|
||||||
def _transform_gradients(self, g):
|
def _transform_gradients(self, g):
|
||||||
|
"""
|
||||||
|
Apply the transformations of the kernel so that the returned vector
|
||||||
|
represents the gradient in the transformed space (i.e. that given by
|
||||||
|
get_params_transformed())
|
||||||
|
|
||||||
|
:param g: the gradient vector for the current model, usually created by dK_dtheta
|
||||||
|
"""
|
||||||
x = self._get_params()
|
x = self._get_params()
|
||||||
[np.put(x, i, x * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)]
|
[np.put(x, i, x * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)]
|
||||||
[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
|
[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
|
||||||
|
|
@ -162,22 +168,29 @@ class kern(Parameterized):
|
||||||
return g
|
return g
|
||||||
|
|
||||||
def compute_param_slices(self):
|
def compute_param_slices(self):
|
||||||
"""create a set of slices that can index the parameters of each part."""
|
"""
|
||||||
|
Create a set of slices that can index the parameters of each part.
|
||||||
|
"""
|
||||||
self.param_slices = []
|
self.param_slices = []
|
||||||
count = 0
|
count = 0
|
||||||
for p in self.parts:
|
for p in self._parameters_:
|
||||||
self.param_slices.append(slice(count, count + p.num_params))
|
self.param_slices.append(slice(count, count + p.num_params))
|
||||||
count += p.num_params
|
count += p.num_params
|
||||||
|
|
||||||
def __add__(self, other):
|
def __add__(self, other):
|
||||||
"""
|
""" Overloading of the '+' operator. for more control, see self.add """
|
||||||
Shortcut for `add`.
|
|
||||||
"""
|
|
||||||
return self.add(other)
|
return self.add(other)
|
||||||
|
|
||||||
def add(self, other, tensor=False):
|
def add(self, other, tensor=False):
|
||||||
"""
|
"""
|
||||||
Add another kernel to this one. Both kernels are defined on the same _space_
|
Add another kernel to this one.
|
||||||
|
|
||||||
|
If Tensor is False, both kernels are defined on the same _space_. then
|
||||||
|
the created kernel will have the same number of inputs as self and
|
||||||
|
other (which must be the same).
|
||||||
|
|
||||||
|
If Tensor is True, then the dimensions are stacked 'horizontally', so
|
||||||
|
that the resulting kernel has self.input_dim + other.input_dim
|
||||||
|
|
||||||
:param other: the other kernel to be added
|
:param other: the other kernel to be added
|
||||||
:type other: GPy.kern
|
:type other: GPy.kern
|
||||||
|
|
@ -189,7 +202,7 @@ class kern(Parameterized):
|
||||||
other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices]
|
other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices]
|
||||||
other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices]
|
other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices]
|
||||||
|
|
||||||
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
|
newkern = kern(D, self._parameters_ + other._parameters_, self_input_slices + other_input_slices)
|
||||||
|
|
||||||
# transfer constraints:
|
# transfer constraints:
|
||||||
newkern.constrained_indices = self.constrained_indices + [x + self.num_params for x in other.constrained_indices]
|
newkern.constrained_indices = self.constrained_indices + [x + self.num_params for x in other.constrained_indices]
|
||||||
|
|
@ -200,7 +213,7 @@ class kern(Parameterized):
|
||||||
newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
|
newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
|
||||||
else:
|
else:
|
||||||
assert self.input_dim == other.input_dim
|
assert self.input_dim == other.input_dim
|
||||||
newkern = kern(self.input_dim, self.parts + other.parts, self.input_slices + other.input_slices)
|
newkern = kern(self.input_dim, self._parameters_ + other._parameters_, self.input_slices + other.input_slices)
|
||||||
# transfer constraints:
|
# transfer constraints:
|
||||||
newkern.constrained_indices = self.constrained_indices + [i + self.num_params for i in other.constrained_indices]
|
newkern.constrained_indices = self.constrained_indices + [i + self.num_params for i in other.constrained_indices]
|
||||||
newkern.constraints = self.constraints + other.constraints
|
newkern.constraints = self.constraints + other.constraints
|
||||||
|
|
@ -210,9 +223,7 @@ class kern(Parameterized):
|
||||||
return newkern
|
return newkern
|
||||||
|
|
||||||
def __mul__(self, other):
|
def __mul__(self, other):
|
||||||
"""
|
""" Here we overload the '*' operator. See self.prod for more information"""
|
||||||
Shortcut for `prod`.
|
|
||||||
"""
|
|
||||||
return self.prod(other)
|
return self.prod(other)
|
||||||
|
|
||||||
def __pow__(self, other, tensor=False):
|
def __pow__(self, other, tensor=False):
|
||||||
|
|
@ -240,7 +251,7 @@ class kern(Parameterized):
|
||||||
s1[sl1], s2[sl2] = [True], [True]
|
s1[sl1], s2[sl2] = [True], [True]
|
||||||
slices += [s1 + s2]
|
slices += [s1 + s2]
|
||||||
|
|
||||||
newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)]
|
newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1._parameters_, K2._parameters_)]
|
||||||
|
|
||||||
if tensor:
|
if tensor:
|
||||||
newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices)
|
newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices)
|
||||||
|
|
@ -255,12 +266,12 @@ class kern(Parameterized):
|
||||||
# Build the array that allows to go from the initial indices of the param to the new ones
|
# Build the array that allows to go from the initial indices of the param to the new ones
|
||||||
K1_param = []
|
K1_param = []
|
||||||
n = 0
|
n = 0
|
||||||
for k1 in K1.parts:
|
for k1 in K1._parameters_:
|
||||||
K1_param += [range(n, n + k1.num_params)]
|
K1_param += [range(n, n + k1.num_params)]
|
||||||
n += k1.num_params
|
n += k1.num_params
|
||||||
n = 0
|
n = 0
|
||||||
K2_param = []
|
K2_param = []
|
||||||
for k2 in K2.parts:
|
for k2 in K2._parameters_:
|
||||||
K2_param += [range(K1.num_params + n, K1.num_params + n + k2.num_params)]
|
K2_param += [range(K1.num_params + n, K1.num_params + n + k2.num_params)]
|
||||||
n += k2.num_params
|
n += k2.num_params
|
||||||
index_param = []
|
index_param = []
|
||||||
|
|
@ -292,30 +303,41 @@ class kern(Parameterized):
|
||||||
self.constrain(np.where(index_param == i)[0], t)
|
self.constrain(np.where(index_param == i)[0], t)
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
return np.hstack([p._get_params() for p in self.parts])
|
return np.hstack([p._get_params() for p in self._parameters_])
|
||||||
|
|
||||||
def _set_params(self, x):
|
def _set_params(self, x):
|
||||||
[p._set_params(x[s]) for p, s in zip(self.parts, self.param_slices)]
|
[p._set_params(x[s]) for p, s in zip(self._parameters_, self.param_slices)]
|
||||||
|
|
||||||
def _get_param_names(self):
|
def _get_param_names(self):
|
||||||
# this is a bit nasty: we want to distinguish between parts with the same name by appending a count
|
# this is a bit nasty: we want to distinguish between _parameters_ with the same name by appending a count
|
||||||
part_names = np.array([k.name for k in self.parts], dtype=np.str)
|
part_names = np.array([k.name for k in self._parameters_], dtype=np.str)
|
||||||
counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)]
|
counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)]
|
||||||
cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)]
|
cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)]
|
||||||
names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)]
|
names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)]
|
||||||
|
|
||||||
return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self.parts)], [])
|
return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self._parameters_)], [])
|
||||||
|
|
||||||
def K(self, X, X2=None, which_parts='all'):
|
def K(self, X, X2=None, which_parts='all'):
|
||||||
|
"""
|
||||||
|
Compute the kernel function.
|
||||||
|
|
||||||
|
:param X: the first set of inputs to the kernel
|
||||||
|
:param X2: (optional) the second set of arguments to the kernel. If X2
|
||||||
|
is None, this is passed throgh to the 'part' object, which
|
||||||
|
handles this as X2 == X.
|
||||||
|
:param which_parts: a list of booleans detailing whether to include
|
||||||
|
each of the part functions. By default, 'all'
|
||||||
|
indicates [True]*self.num_parts
|
||||||
|
"""
|
||||||
if which_parts == 'all':
|
if which_parts == 'all':
|
||||||
which_parts = [True] * self.Nparts
|
which_parts = [True] * self.num_parts
|
||||||
assert X.shape[1] == self.input_dim
|
assert X.shape[1] == self.input_dim
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
target = np.zeros((X.shape[0], X.shape[0]))
|
target = np.zeros((X.shape[0], X.shape[0]))
|
||||||
[p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
|
[p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
|
||||||
else:
|
else:
|
||||||
target = np.zeros((X.shape[0], X2.shape[0]))
|
target = np.zeros((X.shape[0], X2.shape[0]))
|
||||||
[p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
|
[p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def dK_dtheta(self, dL_dK, X, X2=None):
|
def dK_dtheta(self, dL_dK, X, X2=None):
|
||||||
|
|
@ -329,18 +351,19 @@ class kern(Parameterized):
|
||||||
:param X2: Observed data inputs (optional, defaults to X)
|
:param X2: Observed data inputs (optional, defaults to X)
|
||||||
:type X2: np.ndarray (num_inducing x input_dim)
|
:type X2: np.ndarray (num_inducing x input_dim)
|
||||||
|
|
||||||
|
returns: dL_dtheta
|
||||||
"""
|
"""
|
||||||
assert X.shape[1] == self.input_dim
|
assert X.shape[1] == self.input_dim
|
||||||
target = np.zeros(self.num_params)
|
target = np.zeros(self.num_params)
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
|
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self.param_slices)]
|
||||||
else:
|
else:
|
||||||
[p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
|
[p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self.param_slices)]
|
||||||
|
|
||||||
return self._transform_gradients(target)
|
return self._transform_gradients(target)
|
||||||
|
|
||||||
def dK_dX(self, dL_dK, X, X2=None):
|
def dK_dX(self, dL_dK, X, X2=None):
|
||||||
"""Compute the gradient of the covariance function with respect to X.
|
"""Compute the gradient of the objective function with respect to X.
|
||||||
|
|
||||||
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
|
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
|
||||||
:type dL_dK: np.ndarray (num_samples x num_inducing)
|
:type dL_dK: np.ndarray (num_samples x num_inducing)
|
||||||
|
|
@ -351,18 +374,18 @@ class kern(Parameterized):
|
||||||
|
|
||||||
target = np.zeros_like(X)
|
target = np.zeros_like(X)
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
[p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
[p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||||
else:
|
else:
|
||||||
[p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
[p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def Kdiag(self, X, which_parts='all'):
|
def Kdiag(self, X, which_parts='all'):
|
||||||
"""Compute the diagonal of the covariance function for inputs X."""
|
"""Compute the diagonal of the covariance function for inputs X."""
|
||||||
if which_parts == 'all':
|
if which_parts == 'all':
|
||||||
which_parts = [True] * self.Nparts
|
which_parts = [True] * self.num_parts
|
||||||
assert X.shape[1] == self.input_dim
|
assert X.shape[1] == self.input_dim
|
||||||
target = np.zeros(X.shape[0])
|
target = np.zeros(X.shape[0])
|
||||||
[p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on]
|
[p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def dKdiag_dtheta(self, dL_dKdiag, X):
|
def dKdiag_dtheta(self, dL_dKdiag, X):
|
||||||
|
|
@ -370,134 +393,203 @@ class kern(Parameterized):
|
||||||
assert X.shape[1] == self.input_dim
|
assert X.shape[1] == self.input_dim
|
||||||
assert dL_dKdiag.size == X.shape[0]
|
assert dL_dKdiag.size == X.shape[0]
|
||||||
target = np.zeros(self.num_params)
|
target = np.zeros(self.num_params)
|
||||||
[p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
|
[p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self.param_slices)]
|
||||||
return self._transform_gradients(target)
|
return self._transform_gradients(target)
|
||||||
|
|
||||||
def dKdiag_dX(self, dL_dKdiag, X):
|
def dKdiag_dX(self, dL_dKdiag, X):
|
||||||
assert X.shape[1] == self.input_dim
|
assert X.shape[1] == self.input_dim
|
||||||
target = np.zeros_like(X)
|
target = np.zeros_like(X)
|
||||||
[p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
[p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def psi0(self, Z, mu, S):
|
def psi0(self, Z, mu, S):
|
||||||
target = np.zeros(mu.shape[0])
|
target = np.zeros(mu.shape[0])
|
||||||
[p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
|
[p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
|
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
|
||||||
target = np.zeros(self.num_params)
|
target = np.zeros(self.num_params)
|
||||||
[p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
|
[p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self.param_slices, self.input_slices)]
|
||||||
return self._transform_gradients(target)
|
return self._transform_gradients(target)
|
||||||
|
|
||||||
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
|
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
|
||||||
target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
|
target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
|
||||||
[p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
[p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||||
return target_mu, target_S
|
return target_mu, target_S
|
||||||
|
|
||||||
def psi1(self, Z, mu, S):
|
def psi1(self, Z, mu, S):
|
||||||
target = np.zeros((mu.shape[0], Z.shape[0]))
|
target = np.zeros((mu.shape[0], Z.shape[0]))
|
||||||
[p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
|
[p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
|
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
|
||||||
target = np.zeros((self.num_params))
|
target = np.zeros((self.num_params))
|
||||||
[p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
|
[p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self.param_slices, self.input_slices)]
|
||||||
return self._transform_gradients(target)
|
return self._transform_gradients(target)
|
||||||
|
|
||||||
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
|
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
|
||||||
target = np.zeros_like(Z)
|
target = np.zeros_like(Z)
|
||||||
[p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
[p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
|
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
|
||||||
"""return shapes are num_samples,num_inducing,input_dim"""
|
"""return shapes are num_samples,num_inducing,input_dim"""
|
||||||
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
|
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
|
||||||
[p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
[p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||||
return target_mu, target_S
|
return target_mu, target_S
|
||||||
|
|
||||||
def psi2(self, Z, mu, S):
|
def psi2(self, Z, mu, S):
|
||||||
"""
|
"""
|
||||||
Computer the psi2 statistics for the covariance function.
|
:param Z: np.ndarray of inducing inputs (M x Q)
|
||||||
|
:param mu, S: np.ndarrays of means and variances (each N x Q)
|
||||||
:param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
|
:returns psi2: np.ndarray (N,M,M)
|
||||||
:param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
|
|
||||||
:returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
|
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
|
||||||
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
|
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||||
|
|
||||||
# compute the "cross" terms
|
# compute the "cross" terms
|
||||||
# TODO: input_slices needed
|
# TODO: input_slices needed
|
||||||
crossterms = 0
|
from parts.white import White
|
||||||
|
from parts.rbf import RBF
|
||||||
|
from parts.rbf_inv import RBFInv
|
||||||
|
from parts.bias import Bias
|
||||||
|
from parts.linear import Linear
|
||||||
|
|
||||||
for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self.parts, self.input_slices), 2):
|
for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self._parameters_, self._param_slices_), 2):
|
||||||
if i_s1 == i_s2:
|
# white doesn;t combine with anything
|
||||||
# TODO psi1 this must be faster/better/precached/more nice
|
if isinstance(p1, White) or isinstance(p2, White):
|
||||||
tmp1 = np.zeros((mu.shape[0], Z.shape[0]))
|
pass
|
||||||
p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1)
|
# rbf X bias
|
||||||
tmp2 = np.zeros((mu.shape[0], Z.shape[0]))
|
elif isinstance(p1, Bias) and isinstance(p2, (RBF, RBFInv)):
|
||||||
p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2)
|
target += p1.variance * (p2._psi1[:, :, None] + p2._psi1[:, None, :])
|
||||||
|
elif isinstance(p2, Bias) and isinstance(p1, (RBF, RBFInv)):
|
||||||
prod = np.multiply(tmp1, tmp2)
|
target += p2.variance * (p1._psi1[:, :, None] + p1._psi1[:, None, :])
|
||||||
crossterms += prod[:, :, None] + prod[:, None, :]
|
# linear X bias
|
||||||
|
elif isinstance(p1, Bias) and isinstance(p2, Linear):
|
||||||
# target += crossterms
|
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
||||||
return target + crossterms
|
p2.psi1(Z, mu, S, tmp)
|
||||||
|
target += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
|
||||||
|
elif isinstance(p2, Bias) and isinstance(p1, Linear):
|
||||||
|
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
||||||
|
p1.psi1(Z, mu, S, tmp)
|
||||||
|
target += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
|
||||||
|
# rbf X linear
|
||||||
|
elif isinstance(p1, Linear) and isinstance(p2, (RBF, RBFInv)):
|
||||||
|
pass
|
||||||
|
elif isinstance(p2, Linear) and isinstance(p1, (RBF, RBFInv)):
|
||||||
|
raise NotImplementedError # TODO
|
||||||
|
elif isinstance(p1, (RBF, RBFInv)) and isinstance(p2, (RBF, RBFInv)):
|
||||||
|
raise NotImplementedError # TODO
|
||||||
|
elif isinstance(p2, (RBF, RBFInv)) and isinstance(p1, (RBF, RBFInv)):
|
||||||
|
raise NotImplementedError # TODO
|
||||||
|
else:
|
||||||
|
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||||
|
return target
|
||||||
|
|
||||||
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
|
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
|
||||||
"""Gradient of the psi2 statistics with respect to the parameters."""
|
target = np.zeros(self.Nparam)
|
||||||
target = np.zeros(self.num_params)
|
[p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self.param_slices)]
|
||||||
[p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
|
|
||||||
|
|
||||||
# compute the "cross" terms
|
# compute the "cross" terms
|
||||||
# TODO: better looping, input_slices
|
# TODO: better looping, input_slices
|
||||||
for i1, i2 in itertools.permutations(range(len(self.parts)), 2):
|
for i1, i2 in itertools.combinations(range(len(self._parameters_)), 2):
|
||||||
p1, p2 = self.parts[i1], self.parts[i2]
|
p1, p2 = self._parameters_[i1], self._parameters_[i2]
|
||||||
# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
|
# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
|
||||||
ps1, ps2 = self.param_slices[i1], self.param_slices[i2]
|
ps1, ps2 = self.param_slices[i1], self.param_slices[i2]
|
||||||
|
|
||||||
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
# white doesn;t combine with anything
|
||||||
p1.psi1(Z, mu, S, tmp)
|
if p1.name == 'white' or p2.name == 'white':
|
||||||
p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2])
|
pass
|
||||||
|
# rbf X bias
|
||||||
|
elif p1.name == 'bias' and p2.name == 'rbf':
|
||||||
|
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2])
|
||||||
|
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2._psi1 * 2., Z, mu, S, target[ps1])
|
||||||
|
elif p2.name == 'bias' and p1.name == 'rbf':
|
||||||
|
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1])
|
||||||
|
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1._psi1 * 2., Z, mu, S, target[ps2])
|
||||||
|
# linear X bias
|
||||||
|
elif p1.name == 'bias' and p2.name == 'linear':
|
||||||
|
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2]) # [ps1])
|
||||||
|
psi1 = np.zeros((mu.shape[0], Z.shape[0]))
|
||||||
|
p2.psi1(Z, mu, S, psi1)
|
||||||
|
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps1])
|
||||||
|
elif p2.name == 'bias' and p1.name == 'linear':
|
||||||
|
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1])
|
||||||
|
psi1 = np.zeros((mu.shape[0], Z.shape[0]))
|
||||||
|
p1.psi1(Z, mu, S, psi1)
|
||||||
|
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps2])
|
||||||
|
# rbf X linear
|
||||||
|
elif p1.name == 'linear' and p2.name == 'rbf':
|
||||||
|
raise NotImplementedError # TODO
|
||||||
|
elif p2.name == 'linear' and p1.name == 'rbf':
|
||||||
|
raise NotImplementedError # TODO
|
||||||
|
else:
|
||||||
|
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||||
|
|
||||||
return self._transform_gradients(target)
|
return self._transform_gradients(target)
|
||||||
|
|
||||||
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
|
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
|
||||||
target = np.zeros_like(Z)
|
target = np.zeros_like(Z)
|
||||||
[p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
[p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||||
# target *= 2
|
|
||||||
|
|
||||||
# compute the "cross" terms
|
# compute the "cross" terms
|
||||||
# TODO: we need input_slices here.
|
# TODO: we need input_slices here.
|
||||||
for p1, p2 in itertools.permutations(self.parts, 2):
|
for p1, p2 in itertools.combinations(self._parameters_, 2):
|
||||||
if p1.name == 'linear' and p2.name == 'linear':
|
# white doesn;t combine with anything
|
||||||
raise NotImplementedError("We don't handle linear/linear cross-terms")
|
if p1.name == 'white' or p2.name == 'white':
|
||||||
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
pass
|
||||||
p1.psi1(Z, mu, S, tmp)
|
# rbf X bias
|
||||||
p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target)
|
elif p1.name == 'bias' and p2.name == 'rbf':
|
||||||
|
p2.dpsi1_dX(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target)
|
||||||
|
elif p2.name == 'bias' and p1.name == 'rbf':
|
||||||
|
p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target)
|
||||||
|
# linear X bias
|
||||||
|
elif p1.name == 'bias' and p2.name == 'linear':
|
||||||
|
p2.dpsi1_dZ(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target)
|
||||||
|
elif p2.name == 'bias' and p1.name == 'linear':
|
||||||
|
p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target)
|
||||||
|
# rbf X linear
|
||||||
|
elif p1.name == 'linear' and p2.name == 'rbf':
|
||||||
|
raise NotImplementedError # TODO
|
||||||
|
elif p2.name == 'linear' and p1.name == 'rbf':
|
||||||
|
raise NotImplementedError # TODO
|
||||||
|
else:
|
||||||
|
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||||
|
|
||||||
return target * 2
|
return target * 2.
|
||||||
|
|
||||||
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
|
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
|
||||||
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
|
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
|
||||||
[p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
[p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||||
|
|
||||||
# compute the "cross" terms
|
# compute the "cross" terms
|
||||||
# TODO: we need input_slices here.
|
# TODO: we need input_slices here.
|
||||||
for p1, p2 in itertools.permutations(self.parts, 2):
|
for p1, p2 in itertools.combinations(self._parameters_, 2):
|
||||||
if p1.name == 'linear' and p2.name == 'linear':
|
# white doesn;t combine with anything
|
||||||
raise NotImplementedError("We don't handle linear/linear cross-terms")
|
if p1.name == 'white' or p2.name == 'white':
|
||||||
|
pass
|
||||||
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
# rbf X bias
|
||||||
p1.psi1(Z, mu, S, tmp)
|
elif p1.name == 'bias' and p2.name == 'rbf':
|
||||||
p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S)
|
p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S)
|
||||||
|
elif p2.name == 'bias' and p1.name == 'rbf':
|
||||||
|
p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S)
|
||||||
|
# linear X bias
|
||||||
|
elif p1.name == 'bias' and p2.name == 'linear':
|
||||||
|
p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S)
|
||||||
|
elif p2.name == 'bias' and p1.name == 'linear':
|
||||||
|
p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S)
|
||||||
|
# rbf X linear
|
||||||
|
elif p1.name == 'linear' and p2.name == 'rbf':
|
||||||
|
raise NotImplementedError # TODO
|
||||||
|
elif p2.name == 'linear' and p1.name == 'rbf':
|
||||||
|
raise NotImplementedError # TODO
|
||||||
|
else:
|
||||||
|
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||||
|
|
||||||
return target_mu, target_S
|
return target_mu, target_S
|
||||||
|
|
||||||
def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs):
|
def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs):
|
||||||
if which_parts == 'all':
|
if which_parts == 'all':
|
||||||
which_parts = [True] * self.Nparts
|
which_parts = [True] * self.num_parts
|
||||||
if self.input_dim == 1:
|
if self.input_dim == 1:
|
||||||
if x is None:
|
if x is None:
|
||||||
x = np.zeros((1, 1))
|
x = np.zeros((1, 1))
|
||||||
|
|
@ -658,7 +750,7 @@ class Kern_check_dKdiag_dX(Kern_check_model):
|
||||||
def _set_params(self, x):
|
def _set_params(self, x):
|
||||||
self.X=x.reshape(self.X.shape)
|
self.X=x.reshape(self.X.shape)
|
||||||
|
|
||||||
def kern_test(kern, X=None, X2=None, verbose=False):
|
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
|
||||||
"""This function runs on kernels to check the correctness of their implementation. It checks that the covariance function is positive definite for a randomly generated data set.
|
"""This function runs on kernels to check the correctness of their implementation. It checks that the covariance function is positive definite for a randomly generated data set.
|
||||||
|
|
||||||
:param kern: the kernel to be tested.
|
:param kern: the kernel to be tested.
|
||||||
|
|
@ -672,8 +764,13 @@ def kern_test(kern, X=None, X2=None, verbose=False):
|
||||||
pass_checks = True
|
pass_checks = True
|
||||||
if X==None:
|
if X==None:
|
||||||
X = np.random.randn(10, kern.input_dim)
|
X = np.random.randn(10, kern.input_dim)
|
||||||
|
if output_ind is not None:
|
||||||
|
X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0])
|
||||||
if X2==None:
|
if X2==None:
|
||||||
X2 = np.random.randn(20, kern.input_dim)
|
X2 = np.random.randn(20, kern.input_dim)
|
||||||
|
if output_ind is not None:
|
||||||
|
X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0])
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
print("Checking covariance function is positive definite.")
|
print("Checking covariance function is positive definite.")
|
||||||
result = Kern_check_model(kern, X=X).is_positive_definite()
|
result = Kern_check_model(kern, X=X).is_positive_definite()
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
from IPython.core.debugger import Tracer; debug_here=Tracer()
|
|
||||||
from kernpart import Kernpart
|
from kernpart import Kernpart
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from ...util.linalg import tdot
|
from ...util.linalg import tdot
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ from independent_outputs import index_to_slices
|
||||||
|
|
||||||
class Hierarchical(Kernpart):
|
class Hierarchical(Kernpart):
|
||||||
"""
|
"""
|
||||||
A kernel part which can reopresent a hierarchy of indepencnce: a gerenalisation of independent_outputs
|
A kernel part which can reopresent a hierarchy of indepencnce: a generalisation of independent_outputs
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self,parts):
|
def __init__(self,parts):
|
||||||
|
|
|
||||||
|
|
@ -5,15 +5,18 @@
|
||||||
class Kernpart(object):
|
class Kernpart(object):
|
||||||
def __init__(self,input_dim):
|
def __init__(self,input_dim):
|
||||||
"""
|
"""
|
||||||
The base class for a kernpart: a positive definite function which forms part of a kernel
|
The base class for a kernpart: a positive definite function which forms part of a covariance function (kernel).
|
||||||
|
|
||||||
:param input_dim: the number of input dimensions to the function
|
:param input_dim: the number of input dimensions to the function
|
||||||
:type input_dim: int
|
:type input_dim: int
|
||||||
|
|
||||||
Do not instantiate.
|
Do not instantiate.
|
||||||
"""
|
"""
|
||||||
|
# the input dimensionality for the covariance
|
||||||
self.input_dim = input_dim
|
self.input_dim = input_dim
|
||||||
|
# the number of optimisable parameters
|
||||||
self.num_params = 1
|
self.num_params = 1
|
||||||
|
# the name of the covariance function.
|
||||||
self.name = 'unnamed'
|
self.name = 'unnamed'
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ import numpy as np
|
||||||
from ...util.linalg import tdot
|
from ...util.linalg import tdot
|
||||||
from ...util.misc import fast_array_equal
|
from ...util.misc import fast_array_equal
|
||||||
from scipy import weave
|
from scipy import weave
|
||||||
|
from ...util.config import *
|
||||||
|
|
||||||
class Linear(Kernpart):
|
class Linear(Kernpart):
|
||||||
"""
|
"""
|
||||||
|
|
@ -51,6 +52,26 @@ class Linear(Kernpart):
|
||||||
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
|
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
|
||||||
self._X, self._X2, self._params = np.empty(shape=(3, 1))
|
self._X, self._X2, self._params = np.empty(shape=(3, 1))
|
||||||
|
|
||||||
|
# a set of optional args to pass to weave
|
||||||
|
weave_options_openmp = {'headers' : ['<omp.h>'],
|
||||||
|
'extra_compile_args': ['-fopenmp -O3'],
|
||||||
|
'extra_link_args' : ['-lgomp'],
|
||||||
|
'libraries': ['gomp']}
|
||||||
|
weave_options_noopenmp = {'extra_compile_args': ['-O3']}
|
||||||
|
|
||||||
|
|
||||||
|
if config.getboolean('parallel', 'openmp'):
|
||||||
|
self.weave_options = weave_options_openmp
|
||||||
|
self.weave_support_code = """
|
||||||
|
#include <omp.h>
|
||||||
|
#include <math.h>
|
||||||
|
"""
|
||||||
|
else:
|
||||||
|
self.weave_options = weave_options_noopenmp
|
||||||
|
self.weave_support_code = """
|
||||||
|
#include <math.h>
|
||||||
|
"""
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
return self.variances
|
return self.variances
|
||||||
|
|
||||||
|
|
@ -190,11 +211,17 @@ class Linear(Kernpart):
|
||||||
#target_mu_dummy += (dL_dpsi2[:, :, :, None] * muAZZA).sum(1).sum(1)
|
#target_mu_dummy += (dL_dpsi2[:, :, :, None] * muAZZA).sum(1).sum(1)
|
||||||
#target_S_dummy += (dL_dpsi2[:, :, :, None] * self.ZA[None, :, None, :] * self.ZA[None, None, :, :]).sum(1).sum(1)
|
#target_S_dummy += (dL_dpsi2[:, :, :, None] * self.ZA[None, :, None, :] * self.ZA[None, None, :, :]).sum(1).sum(1)
|
||||||
|
|
||||||
|
|
||||||
|
if config.getboolean('parallel', 'openmp'):
|
||||||
|
pragma_string = "#pragma omp parallel for private(m,mm,q,qq,factor,tmp)"
|
||||||
|
else:
|
||||||
|
pragma_string = ''
|
||||||
|
|
||||||
#Using weave, we can exploit the symmetry of this problem:
|
#Using weave, we can exploit the symmetry of this problem:
|
||||||
code = """
|
code = """
|
||||||
int n, m, mm,q,qq;
|
int n, m, mm,q,qq;
|
||||||
double factor,tmp;
|
double factor,tmp;
|
||||||
#pragma omp parallel for private(m,mm,q,qq,factor,tmp)
|
%s
|
||||||
for(n=0;n<N;n++){
|
for(n=0;n<N;n++){
|
||||||
for(m=0;m<num_inducing;m++){
|
for(m=0;m<num_inducing;m++){
|
||||||
for(mm=0;mm<=m;mm++){
|
for(mm=0;mm<=m;mm++){
|
||||||
|
|
@ -218,19 +245,13 @@ class Linear(Kernpart):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
""" % pragma_string
|
||||||
support_code = """
|
|
||||||
#include <omp.h>
|
|
||||||
#include <math.h>
|
|
||||||
"""
|
|
||||||
weave_options = {'headers' : ['<omp.h>'],
|
|
||||||
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
|
|
||||||
'extra_link_args' : ['-lgomp']}
|
|
||||||
|
|
||||||
N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1]
|
|
||||||
weave.inline(code, support_code=support_code, libraries=['gomp'],
|
N,num_inducing,input_dim = int(mu.shape[0]),int(Z.shape[0]),int(mu.shape[1])
|
||||||
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
|
weave.inline(code, support_code=self.weave_support_code,
|
||||||
type_converters=weave.converters.blitz,**weave_options)
|
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
|
||||||
|
type_converters=weave.converters.blitz,**self.weave_options)
|
||||||
|
|
||||||
|
|
||||||
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
|
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
|
||||||
|
|
@ -240,9 +261,15 @@ class Linear(Kernpart):
|
||||||
#dummy_target += psi2_dZ.sum(0).sum(0)
|
#dummy_target += psi2_dZ.sum(0).sum(0)
|
||||||
|
|
||||||
AZA = self.variances*self.ZAinner
|
AZA = self.variances*self.ZAinner
|
||||||
|
|
||||||
|
if config.getboolean('parallel', 'openmp'):
|
||||||
|
pragma_string = '#pragma omp parallel for private(n,mm,q)'
|
||||||
|
else:
|
||||||
|
pragma_string = ''
|
||||||
|
|
||||||
code="""
|
code="""
|
||||||
int n,m,mm,q;
|
int n,m,mm,q;
|
||||||
#pragma omp parallel for private(n,mm,q)
|
%s
|
||||||
for(m=0;m<num_inducing;m++){
|
for(m=0;m<num_inducing;m++){
|
||||||
for(q=0;q<input_dim;q++){
|
for(q=0;q<input_dim;q++){
|
||||||
for(mm=0;mm<num_inducing;mm++){
|
for(mm=0;mm<num_inducing;mm++){
|
||||||
|
|
@ -252,22 +279,13 @@ class Linear(Kernpart):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
""" % pragma_string
|
||||||
support_code = """
|
|
||||||
#include <omp.h>
|
|
||||||
#include <math.h>
|
|
||||||
"""
|
|
||||||
weave_options = {'headers' : ['<omp.h>'],
|
|
||||||
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
|
|
||||||
'extra_link_args' : ['-lgomp']}
|
|
||||||
|
|
||||||
N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1]
|
|
||||||
weave.inline(code, support_code=support_code, libraries=['gomp'],
|
N,num_inducing,input_dim = int(mu.shape[0]),int(Z.shape[0]),int(mu.shape[1])
|
||||||
|
weave.inline(code, support_code=self.weave_support_code,
|
||||||
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
|
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
|
||||||
type_converters=weave.converters.blitz,**weave_options)
|
type_converters=weave.converters.blitz,**self.weave_options)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#---------------------------------------#
|
#---------------------------------------#
|
||||||
|
|
|
||||||
|
|
@ -113,7 +113,7 @@ class PeriodicMatern32(Kernpart):
|
||||||
|
|
||||||
@silence_errors
|
@silence_errors
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||||
"""derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
|
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
|
||||||
if X2 is None: X2 = X
|
if X2 is None: X2 = X
|
||||||
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
||||||
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
|
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
|
||||||
|
|
|
||||||
|
|
@ -115,7 +115,7 @@ class PeriodicMatern52(Kernpart):
|
||||||
|
|
||||||
@silence_errors
|
@silence_errors
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||||
"""derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
|
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
|
||||||
if X2 is None: X2 = X
|
if X2 is None: X2 = X
|
||||||
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
||||||
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
|
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
|
||||||
|
|
|
||||||
|
|
@ -111,7 +111,7 @@ class PeriodicExponential(Kernpart):
|
||||||
|
|
||||||
@silence_errors
|
@silence_errors
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||||
"""derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
|
"""derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
|
||||||
if X2 is None: X2 = X
|
if X2 is None: X2 = X
|
||||||
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
||||||
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
|
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ import numpy as np
|
||||||
from scipy import weave
|
from scipy import weave
|
||||||
from ...util.linalg import tdot
|
from ...util.linalg import tdot
|
||||||
from ...util.misc import fast_array_equal
|
from ...util.misc import fast_array_equal
|
||||||
|
from ...util.config import *
|
||||||
|
|
||||||
class RBF(Kernpart):
|
class RBF(Kernpart):
|
||||||
"""
|
"""
|
||||||
|
|
@ -57,12 +58,27 @@ class RBF(Kernpart):
|
||||||
self._X, self._X2, self._params = np.empty(shape=(3, 1))
|
self._X, self._X2, self._params = np.empty(shape=(3, 1))
|
||||||
|
|
||||||
# a set of optional args to pass to weave
|
# a set of optional args to pass to weave
|
||||||
self.weave_options = {'headers' : ['<omp.h>'],
|
weave_options_openmp = {'headers' : ['<omp.h>'],
|
||||||
'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
|
'extra_compile_args': ['-fopenmp -O3'],
|
||||||
'extra_link_args' : ['-lgomp']}
|
'extra_link_args' : ['-lgomp'],
|
||||||
|
'libraries': ['gomp']}
|
||||||
|
weave_options_noopenmp = {'extra_compile_args': ['-O3']}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if config.getboolean('parallel', 'openmp'):
|
||||||
|
self.weave_options = weave_options_openmp
|
||||||
|
self.weave_support_code = """
|
||||||
|
#include <omp.h>
|
||||||
|
#include <math.h>
|
||||||
|
"""
|
||||||
|
else:
|
||||||
|
self.weave_options = weave_options_noopenmp
|
||||||
|
self.weave_support_code = """
|
||||||
|
#include <math.h>
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
return np.hstack((self.variance, self.lengthscale))
|
return np.hstack((self.variance, self.lengthscale))
|
||||||
|
|
||||||
|
|
@ -110,7 +126,7 @@ class RBF(Kernpart):
|
||||||
target(q+1) += var_len3(q)*tmp;
|
target(q+1) += var_len3(q)*tmp;
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
|
num_data, num_inducing, input_dim = int(X.shape[0]), int(X.shape[0]), int(self.input_dim)
|
||||||
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
|
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
|
||||||
else:
|
else:
|
||||||
code = """
|
code = """
|
||||||
|
|
@ -126,7 +142,7 @@ class RBF(Kernpart):
|
||||||
target(q+1) += var_len3(q)*tmp;
|
target(q+1) += var_len3(q)*tmp;
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
|
num_data, num_inducing, input_dim = int(X.shape[0]), int(X2.shape[0]), int(self.input_dim)
|
||||||
# [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
|
# [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
|
||||||
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
|
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
|
||||||
else:
|
else:
|
||||||
|
|
@ -287,10 +303,16 @@ class RBF(Kernpart):
|
||||||
lengthscale2 = self.lengthscale2
|
lengthscale2 = self.lengthscale2
|
||||||
else:
|
else:
|
||||||
lengthscale2 = np.ones(input_dim) * self.lengthscale2
|
lengthscale2 = np.ones(input_dim) * self.lengthscale2
|
||||||
|
|
||||||
|
if config.getboolean('parallel', 'openmp'):
|
||||||
|
pragma_string = '#pragma omp parallel for private(tmp)'
|
||||||
|
else:
|
||||||
|
pragma_string = ''
|
||||||
|
|
||||||
code = """
|
code = """
|
||||||
double tmp;
|
double tmp;
|
||||||
|
|
||||||
#pragma omp parallel for private(tmp)
|
%s
|
||||||
for (int n=0; n<N; n++){
|
for (int n=0; n<N; n++){
|
||||||
for (int m=0; m<num_inducing; m++){
|
for (int m=0; m<num_inducing; m++){
|
||||||
for (int mm=0; mm<(m+1); mm++){
|
for (int mm=0; mm<(m+1); mm++){
|
||||||
|
|
@ -320,13 +342,20 @@ class RBF(Kernpart):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
"""
|
""" % pragma_string
|
||||||
|
|
||||||
|
if config.getboolean('parallel', 'openmp'):
|
||||||
|
pragma_string = '#include <omp.h>'
|
||||||
|
else:
|
||||||
|
pragma_string = ''
|
||||||
|
|
||||||
support_code = """
|
support_code = """
|
||||||
#include <omp.h>
|
%s
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
"""
|
""" % pragma_string
|
||||||
weave.inline(code, support_code=support_code, libraries=['gomp'],
|
|
||||||
|
N, num_inducing, input_dim = int(N), int(num_inducing), int(input_dim)
|
||||||
|
weave.inline(code, support_code=support_code,
|
||||||
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
|
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
|
||||||
type_converters=weave.converters.blitz, **self.weave_options)
|
type_converters=weave.converters.blitz, **self.weave_options)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,8 @@ import numpy as np
|
||||||
import hashlib
|
import hashlib
|
||||||
from scipy import weave
|
from scipy import weave
|
||||||
from ...util.linalg import tdot
|
from ...util.linalg import tdot
|
||||||
|
from ...util.config import *
|
||||||
|
|
||||||
|
|
||||||
class RBFInv(RBF):
|
class RBFInv(RBF):
|
||||||
"""
|
"""
|
||||||
|
|
@ -58,11 +60,23 @@ class RBFInv(RBF):
|
||||||
self._X, self._X2, self._params = np.empty(shape=(3, 1))
|
self._X, self._X2, self._params = np.empty(shape=(3, 1))
|
||||||
|
|
||||||
# a set of optional args to pass to weave
|
# a set of optional args to pass to weave
|
||||||
self.weave_options = {'headers' : ['<omp.h>'],
|
weave_options_openmp = {'headers' : ['<omp.h>'],
|
||||||
'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
|
'extra_compile_args': ['-fopenmp -O3'],
|
||||||
'extra_link_args' : ['-lgomp']}
|
'extra_link_args' : ['-lgomp'],
|
||||||
|
'libraries': ['gomp']}
|
||||||
|
weave_options_noopenmp = {'extra_compile_args': ['-O3']}
|
||||||
|
|
||||||
|
if config.getboolean('parallel', 'openmp'):
|
||||||
|
self.weave_options = weave_options_openmp
|
||||||
|
self.weave_support_code = """
|
||||||
|
#include <omp.h>
|
||||||
|
#include <math.h>
|
||||||
|
"""
|
||||||
|
else:
|
||||||
|
self.weave_options = weave_options_noopenmp
|
||||||
|
self.weave_support_code = """
|
||||||
|
#include <math.h>
|
||||||
|
"""
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
return np.hstack((self.variance, self.inv_lengthscale))
|
return np.hstack((self.variance, self.inv_lengthscale))
|
||||||
|
|
@ -109,7 +123,7 @@ class RBFInv(RBF):
|
||||||
target(q+1) += var_len3(q)*tmp*(-len2(q));
|
target(q+1) += var_len3(q)*tmp*(-len2(q));
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
|
num_data, num_inducing, input_dim = int(X.shape[0]), int(X.shape[0]), int(self.input_dim)
|
||||||
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
|
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
|
||||||
else:
|
else:
|
||||||
code = """
|
code = """
|
||||||
|
|
@ -125,7 +139,7 @@ class RBFInv(RBF):
|
||||||
target(q+1) += var_len3(q)*tmp*(-len2(q));
|
target(q+1) += var_len3(q)*tmp*(-len2(q));
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
|
num_data, num_inducing, input_dim = int(X.shape[0]), int(X2.shape[0]), int(self.input_dim)
|
||||||
# [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
|
# [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
|
||||||
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
|
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
|
||||||
else:
|
else:
|
||||||
|
|
@ -263,8 +277,8 @@ class RBFInv(RBF):
|
||||||
self._Z, self._mu, self._S = Z, mu, S
|
self._Z, self._mu, self._S = Z, mu, S
|
||||||
|
|
||||||
def weave_psi2(self, mu, Zhat):
|
def weave_psi2(self, mu, Zhat):
|
||||||
N, input_dim = mu.shape
|
N, input_dim = int(mu.shape[0]), int(mu.shape[1])
|
||||||
num_inducing = Zhat.shape[0]
|
num_inducing = int(Zhat.shape[0])
|
||||||
|
|
||||||
mudist = np.empty((N, num_inducing, num_inducing, input_dim))
|
mudist = np.empty((N, num_inducing, num_inducing, input_dim))
|
||||||
mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
|
mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
|
||||||
|
|
@ -279,10 +293,16 @@ class RBFInv(RBF):
|
||||||
inv_lengthscale2 = self.inv_lengthscale2
|
inv_lengthscale2 = self.inv_lengthscale2
|
||||||
else:
|
else:
|
||||||
inv_lengthscale2 = np.ones(input_dim) * self.inv_lengthscale2
|
inv_lengthscale2 = np.ones(input_dim) * self.inv_lengthscale2
|
||||||
|
|
||||||
|
if config.getboolean('parallel', 'openmp'):
|
||||||
|
pragma_string = '#pragma omp parallel for private(tmp)'
|
||||||
|
else:
|
||||||
|
pragma_string = ''
|
||||||
|
|
||||||
code = """
|
code = """
|
||||||
double tmp;
|
double tmp;
|
||||||
|
|
||||||
#pragma omp parallel for private(tmp)
|
%s
|
||||||
for (int n=0; n<N; n++){
|
for (int n=0; n<N; n++){
|
||||||
for (int m=0; m<num_inducing; m++){
|
for (int m=0; m<num_inducing; m++){
|
||||||
for (int mm=0; mm<(m+1); mm++){
|
for (int mm=0; mm<(m+1); mm++){
|
||||||
|
|
@ -312,13 +332,9 @@ class RBFInv(RBF):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
"""
|
""" % pragma_string
|
||||||
|
|
||||||
support_code = """
|
weave.inline(code, support_code=self.weave_support_code,
|
||||||
#include <omp.h>
|
|
||||||
#include <math.h>
|
|
||||||
"""
|
|
||||||
weave.inline(code, support_code=support_code, libraries=['gomp'],
|
|
||||||
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'inv_lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
|
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'inv_lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
|
||||||
type_converters=weave.converters.blitz, **self.weave_options)
|
type_converters=weave.converters.blitz, **self.weave_options)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,7 @@
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
#include <float.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
double DiracDelta(double x){
|
double DiracDelta(double x){
|
||||||
// TODO: this doesn't seem to be a dirac delta ... should return infinity. Neil
|
// TODO: this doesn't seem to be a dirac delta ... should return infinity. Neil
|
||||||
if((x<0.000001) & (x>-0.000001))//go on, laugh at my c++ skills
|
if((x<0.000001) & (x>-0.000001))//go on, laugh at my c++ skills
|
||||||
|
|
@ -23,3 +26,36 @@ double sinc_grad(double x){
|
||||||
else
|
else
|
||||||
return (x*cos(x) - sin(x))/(x*x);
|
return (x*cos(x) - sin(x))/(x*x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double erfcx(double x){
|
||||||
|
double xneg=-sqrt(log(DBL_MAX/2));
|
||||||
|
double xmax = 1/(sqrt(M_PI)*DBL_MIN);
|
||||||
|
xmax = DBL_MAX<xmax ? DBL_MAX : xmax;
|
||||||
|
// Find values where erfcx can be evaluated
|
||||||
|
double t = 3.97886080735226 / (abs(x) + 3.97886080735226);
|
||||||
|
double u = t-0.5;
|
||||||
|
double y = (((((((((u * 0.00127109764952614092 + 1.19314022838340944e-4) * u
|
||||||
|
- 0.003963850973605135) * u - 8.70779635317295828e-4) * u
|
||||||
|
+ 0.00773672528313526668) * u + 0.00383335126264887303) * u
|
||||||
|
- 0.0127223813782122755) * u - 0.0133823644533460069) * u
|
||||||
|
+ 0.0161315329733252248) * u + 0.0390976845588484035) * u + 0.00249367200053503304;
|
||||||
|
if (x<xneg)
|
||||||
|
return -INFINITY;
|
||||||
|
else if (x<0)
|
||||||
|
return 2*exp(x*x)-y;
|
||||||
|
else if (x>xmax)
|
||||||
|
return 0.0;
|
||||||
|
else
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
|
||||||
|
double ln_diff_erf(double x0, double x1){
|
||||||
|
if (x0==x1)
|
||||||
|
return INFINITY;
|
||||||
|
else if(x0<0 && x1>0 || x0>0 && x1<0)
|
||||||
|
return log(erf(x0)-erf(x1));
|
||||||
|
else if(x1>0)
|
||||||
|
return log(erfcx(x1)-erfcx(x0)*exp(x1*x1)- x0*x0)-x1*x1;
|
||||||
|
else
|
||||||
|
return log(erfcx(-x0)-erfcx(-x1)*exp(x0*x0 - x1*x1))-x0*x0;
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,3 +4,6 @@ double DiracDelta(double x, int foo);
|
||||||
|
|
||||||
double sinc(double x);
|
double sinc(double x);
|
||||||
double sinc_grad(double x);
|
double sinc_grad(double x);
|
||||||
|
|
||||||
|
double erfcx(double x);
|
||||||
|
double ln_diff_erf(double x0, double x1);
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ import sys
|
||||||
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
|
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
|
||||||
import tempfile
|
import tempfile
|
||||||
import pdb
|
import pdb
|
||||||
|
import ast
|
||||||
from kernpart import Kernpart
|
from kernpart import Kernpart
|
||||||
|
|
||||||
class spkern(Kernpart):
|
class spkern(Kernpart):
|
||||||
|
|
@ -16,64 +17,388 @@ class spkern(Kernpart):
|
||||||
A kernel object, where all the hard work is done by sympy.
|
A kernel object, where all the hard work is done by sympy.
|
||||||
|
|
||||||
:param k: the covariance function
|
:param k: the covariance function
|
||||||
:type k: a positive definite sympy function of x1, z1, x2, z2...
|
:type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
|
||||||
|
|
||||||
To construct a new sympy kernel, you'll need to define:
|
To construct a new sympy kernel, you'll need to define:
|
||||||
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
|
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
|
||||||
- that's it! we'll extract the variables from the function k.
|
- that's it! we'll extract the variables from the function k.
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
- to handle multiple inputs, call them x1, z1, etc
|
- to handle multiple inputs, call them x_1, z_1, etc
|
||||||
- to handle multiple correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
|
- to handle multiple correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
|
||||||
"""
|
"""
|
||||||
def __init__(self,input_dim,k,name=None,param=None):
|
def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None):
|
||||||
if name is None:
|
if name is None:
|
||||||
self.name='sympykern'
|
self.name='sympykern'
|
||||||
else:
|
else:
|
||||||
self.name = name
|
self.name = name
|
||||||
|
if k is None:
|
||||||
|
raise ValueError, "You must provide an argument for the covariance function."
|
||||||
self._sp_k = k
|
self._sp_k = k
|
||||||
sp_vars = [e for e in k.atoms() if e.is_Symbol]
|
sp_vars = [e for e in k.atoms() if e.is_Symbol]
|
||||||
self._sp_x= sorted([e for e in sp_vars if e.name[0]=='x'],key=lambda x:int(x.name[1:]))
|
self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
|
||||||
self._sp_z= sorted([e for e in sp_vars if e.name[0]=='z'],key=lambda z:int(z.name[1:]))
|
self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
|
||||||
assert all([x.name=='x%i'%i for i,x in enumerate(self._sp_x)])
|
# Check that variable names make sense.
|
||||||
assert all([z.name=='z%i'%i for i,z in enumerate(self._sp_z)])
|
assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)])
|
||||||
|
assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)])
|
||||||
assert len(self._sp_x)==len(self._sp_z)
|
assert len(self._sp_x)==len(self._sp_z)
|
||||||
self.input_dim = len(self._sp_x)
|
self.input_dim = len(self._sp_x)
|
||||||
|
self._real_input_dim = self.input_dim
|
||||||
|
if output_dim > 1:
|
||||||
|
self.input_dim += 1
|
||||||
assert self.input_dim == input_dim
|
assert self.input_dim == input_dim
|
||||||
self._sp_theta = sorted([e for e in sp_vars if not (e.name[0]=='x' or e.name[0]=='z')],key=lambda e:e.name)
|
self.output_dim = output_dim
|
||||||
self.num_params = len(self._sp_theta)
|
# extract parameter names
|
||||||
|
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
|
||||||
|
|
||||||
|
|
||||||
|
# Look for parameters with index.
|
||||||
|
if self.output_dim>1:
|
||||||
|
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
|
||||||
|
self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
|
||||||
|
# Make sure parameter appears with both indices!
|
||||||
|
assert len(self._sp_theta_i)==len(self._sp_theta_j)
|
||||||
|
assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)])
|
||||||
|
|
||||||
|
# Extract names of shared parameters
|
||||||
|
self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j]
|
||||||
|
|
||||||
|
self.num_split_params = len(self._sp_theta_i)
|
||||||
|
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i]
|
||||||
|
for theta in self._split_theta_names:
|
||||||
|
setattr(self, theta, np.ones(self.output_dim))
|
||||||
|
|
||||||
|
self.num_shared_params = len(self._sp_theta)
|
||||||
|
self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
|
||||||
|
|
||||||
|
else:
|
||||||
|
self.num_split_params = 0
|
||||||
|
self._split_theta_names = []
|
||||||
|
self._sp_theta = thetas
|
||||||
|
self.num_shared_params = len(self._sp_theta)
|
||||||
|
self.num_params = self.num_shared_params
|
||||||
|
|
||||||
|
for theta in self._sp_theta:
|
||||||
|
val = 1.0
|
||||||
|
if param is not None:
|
||||||
|
if param.has_key(theta):
|
||||||
|
val = param[theta]
|
||||||
|
setattr(self, theta.name, val)
|
||||||
#deal with param
|
#deal with param
|
||||||
if param is None:
|
self._set_params(self._get_params())
|
||||||
param = np.ones(self.num_params)
|
|
||||||
assert param.size==self.num_params
|
|
||||||
self._set_params(param)
|
|
||||||
|
|
||||||
#Differentiate!
|
#Differentiate!
|
||||||
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
|
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
|
||||||
self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
|
if self.output_dim > 1:
|
||||||
#self._sp_dk_dz = [sp.diff(k,zi) for zi in self._sp_z]
|
self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i]
|
||||||
|
|
||||||
|
self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
|
||||||
|
|
||||||
|
if False:
|
||||||
|
self.compute_psi_stats()
|
||||||
|
|
||||||
#self.compute_psi_stats()
|
|
||||||
self._gen_code()
|
self._gen_code()
|
||||||
|
|
||||||
self.weave_kwargs = {\
|
if False:
|
||||||
'support_code':self._function_code,\
|
extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5']
|
||||||
'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],\
|
else:
|
||||||
'headers':['"sympy_helpers.h"'],\
|
extra_compile_args = []
|
||||||
'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],\
|
|
||||||
#'extra_compile_args':['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'],\
|
self.weave_kwargs = {
|
||||||
'extra_compile_args':[],\
|
'support_code':self._function_code,
|
||||||
'extra_link_args':['-lgomp'],\
|
'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],
|
||||||
|
'headers':['"sympy_helpers.h"'],
|
||||||
|
'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],
|
||||||
|
'extra_compile_args':extra_compile_args,
|
||||||
|
'extra_link_args':['-lgomp'],
|
||||||
'verbose':True}
|
'verbose':True}
|
||||||
|
|
||||||
def __add__(self,other):
|
def __add__(self,other):
|
||||||
return spkern(self._sp_k+other._sp_k)
|
return spkern(self._sp_k+other._sp_k)
|
||||||
|
|
||||||
|
def _gen_code(self):
|
||||||
|
"""Generates the C functions necessary for computing the covariance function using the sympy objects as input."""
|
||||||
|
#TODO: maybe generate one C function only to save compile time? Also easier to take that as a basis and hand craft other covariances??
|
||||||
|
|
||||||
|
#generate c functions from sympy objects
|
||||||
|
argument_sequence = self._sp_x+self._sp_z+self._sp_theta
|
||||||
|
code_list = [('k',self._sp_k)]
|
||||||
|
# gradients with respect to covariance input
|
||||||
|
code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]
|
||||||
|
# gradient with respect to parameters
|
||||||
|
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]
|
||||||
|
# gradient with respect to multiple output parameters
|
||||||
|
if self.output_dim > 1:
|
||||||
|
argument_sequence += self._sp_theta_i + self._sp_theta_j
|
||||||
|
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)]
|
||||||
|
(foo_c,self._function_code), (foo_h,self._function_header) = \
|
||||||
|
codegen(code_list, "C",'foobar',argument_sequence=argument_sequence)
|
||||||
|
#put the header file where we can find it
|
||||||
|
f = file(os.path.join(tempfile.gettempdir(),'foobar.h'),'w')
|
||||||
|
f.write(self._function_header)
|
||||||
|
f.close()
|
||||||
|
|
||||||
|
# Substitute any known derivatives which sympy doesn't compute
|
||||||
|
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
|
||||||
|
|
||||||
|
|
||||||
|
############################################################
|
||||||
|
# This is the basic argument construction for the C code. #
|
||||||
|
############################################################
|
||||||
|
|
||||||
|
arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
|
||||||
|
+ ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
|
||||||
|
|
||||||
|
# for multiple outputs need to also provide these arguments reversed.
|
||||||
|
if self.output_dim>1:
|
||||||
|
reverse_arg_list = list(arg_list)
|
||||||
|
reverse_arg_list.reverse()
|
||||||
|
|
||||||
|
# Add in any 'shared' parameters to the list.
|
||||||
|
param_arg_list = [shared_params.name for shared_params in self._sp_theta]
|
||||||
|
arg_list += param_arg_list
|
||||||
|
|
||||||
|
precompute_list=[]
|
||||||
|
if self.output_dim > 1:
|
||||||
|
reverse_arg_list+=list(param_arg_list)
|
||||||
|
split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
|
||||||
|
split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
|
||||||
|
arg_list += split_param_arg_list
|
||||||
|
reverse_arg_list += split_param_reverse_arg_list
|
||||||
|
# Extract the right output indices from the inputs.
|
||||||
|
c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
|
||||||
|
precompute_list += c_define_output_indices
|
||||||
|
reverse_arg_string = ", ".join(reverse_arg_list)
|
||||||
|
arg_string = ", ".join(arg_list)
|
||||||
|
precompute_string = "\n".join(precompute_list)
|
||||||
|
|
||||||
|
# Code to compute the argument string needed when only X is provided.
|
||||||
|
X_arg_string = re.sub('Z','X',arg_string)
|
||||||
|
# Code to compute argument string when only diagonal is required.
|
||||||
|
diag_arg_string = re.sub('int jj','//int jj',X_arg_string)
|
||||||
|
diag_arg_string = re.sub('j','i',diag_arg_string)
|
||||||
|
diag_precompute_string = precompute_list[0]
|
||||||
|
|
||||||
|
|
||||||
|
# Here's the code to do the looping for K
|
||||||
|
self._K_code =\
|
||||||
|
"""
|
||||||
|
// _K_code
|
||||||
|
// Code for computing the covariance function.
|
||||||
|
int i;
|
||||||
|
int j;
|
||||||
|
int N = target_array->dimensions[0];
|
||||||
|
int num_inducing = target_array->dimensions[1];
|
||||||
|
int input_dim = X_array->dimensions[1];
|
||||||
|
//#pragma omp parallel for private(j)
|
||||||
|
for (i=0;i<N;i++){
|
||||||
|
for (j=0;j<num_inducing;j++){
|
||||||
|
%s
|
||||||
|
//target[i*num_inducing+j] =
|
||||||
|
TARGET2(i, j) += k(%s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
%s
|
||||||
|
"""%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||||
|
|
||||||
|
self._K_code_X = """
|
||||||
|
// _K_code_X
|
||||||
|
// Code for computing the covariance function.
|
||||||
|
int i;
|
||||||
|
int j;
|
||||||
|
int N = target_array->dimensions[0];
|
||||||
|
int num_inducing = target_array->dimensions[1];
|
||||||
|
int input_dim = X_array->dimensions[1];
|
||||||
|
//#pragma omp parallel for private(j)
|
||||||
|
for (i=0;i<N;i++){
|
||||||
|
%s // int ii=(int)X2(i, 1);
|
||||||
|
TARGET2(i, i) += k(%s);
|
||||||
|
for (j=0;j<i;j++){
|
||||||
|
%s //int jj=(int)X2(j, 1);
|
||||||
|
double kval = k(%s); //double kval = k(X2(i, 0), X2(j, 0), shared_lengthscale, LENGTHSCALE1(ii), SCALE1(ii), LENGTHSCALE1(jj), SCALE1(jj));
|
||||||
|
TARGET2(i, j) += kval;
|
||||||
|
TARGET2(j, i) += kval;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*%s*/
|
||||||
|
"""%(diag_precompute_string, diag_arg_string, re.sub('Z2', 'X2', precompute_list[1]), X_arg_string,str(self._sp_k)) #adding a string representation forces recompile when needed
|
||||||
|
|
||||||
|
# Code to do the looping for Kdiag
|
||||||
|
self._Kdiag_code =\
|
||||||
|
"""
|
||||||
|
// _Kdiag_code
|
||||||
|
// Code for computing diagonal of covariance function.
|
||||||
|
int i;
|
||||||
|
int N = target_array->dimensions[0];
|
||||||
|
int input_dim = X_array->dimensions[1];
|
||||||
|
//#pragma omp parallel for
|
||||||
|
for (i=0;i<N;i++){
|
||||||
|
%s
|
||||||
|
//target[i] =
|
||||||
|
TARGET1(i)=k(%s);
|
||||||
|
}
|
||||||
|
%s
|
||||||
|
"""%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||||
|
|
||||||
|
# Code to compute gradients
|
||||||
|
grad_func_list = []
|
||||||
|
if self.output_dim>1:
|
||||||
|
grad_func_list += c_define_output_indices
|
||||||
|
grad_func_list += [' '*16 + 'TARGET1(%i+ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)]
|
||||||
|
grad_func_list += [' '*16 + 'TARGET1(%i+jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)]
|
||||||
|
grad_func_list += ([' '*16 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)])
|
||||||
|
grad_func_string = '\n'.join(grad_func_list)
|
||||||
|
|
||||||
|
self._dK_dtheta_code =\
|
||||||
|
"""
|
||||||
|
// _dK_dtheta_code
|
||||||
|
// Code for computing gradient of covariance with respect to parameters.
|
||||||
|
int i;
|
||||||
|
int j;
|
||||||
|
int N = partial_array->dimensions[0];
|
||||||
|
int num_inducing = partial_array->dimensions[1];
|
||||||
|
int input_dim = X_array->dimensions[1];
|
||||||
|
//#pragma omp parallel for private(j)
|
||||||
|
for (i=0;i<N;i++){
|
||||||
|
for (j=0;j<num_inducing;j++){
|
||||||
|
%s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
%s
|
||||||
|
"""%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
|
||||||
|
|
||||||
|
|
||||||
|
# Code to compute gradients for Kdiag TODO: needs clean up
|
||||||
|
diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
|
||||||
|
diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
|
||||||
|
diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
|
||||||
|
diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL1(i)',diag_grad_func_string)
|
||||||
|
self._dKdiag_dtheta_code =\
|
||||||
|
"""
|
||||||
|
// _dKdiag_dtheta_code
|
||||||
|
// Code for computing gradient of diagonal with respect to parameters.
|
||||||
|
int i;
|
||||||
|
int N = partial_array->dimensions[0];
|
||||||
|
int input_dim = X_array->dimensions[1];
|
||||||
|
for (i=0;i<N;i++){
|
||||||
|
%s
|
||||||
|
}
|
||||||
|
%s
|
||||||
|
"""%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||||
|
|
||||||
|
# Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output.
|
||||||
|
gradX_func_list = []
|
||||||
|
if self.output_dim>1:
|
||||||
|
gradX_func_list += c_define_output_indices
|
||||||
|
gradX_func_list += ["TARGET2(i, %i) += PARTIAL2(i, j)*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
|
||||||
|
gradX_func_string = "\n".join(gradX_func_list)
|
||||||
|
|
||||||
|
self._dK_dX_code = \
|
||||||
|
"""
|
||||||
|
// _dK_dX_code
|
||||||
|
// Code for computing gradient of covariance with respect to inputs.
|
||||||
|
int i;
|
||||||
|
int j;
|
||||||
|
int N = partial_array->dimensions[0];
|
||||||
|
int num_inducing = partial_array->dimensions[1];
|
||||||
|
int input_dim = X_array->dimensions[1];
|
||||||
|
//#pragma omp parallel for private(j)
|
||||||
|
for (i=0;i<N; i++){
|
||||||
|
for (j=0; j<num_inducing; j++){
|
||||||
|
%s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
%s
|
||||||
|
"""%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||||
|
|
||||||
|
|
||||||
|
diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0)
|
||||||
|
diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string)
|
||||||
|
diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string)
|
||||||
|
diag_gradX_func_string = re.sub('PARTIAL2\(i, i\)','2*PARTIAL1(i)',diag_gradX_func_string)
|
||||||
|
|
||||||
|
# Code for gradients of Kdiag wrt X
|
||||||
|
self._dKdiag_dX_code= \
|
||||||
|
"""
|
||||||
|
// _dKdiag_dX_code
|
||||||
|
// Code for computing gradient of diagonal with respect to inputs.
|
||||||
|
int N = partial_array->dimensions[0];
|
||||||
|
int input_dim = X_array->dimensions[1];
|
||||||
|
for (int i=0;i<N; i++){
|
||||||
|
%s
|
||||||
|
}
|
||||||
|
%s
|
||||||
|
"""%(diag_gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a
|
||||||
|
# string representation forces recompile when needed Get rid
|
||||||
|
# of Zs in argument for diagonal. TODO: Why wasn't
|
||||||
|
# diag_func_string called here? Need to check that.
|
||||||
|
#self._dKdiag_dX_code = self._dKdiag_dX_code.replace('Z[j', 'X[i')
|
||||||
|
|
||||||
|
# Code to use when only X is provided.
|
||||||
|
self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
|
||||||
|
self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[')
|
||||||
|
self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z2(', 'X2(')
|
||||||
|
self._dK_dX_code_X = self._dK_dX_code.replace('Z2(', 'X2(')
|
||||||
|
|
||||||
|
|
||||||
|
#TODO: insert multiple functions here via string manipulation
|
||||||
|
#TODO: similar functions for psi_stats
|
||||||
|
def _get_arg_names(self, Z=None, partial=None):
|
||||||
|
arg_names = ['target','X']
|
||||||
|
for shared_params in self._sp_theta:
|
||||||
|
arg_names += [shared_params.name]
|
||||||
|
if Z is not None:
|
||||||
|
arg_names += ['Z']
|
||||||
|
if partial is not None:
|
||||||
|
arg_names += ['partial']
|
||||||
|
if self.output_dim>1:
|
||||||
|
arg_names += self._split_theta_names
|
||||||
|
arg_names += ['output_dim']
|
||||||
|
return arg_names
|
||||||
|
|
||||||
|
def _weave_inline(self, code, X, target, Z=None, partial=None):
    """
    Run one of the generated C snippets through ``weave.inline``.

    :param code: C source to execute (one of the ``self._*_code`` strings)
    :param X: input array, visible to the C code as ``X``
    :param target: array the C code writes its result into
    :param Z: optional second input array, exposed only when not None
    :param partial: optional array of upstream gradients, exposed only when
        not None
    """
    # Build the namespace explicitly instead of writing into locals():
    # Python documents the locals() dict as not-to-be-modified, so relying on
    # those writes being visible to weave's frame inspection is fragile.
    namespace = {}
    for shared_params in self._sp_theta:
        namespace[shared_params.name] = getattr(self, shared_params.name)
    for split_params in self._split_theta_names:
        namespace[split_params] = getattr(self, split_params)
    # Fixed names are set last so that, as before, the real arguments win
    # over a parameter that happens to share their name.
    namespace['output_dim'] = self.output_dim
    namespace['X'] = X
    namespace['target'] = target
    if Z is not None:
        namespace['Z'] = Z
    if partial is not None:
        namespace['partial'] = partial

    arg_names = self._get_arg_names(Z, partial)
    weave.inline(code=code, arg_names=arg_names, local_dict=namespace,
                 **self.weave_kwargs)
|
||||||
|
|
||||||
|
def K(self, X, Z, target):
    """
    Evaluate the covariance by running the generated C code.

    :param X: first set of inputs
    :param Z: second set of inputs, or None to use the X-only code
        (``self._K_code_X``)
    :param target: array the generated code writes into
    """
    if Z is not None:
        self._weave_inline(self._K_code, X, target, Z)
        return
    self._weave_inline(self._K_code_X, X, target)
|
||||||
|
|
||||||
|
|
||||||
|
def Kdiag(self, X, target):
    """
    Evaluate the diagonal of the covariance via ``self._Kdiag_code``.

    :param X: inputs
    :param target: array the generated code writes into
    """
    diag_code = self._Kdiag_code
    self._weave_inline(diag_code, X, target)
|
||||||
|
|
||||||
|
def dK_dtheta(self, partial, X, Z, target):
    """
    Run the generated code for the covariance gradient w.r.t. the parameters.

    :param partial: upstream gradient array, passed to the C code as ``partial``
    :param X: first set of inputs
    :param Z: second set of inputs, or None to select the X-only code
    :param target: array the generated code writes into
    """
    # Pick the X-only variant when no second input set is supplied.
    grad_code = self._dK_dtheta_code_X if Z is None else self._dK_dtheta_code
    self._weave_inline(grad_code, X, target, Z, partial)
|
||||||
|
|
||||||
|
def dKdiag_dtheta(self, partial, X, target):
    """
    Run the generated code for the diagonal's gradient w.r.t. the parameters.

    :param partial: upstream gradient array, passed to the C code as ``partial``
    :param X: inputs
    :param target: array the generated code writes into
    """
    grad_code = self._dKdiag_dtheta_code
    self._weave_inline(grad_code, X, target, Z=None, partial=partial)
|
||||||
|
|
||||||
|
def dK_dX(self, partial, X, Z, target):
    """
    Run the generated code for the covariance gradient w.r.t. the inputs.

    :param partial: upstream gradient array, passed to the C code as ``partial``
    :param X: first set of inputs
    :param Z: second set of inputs, or None to select the X-only code
    :param target: array the generated code writes into
    """
    # Pick the X-only variant when no second input set is supplied.
    grad_code = self._dK_dX_code_X if Z is None else self._dK_dX_code
    self._weave_inline(grad_code, X, target, Z, partial)
|
||||||
|
|
||||||
|
def dKdiag_dX(self, partial, X, target):
    """
    Run the generated code for the diagonal's gradient w.r.t. the inputs.

    :param partial: upstream gradient array, passed to the C code as ``partial``
    :param X: inputs
    :param target: array the generated code writes into
    """
    # The previous body called ``self._weave.inline`` (no such attribute) and
    # passed an undefined name ``Z``; the diagonal has no second input set,
    # so route through the shared helper exactly like dKdiag_dtheta does.
    self._weave_inline(self._dKdiag_dX_code, X, target, Z=None, partial=partial)
|
||||||
|
|
||||||
def compute_psi_stats(self):
|
def compute_psi_stats(self):
|
||||||
#define some normal distributions
|
#define some normal distributions
|
||||||
mus = [sp.var('mu%i'%i,real=True) for i in range(self.input_dim)]
|
mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)]
|
||||||
Ss = [sp.var('S%i'%i,positive=True) for i in range(self.input_dim)]
|
Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)]
|
||||||
normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
|
normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
|
||||||
|
|
||||||
#do some integration!
|
#do some integration!
|
||||||
|
|
@ -99,188 +424,29 @@ class spkern(Kernpart):
|
||||||
self._sp_psi2 = self._sp_psi2.simplify()
|
self._sp_psi2 = self._sp_psi2.simplify()
|
||||||
|
|
||||||
|
|
||||||
def _gen_code(self):
|
|
||||||
#generate c functions from sympy objects
|
|
||||||
(foo_c,self._function_code),(foo_h,self._function_header) = \
|
|
||||||
codegen([('k',self._sp_k)] \
|
|
||||||
+ [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]\
|
|
||||||
#+ [('dk_d%s'%z.name,dz) for z,dz in zip(self._sp_z,self._sp_dk_dz)]\
|
|
||||||
+ [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]\
|
|
||||||
,"C",'foobar',argument_sequence=self._sp_x+self._sp_z+self._sp_theta)
|
|
||||||
#put the header file where we can find it
|
|
||||||
f = file(os.path.join(tempfile.gettempdir(),'foobar.h'),'w')
|
|
||||||
f.write(self._function_header)
|
|
||||||
f.close()
|
|
||||||
|
|
||||||
# Substitute any known derivatives which sympy doesn't compute
|
|
||||||
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
|
|
||||||
|
|
||||||
# Here's the code to do the looping for K
|
|
||||||
arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]
|
|
||||||
+ ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]
|
|
||||||
+ ["param[%i]"%i for i in range(self.num_params)])
|
|
||||||
|
|
||||||
|
|
||||||
self._K_code =\
|
|
||||||
"""
|
|
||||||
int i;
|
|
||||||
int j;
|
|
||||||
int N = target_array->dimensions[0];
|
|
||||||
int num_inducing = target_array->dimensions[1];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
//#pragma omp parallel for private(j)
|
|
||||||
for (i=0;i<N;i++){
|
|
||||||
for (j=0;j<num_inducing;j++){
|
|
||||||
target[i*num_inducing+j] = k(%s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
|
||||||
|
|
||||||
# Similar code when only X is provided.
|
|
||||||
self._K_code_X = self._K_code.replace('Z[', 'X[')
|
|
||||||
|
|
||||||
|
|
||||||
# Code to compute diagonal of covariance.
|
|
||||||
diag_arglist = re.sub('Z','X',arglist)
|
|
||||||
diag_arglist = re.sub('j','i',diag_arglist)
|
|
||||||
# Code to do the looping for Kdiag
|
|
||||||
self._Kdiag_code =\
|
|
||||||
"""
|
|
||||||
int i;
|
|
||||||
int N = target_array->dimensions[0];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
//#pragma omp parallel for
|
|
||||||
for (i=0;i<N;i++){
|
|
||||||
target[i] = k(%s);
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(diag_arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
|
||||||
|
|
||||||
# Code to compute gradients
|
|
||||||
funclist = '\n'.join([' '*16 + 'target[%i] += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arglist) for i,theta in enumerate(self._sp_theta)])
|
|
||||||
|
|
||||||
self._dK_dtheta_code =\
|
|
||||||
"""
|
|
||||||
int i;
|
|
||||||
int j;
|
|
||||||
int N = partial_array->dimensions[0];
|
|
||||||
int num_inducing = partial_array->dimensions[1];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
//#pragma omp parallel for private(j)
|
|
||||||
for (i=0;i<N;i++){
|
|
||||||
for (j=0;j<num_inducing;j++){
|
|
||||||
%s
|
|
||||||
}
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(funclist,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
|
|
||||||
|
|
||||||
# Similar code when only X is provided, change argument lists.
|
|
||||||
self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
|
|
||||||
|
|
||||||
# Code to compute gradients for Kdiag TODO: needs clean up
|
|
||||||
diag_funclist = re.sub('Z','X',funclist,count=0)
|
|
||||||
diag_funclist = re.sub('j','i',diag_funclist)
|
|
||||||
diag_funclist = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_funclist)
|
|
||||||
self._dKdiag_dtheta_code =\
|
|
||||||
"""
|
|
||||||
int i;
|
|
||||||
int N = partial_array->dimensions[0];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
for (i=0;i<N;i++){
|
|
||||||
%s
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(diag_funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
|
||||||
|
|
||||||
# Code for gradients wrt X
|
|
||||||
gradient_funcs = "\n".join(["target[i*input_dim+%i] += partial[i*num_inducing+j]*dk_dx%i(%s);"%(q,q,arglist) for q in range(self.input_dim)])
|
|
||||||
if False:
|
|
||||||
gradient_funcs += """if(isnan(target[i*input_dim+2])){printf("%%f\\n",dk_dx2(X[i*input_dim+0], X[i*input_dim+1], X[i*input_dim+2], Z[j*input_dim+0], Z[j*input_dim+1], Z[j*input_dim+2], param[0], param[1], param[2], param[3], param[4], param[5]));}
|
|
||||||
if(isnan(target[i*input_dim+2])){printf("%%f,%%f,%%i,%%i\\n", X[i*input_dim+2], Z[j*input_dim+2],i,j);}"""
|
|
||||||
|
|
||||||
self._dK_dX_code = \
|
|
||||||
"""
|
|
||||||
int i;
|
|
||||||
int j;
|
|
||||||
int N = partial_array->dimensions[0];
|
|
||||||
int num_inducing = partial_array->dimensions[1];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
//#pragma omp parallel for private(j)
|
|
||||||
for (i=0;i<N; i++){
|
|
||||||
for (j=0; j<num_inducing; j++){
|
|
||||||
%s
|
|
||||||
}
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
|
||||||
|
|
||||||
# Create code for call when just X is passed as argument.
|
|
||||||
self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[')
|
|
||||||
|
|
||||||
diag_gradient_funcs = re.sub('Z','X',gradient_funcs,count=0)
|
|
||||||
diag_gradient_funcs = re.sub('j','i',diag_gradient_funcs)
|
|
||||||
diag_gradient_funcs = re.sub('partial\[i\*num_inducing\+i\]','2*partial[i]',diag_gradient_funcs)
|
|
||||||
|
|
||||||
# Code for gradients of Kdiag wrt X
|
|
||||||
self._dKdiag_dX_code= \
|
|
||||||
"""
|
|
||||||
int N = partial_array->dimensions[0];
|
|
||||||
int input_dim = X_array->dimensions[1];
|
|
||||||
for (int i=0;i<N; i++){
|
|
||||||
%s
|
|
||||||
}
|
|
||||||
%s
|
|
||||||
"""%(diag_gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a
|
|
||||||
# string representation forces recompile when needed Get rid
|
|
||||||
# of Zs in argument for diagonal. TODO: Why wasn't
|
|
||||||
# diag_funclist called here? Need to check that.
|
|
||||||
#self._dKdiag_dX_code = self._dKdiag_dX_code.replace('Z[j', 'X[i')
|
|
||||||
|
|
||||||
|
|
||||||
#TODO: insert multiple functions here via string manipulation
|
|
||||||
#TODO: similar functions for psi_stats
|
|
||||||
|
|
||||||
def K(self,X,Z,target):
|
|
||||||
param = self._param
|
|
||||||
if Z is None:
|
|
||||||
weave.inline(self._K_code_X,arg_names=['target','X','param'],**self.weave_kwargs)
|
|
||||||
else:
|
|
||||||
weave.inline(self._K_code,arg_names=['target','X','Z','param'],**self.weave_kwargs)
|
|
||||||
|
|
||||||
def Kdiag(self,X,target):
|
|
||||||
param = self._param
|
|
||||||
weave.inline(self._Kdiag_code,arg_names=['target','X','param'],**self.weave_kwargs)
|
|
||||||
|
|
||||||
def dK_dtheta(self,partial,X,Z,target):
|
|
||||||
param = self._param
|
|
||||||
if Z is None:
|
|
||||||
weave.inline(self._dK_dtheta_code_X, arg_names=['target','X','param','partial'],**self.weave_kwargs)
|
|
||||||
else:
|
|
||||||
weave.inline(self._dK_dtheta_code, arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
|
|
||||||
|
|
||||||
def dKdiag_dtheta(self,partial,X,target):
|
|
||||||
param = self._param
|
|
||||||
weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','param','partial'],**self.weave_kwargs)
|
|
||||||
|
|
||||||
def dK_dX(self,partial,X,Z,target):
|
|
||||||
param = self._param
|
|
||||||
if Z is None:
|
|
||||||
weave.inline(self._dK_dX_code_X,arg_names=['target','X','param','partial'],**self.weave_kwargs)
|
|
||||||
else:
|
|
||||||
weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
|
|
||||||
|
|
||||||
def dKdiag_dX(self,partial,X,target):
|
|
||||||
param = self._param
|
|
||||||
weave.inline(self._dKdiag_dX_code,arg_names=['target','X','param','partial'],**self.weave_kwargs)
|
|
||||||
|
|
||||||
def _set_params(self,param):
|
def _set_params(self,param):
|
||||||
#print param.flags['C_CONTIGUOUS']
|
assert param.size == (self.num_params)
|
||||||
self._param = param.copy()
|
for i, shared_params in enumerate(self._sp_theta):
|
||||||
|
setattr(self, shared_params.name, param[i])
|
||||||
|
|
||||||
|
if self.output_dim>1:
|
||||||
|
for i, split_params in enumerate(self._split_theta_names):
|
||||||
|
start = self.num_shared_params + i*self.output_dim
|
||||||
|
end = self.num_shared_params + (i+1)*self.output_dim
|
||||||
|
setattr(self, split_params, param[start:end])
|
||||||
|
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
return self._param
|
params = np.zeros(0)
|
||||||
|
for shared_params in self._sp_theta:
|
||||||
|
params = np.hstack((params, getattr(self, shared_params.name)))
|
||||||
|
if self.output_dim>1:
|
||||||
|
for split_params in self._split_theta_names:
|
||||||
|
params = np.hstack((params, getattr(self, split_params).flatten()))
|
||||||
|
return params
|
||||||
|
|
||||||
def _get_param_names(self):
|
def _get_param_names(self):
|
||||||
return [x.name for x in self._sp_theta]
|
if self.output_dim>1:
|
||||||
|
return [x.name for x in self._sp_theta] + [x.name[:-2] + str(i) for x in self._sp_theta_i for i in range(self.output_dim)]
|
||||||
|
else:
|
||||||
|
return [x.name for x in self._sp_theta]
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
from ep import EP
|
from ep import EP
|
||||||
|
from laplace import Laplace
|
||||||
from ep_mixed_noise import EP_Mixed_Noise
|
from ep_mixed_noise import EP_Mixed_Noise
|
||||||
from gaussian import Gaussian
|
from gaussian import Gaussian
|
||||||
from gaussian_mixed_noise import Gaussian_Mixed_Noise
|
from gaussian_mixed_noise import Gaussian_Mixed_Noise
|
||||||
|
import noise_models
|
||||||
from noise_model_constructors import *
|
from noise_model_constructors import *
|
||||||
# TODO: from Laplace import Laplace
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -18,8 +18,7 @@ class EP(likelihood):
|
||||||
self.data = data
|
self.data = data
|
||||||
self.num_data, self.output_dim = self.data.shape
|
self.num_data, self.output_dim = self.data.shape
|
||||||
self.is_heteroscedastic = True
|
self.is_heteroscedastic = True
|
||||||
self.Nparams = 0
|
self.num_params = 0
|
||||||
self._transf_data = self.noise_model._preprocess_values(data)
|
|
||||||
|
|
||||||
#Initial values - Likelihood approximation parameters:
|
#Initial values - Likelihood approximation parameters:
|
||||||
#p(y|f) = t(f|tau_tilde,v_tilde)
|
#p(y|f) = t(f|tau_tilde,v_tilde)
|
||||||
|
|
@ -55,6 +54,22 @@ class EP(likelihood):
|
||||||
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
|
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
|
||||||
return self.noise_model.predictive_values(mu,var)
|
return self.noise_model.predictive_values(mu,var)
|
||||||
|
|
||||||
|
def log_predictive_density(self, y_test, mu_star, var_star):
    """
    Log predictive density of test observations.

    The arguments are forwarded unchanged to
    ``self.noise_model.log_predictive_density``.

    .. math::
        p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\\mu_{*}\\sigma^{2}_{*})

    :param y_test: test observations (y_{*})
    :type y_test: (Nx1) array
    :param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
    :type mu_star: (Nx1) array
    :param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
    :type var_star: (Nx1) array
    :returns: the value returned by the noise model
    """
    noise = self.noise_model
    return noise.log_predictive_density(y_test, mu_star, var_star)
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
#return np.zeros(0)
|
#return np.zeros(0)
|
||||||
return self.noise_model._get_params()
|
return self.noise_model._get_params()
|
||||||
|
|
@ -134,7 +149,7 @@ class EP(likelihood):
|
||||||
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
|
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
|
||||||
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
|
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
|
||||||
#Marginal moments
|
#Marginal moments
|
||||||
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
|
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
|
||||||
#Site parameters update
|
#Site parameters update
|
||||||
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
|
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
|
||||||
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
|
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
|
||||||
|
|
@ -233,7 +248,7 @@ class EP(likelihood):
|
||||||
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
|
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
|
||||||
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
|
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
|
||||||
#Marginal moments
|
#Marginal moments
|
||||||
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
|
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
|
||||||
#Site parameters update
|
#Site parameters update
|
||||||
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
|
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
|
||||||
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
|
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
|
||||||
|
|
@ -336,7 +351,7 @@ class EP(likelihood):
|
||||||
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
|
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
|
||||||
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
|
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
|
||||||
#Marginal moments
|
#Marginal moments
|
||||||
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
|
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
|
||||||
#Site parameters update
|
#Site parameters update
|
||||||
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
|
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
|
||||||
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
|
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ class EP_Mixed_Noise(likelihood):
|
||||||
self.data = np.vstack(data_list)
|
self.data = np.vstack(data_list)
|
||||||
self.N, self.output_dim = self.data.shape
|
self.N, self.output_dim = self.data.shape
|
||||||
self.is_heteroscedastic = True
|
self.is_heteroscedastic = True
|
||||||
self.Nparams = 0#FIXME
|
self.num_params = 0#FIXME
|
||||||
self._transf_data = np.vstack([noise_model._preprocess_values(data) for noise_model,data in zip(noise_model_list,data_list)])
|
self._transf_data = np.vstack([noise_model._preprocess_values(data) for noise_model,data in zip(noise_model_list,data_list)])
|
||||||
#TODO non-gaussian index
|
#TODO non-gaussian index
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ class Gaussian(likelihood):
|
||||||
"""
|
"""
|
||||||
def __init__(self, data, variance=1., normalize=False):
|
def __init__(self, data, variance=1., normalize=False):
|
||||||
self.is_heteroscedastic = False
|
self.is_heteroscedastic = False
|
||||||
self.Nparams = 1
|
self.num_params = 1
|
||||||
self.Z = 0. # a correction factor which accounts for the approximation made
|
self.Z = 0. # a correction factor which accounts for the approximation made
|
||||||
N, self.output_dim = data.shape
|
N, self.output_dim = data.shape
|
||||||
|
|
||||||
|
|
@ -90,11 +90,25 @@ class Gaussian(likelihood):
|
||||||
_95pc = mean + 2.*np.sqrt(true_var)
|
_95pc = mean + 2.*np.sqrt(true_var)
|
||||||
return mean, true_var, _5pc, _95pc
|
return mean, true_var, _5pc, _95pc
|
||||||
|
|
||||||
def fit_full(self):
|
def log_predictive_density(self, y_test, mu_star, var_star):
|
||||||
"""
|
"""
|
||||||
No approximations needed
|
Calculation of the log predictive density
|
||||||
|
|
||||||
|
.. math:
|
||||||
|
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
||||||
|
|
||||||
|
:param y_test: test observations (y_{*})
|
||||||
|
:type y_test: (Nx1) array
|
||||||
|
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
|
||||||
|
:type mu_star: (Nx1) array
|
||||||
|
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
|
||||||
|
:type var_star: (Nx1) array
|
||||||
|
|
||||||
|
.. Note:
|
||||||
|
Works as if each test point was provided individually, i.e. not full_cov
|
||||||
"""
|
"""
|
||||||
pass
|
y_rescaled = (y_test - self._offset)/self._scale
|
||||||
|
return -0.5*np.log(2*np.pi) -0.5*np.log(var_star + self._variance) -0.5*(np.square(y_rescaled - mu_star))/(var_star + self._variance)
|
||||||
|
|
||||||
def _gradients(self, partial):
|
def _gradients(self, partial):
|
||||||
return np.sum(partial)
|
return np.sum(partial)
|
||||||
|
|
|
||||||
|
|
@ -23,14 +23,14 @@ class Gaussian_Mixed_Noise(likelihood):
|
||||||
:type normalize: False|True
|
:type normalize: False|True
|
||||||
"""
|
"""
|
||||||
def __init__(self, data_list, noise_params=None, normalize=True):
|
def __init__(self, data_list, noise_params=None, normalize=True):
|
||||||
self.Nparams = len(data_list)
|
self.num_params = len(data_list)
|
||||||
self.n_list = [data.size for data in data_list]
|
self.n_list = [data.size for data in data_list]
|
||||||
self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.Nparams),self.n_list)])
|
self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.num_params),self.n_list)])
|
||||||
|
|
||||||
if noise_params is None:
|
if noise_params is None:
|
||||||
noise_params = [1.] * self.Nparams
|
noise_params = [1.] * self.num_params
|
||||||
else:
|
else:
|
||||||
assert self.Nparams == len(noise_params), 'Number of noise parameters does not match the number of noise models.'
|
assert self.num_params == len(noise_params), 'Number of noise parameters does not match the number of noise models.'
|
||||||
|
|
||||||
self.noise_model_list = [Gaussian(Y,variance=v,normalize = normalize) for Y,v in zip(data_list,noise_params)]
|
self.noise_model_list = [Gaussian(Y,variance=v,normalize = normalize) for Y,v in zip(data_list,noise_params)]
|
||||||
self.n_params = [noise_model._get_params().size for noise_model in self.noise_model_list]
|
self.n_params = [noise_model._get_params().size for noise_model in self.noise_model_list]
|
||||||
|
|
|
||||||
390
GPy/likelihoods/laplace.py
Normal file
390
GPy/likelihoods/laplace.py
Normal file
|
|
@ -0,0 +1,390 @@
|
||||||
|
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
#
|
||||||
|
#Parts of this file were influenced by the Matlab GPML framework written by
|
||||||
|
#Carl Edward Rasmussen & Hannes Nickisch, however all bugs are our own.
|
||||||
|
#
|
||||||
|
#The GPML code is released under the FreeBSD License.
|
||||||
|
#Copyright (c) 2005-2013 Carl Edward Rasmussen & Hannes Nickisch. All rights reserved.
|
||||||
|
#
|
||||||
|
#The code and associated documentation is available from
|
||||||
|
#http://gaussianprocess.org/gpml/code.
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import scipy as sp
|
||||||
|
from likelihood import likelihood
|
||||||
|
from ..util.linalg import mdot, jitchol, pddet, dpotrs
|
||||||
|
from functools import partial as partial_func
|
||||||
|
|
||||||
|
class Laplace(likelihood):
|
||||||
|
"""Laplace approximation to a posterior"""
|
||||||
|
|
||||||
|
def __init__(self, data, noise_model, extra_data=None):
    """
    Laplace Approximation

    Find the mode f_hat and the hessian at this point (using
    Newton-Raphson) of the unnormalised posterior.

    Compute the GP variables (i.e. generate some pseudo-observations Y and
    a normaliser Z which make a gaussian the same as the laplace
    approximation to the posterior, but normalised).

    :param data: array of data the likelihood function is approximating
    :type data: NxD
    :param noise_model: likelihood function - subclass of noise_model
    :type noise_model: noise_model
    :param extra_data: additional data used by some likelihood functions
    """
    self.data = data
    self.noise_model = noise_model
    self.extra_data = extra_data

    # Initial values
    self.N, self.D = self.data.shape
    self.is_heteroscedastic = True
    # The other likelihoods (EP, Gaussian, ...) now expose ``num_params`` and
    # the models read ``likelihood.num_params``; ``Nparams`` is kept as an
    # alias so any remaining caller of the old name still works.
    self.num_params = 0
    self.Nparams = self.num_params
    self.NORMAL_CONST = ((0.5 * self.N) * np.log(2 * np.pi))

    self.restart()
    likelihood.__init__(self)
|
||||||
|
|
||||||
|
def restart(self):
    """
    Put the likelihood's GP variables back to their default values.

    Uses ``self.N`` for the array sizes.
    """
    n = self.N

    # Default GP variables: zero pseudo-targets, identity covariance and
    # unit precision.
    self.Y = np.zeros((n, 1))
    self.covariance_matrix = np.eye(n)
    self.precision = np.ones((n, 1))
    self.Z = 0
    self.YYT = None

    # No previous solution is remembered after a restart.
    self.old_Ki_f = None
|
||||||
|
|
||||||
|
def predictive_values(self, mu, var, full_cov):
    """
    Predictive values of the observations, delegated to the noise model.

    :param mu: predictive mean of the latent function
    :param var: predictive variance of the latent function
    :param full_cov: must be falsy; correlated predictions are not supported
    :returns: whatever ``self.noise_model.predictive_values(mu, var)`` returns
    :raises NotImplementedError: if ``full_cov`` is truthy
    """
    if full_cov:
        # Adjacent literals rather than a backslash continuation inside the
        # string, which leaked the source indentation into the message
        raise NotImplementedError("Cannot make correlated predictions "
                                  "with a Laplace likelihood")
    return self.noise_model.predictive_values(mu, var)
|
||||||
|
|
||||||
|
def log_predictive_density(self, y_test, mu_star, var_star):
    """
    Log predictive density of test observations, delegated to the noise model.

    .. math::
        p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\\mu_{*}, \\sigma^{2}_{*})

    :param y_test: test observations (y_{*})
    :type y_test: (Nx1) array
    :param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
    :type mu_star: (Nx1) array
    :param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
    :type var_star: (Nx1) array
    """
    noise = self.noise_model
    log_density = noise.log_predictive_density(y_test, mu_star, var_star)
    return log_density
|
||||||
|
|
||||||
|
def _get_params(self):
|
||||||
|
return np.asarray(self.noise_model._get_params())
|
||||||
|
|
||||||
|
def _get_param_names(self):
|
||||||
|
return self.noise_model._get_param_names()
|
||||||
|
|
||||||
|
def _set_params(self, p):
|
||||||
|
return self.noise_model._set_params(p)
|
||||||
|
|
||||||
|
def _shared_gradients_components(self):
|
||||||
|
d3lik_d3fhat = self.noise_model.d3logpdf_df3(self.f_hat, self.data, extra_data=self.extra_data)
|
||||||
|
dL_dfhat = 0.5*(np.diag(self.Ki_W_i)[:, None]*d3lik_d3fhat).T #why isn't this -0.5?
|
||||||
|
I_KW_i = np.eye(self.N) - np.dot(self.K, self.Wi_K_i)
|
||||||
|
return dL_dfhat, I_KW_i
|
||||||
|
|
||||||
|
def _Kgradients(self):
    """
    Implicit part of the gradient with respect to the prior covariance K,
    to be chained with dK_dthetaK to give dL_dthetaK.

    Only the implicit term (through the movement of f_hat) is returned.
    The explicit term is not computed here: the result is stored as dZ_dK
    and accounts for the difference between the K gradients of a normal GP
    and the K gradients including the implicit part.

    :returns: mdot(dlogpdf_df at f_hat, dL_dfhat, I_KW_i)
    :rtype: matrix (presumably NxN given the shapes used - TODO confirm)
    """
    shared = self._shared_gradients_components()
    dL_dfhat, I_KW_i = shared
    grad_at_mode = self.noise_model.dlogpdf_df(self.f_hat, self.data, extra_data=self.extra_data)

    #Implicit contribution only; see the docstring for why the explicit
    #part is left out
    implicit = mdot(grad_at_mode, dL_dfhat, I_KW_i)
    return implicit
|
||||||
|
|
||||||
|
def _gradients(self, partial):
    """
    Gradients with respect to likelihood parameters (dL_dthetaL)

    Each entry is the sum of an explicit term (direct dependence of the
    approximate marginal likelihood on the parameter) and an implicit
    term (dependence through the mode f_hat).

    :param partial: Not needed by this likelihood
    :type partial: lambda function
    :rtype: array of derivatives (1 x num_likelihood_params)
    """
    dL_dfhat, I_KW_i = self._shared_gradients_components()
    dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = self.noise_model._laplace_gradients(self.f_hat, self.data, extra_data=self.extra_data)

    n_params = len(self._get_param_names())
    # Row vector of half the diagonal of Ki_W_i, shared by every parameter
    half_diag_Ki_W_i = 0.5*np.diag(self.Ki_W_i)[:,None].T

    # make space for one derivative for each likelihood parameter
    dL_dthetaL = np.zeros(n_params)
    for idx in range(n_params):
        #Explicit
        explicit = (np.sum(dlik_dthetaL[:, idx])
                    + np.dot(half_diag_Ki_W_i, dlik_hess_dthetaL[:, idx])
                    )

        #Implicit
        dfhat_dthetaL = mdot(I_KW_i, self.K, dlik_grad_dthetaL[:, idx])
        implicit = np.dot(dL_dfhat, dfhat_dthetaL)

        dL_dthetaL[idx] = explicit + implicit

    return dL_dthetaL
|
||||||
|
|
||||||
|
def _compute_GP_variables(self):
    r"""
    Generate data Y which would give the normal distribution identical
    to the laplace approximation to the posterior, but normalised

    GPy expects a likelihood to be gaussian, so we need to calculate
    the data \tilde{Y} that makes the posterior match that found
    by a laplace approximation to a non-gaussian likelihood but with
    a gaussian likelihood

    Firstly,
    The covariance of the unnormalised posterior distribution is (K^{-1} + W)^{-1},
    i.e. z*N(f|\hat{f}, (K^{-1} + W)^{-1}) but this assumes a non-gaussian likelihood,
    we wish to find the covariance \tilde{\Sigma}
    that has the same curvature but using our new simulated data \tilde{Y}
    i.e. we do N(\tilde{Y}|\hat{f}, \tilde{\Sigma})N(f|0, K) = z*N(f|\hat{f}, (K^{-1} + W)^{-1})
    and we wish to find what \tilde{Y} and \tilde{\Sigma} are.
    We find that \tilde{Y} = W^{-1}(K^{-1} + W)\hat{f} and \tilde{\Sigma} = W^{-1}

    Secondly,
    GPy optimizes the log marginal
    log p(y) = -0.5*ln|K + \tilde{\Sigma}| - 0.5*\tilde{Y}^{T}(K + \tilde{\Sigma})^{-1}\tilde{Y} + lik.Z
    So we can suck up any differences between that and our log marginal likelihood approximation
    \tilde{p}(y) = -0.5*\hat{f}^{T}K^{-1}\hat{f} + log p(y|\hat{f}) - 0.5*log |K||K^{-1} + W|
    which we want to optimize instead, by equating them and rearranging, the difference is added onto
    the log p(y) that GPy optimizes by default

    Thirdly,
    Since we have gradients that depend on how we move \hat{f}, we have implicit components
    as well as the explicit dL_dK, we hold these differences in dZ_dK and add them to dL_dK in the
    gp.py code

    Reads self.W, self.Ki_f, self.f_hat, self.W12BiW12, self.ln_B_det, self.f_Ki_f
    and self.K, so the mode and the likelihood variables must already be computed.
    """
    # Sigma_tilde = W^{-1}; W is held as a vector of diagonal entries
    Wi = 1.0/self.W
    self.Sigma_tilde = np.diagflat(Wi)

    # Y_tilde = W^{-1} K^{-1} f_hat + f_hat = W^{-1}(K^{-1} + W) f_hat
    Y_tilde = Wi*self.Ki_f + self.f_hat

    # W12BiW12 doubles as (W^{-1} + K)^{-1}
    self.Wi_K_i = self.W12BiW12
    self.ln_det_Wi_K = pddet(self.Sigma_tilde + self.K)
    self.lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data)
    self.y_Wi_Ki_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)

    # Difference between the Laplace marginal likelihood and the Gaussian
    # marginal likelihood GPy computes from Y_tilde and Sigma_tilde
    Z_tilde = (+ self.lik
               - 0.5*self.ln_B_det
               + 0.5*self.ln_det_Wi_K
               - 0.5*self.f_Ki_f
               + 0.5*self.y_Wi_Ki_i_y
               )

    #Convert to float as its (1, 1) and Z must be a scalar
    # NOTE(review): np.float64 applied to a (1, 1) array may return an array
    # rather than a scalar on some numpy versions - confirm
    self.Z = np.float64(Z_tilde)
    self.Y = Y_tilde
    self.YYT = np.dot(self.Y, self.Y.T)
    self.covariance_matrix = self.Sigma_tilde
    self.precision = 1.0 / np.diag(self.covariance_matrix)[:, None]

    #Compute dZ_dK which is how the approximated distributions gradients differ from the dL_dK computed for other likelihoods
    self.dZ_dK = self._Kgradients()
    #+ 0.5*self.Wi_K_i - 0.5*np.dot(self.Ki_f, self.Ki_f.T) #since we are not adding the K gradients explicit part theres no need to compute this again
|
||||||
|
|
||||||
|
def fit_full(self, K):
    """
    Run the laplace approximation for a given prior covariance: find the
    mode, expand the hessian there and build the Gaussian pseudo variables.
    For nomenclature see Rasmussen & Williams 2006 - modified for numerical stability

    :param K: Prior covariance matrix evaluated at locations X
    :type K: NxN matrix
    """
    # Work on a private copy so the caller's matrix is never touched
    prior_cov = K.copy()
    self.K = prior_cov

    # 1. mode of the unnormalised posterior
    self.f_hat = self.rasm_mode(prior_cov)

    # 2. hessian and related quantities at that mode
    self._compute_likelihood_variables()

    # 3. fake Gaussian variables replicating the laplace approximation
    self._compute_GP_variables()
|
||||||
|
|
||||||
|
def _compute_likelihood_variables(self):
    """
    Compute the variables required to compute gaussian Y variables

    Sets self.W (negative second derivative of the log likelihood at
    f_hat), self.W12BiW12, self.ln_B_det, self.f_Ki_f and self.Ki_W_i.

    Requires self.f_hat, self.K and self.Ki_f to be set already; Ki_f is
    stored on the instance by rasm_mode.
    """
    #At this point get the hessian matrix (or vector as W is diagonal)
    self.W = -self.noise_model.d2logpdf_df2(self.f_hat, self.data, extra_data=self.extra_data)

    #TODO: Could save on computation when using rasm by returning these, means it isn't just a "mode finder" though
    self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N))

    # (the former no-op `self.Ki_f = self.Ki_f` was removed; Ki_f is read below)
    self.f_Ki_f = np.dot(self.f_hat.T, self.Ki_f)
    self.Ki_W_i = self.K - mdot(self.K, self.W12BiW12, self.K)
|
||||||
|
|
||||||
|
def _compute_B_statistics(self, K, W, a):
    """
    Rasmussen suggests the use of a numerically stable positive definite matrix B
    which has a positive diagonal element and can be easily inverted

    :param K: Prior Covariance matrix evaluated at locations X
    :type K: NxN matrix
    :param W: Negative hessian at a point (diagonal matrix), given as a
              vector of the diagonal values. Modified IN PLACE when the
              noise model is not log-concave (small/negative entries are
              raised to 1e-6).
    :type W: Vector of diagonal values of hessian
             (NOTE(review): the broadcasting below looks like it needs Nx1 - confirm)
    :param a: Matrix to calculate W12BiW12a
    :type a: Matrix NxN
    :returns: (W12BiW12, ln_B_det)
    """
    if not self.noise_model.log_concave:
        # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
        # if the likelihood is non-log-concave. We want to say that there is a negative variance
        # to cause the posterior to become less certain than the prior and likelihood,
        # this is a property only held by non-log-concave likelihoods
        floor = 1e-6
        W[W < floor] = floor

    #W is diagonal so its sqrt is just the sqrt of the diagonal elements
    root_W = np.sqrt(W)
    B = np.eye(self.N) + root_W*K*root_W.T
    chol_B = jitchol(B)

    rhs = np.asfortranarray(root_W*a)
    Bi_rhs = dpotrs(chol_B, rhs, lower=1)[0]
    W12BiW12 = root_W*Bi_rhs

    # log|B| from the diagonal of its cholesky factor
    ln_B_det = 2*np.sum(np.log(np.diag(chol_B)))
    return W12BiW12, ln_B_det
|
||||||
|
|
||||||
|
def rasm_mode(self, K, MAX_ITER=30):
    """
    Rasmussen's numerically stable mode finding
    For nomenclature see Rasmussen & Williams 2006
    Influenced by GPML (BSD) code, all errors are our own

    Newton-Raphson iterations are carried out on Ki_f = K^{-1} f. Each
    iteration computes the full Newton step and then picks the step size
    along that direction with a short Brent line search.

    The search is warm-started from the previous call (self.old_Ki_f and
    self.f_hat) when one exists. On exit self.old_Ki_f and self.Ki_f hold
    the final K^{-1} f, and self.tmp_Ki_f / self.tmp_f are left over from
    the line search.

    :param K: Covariance matrix evaluated at locations X
    :type K: NxN matrix
    :param MAX_ITER: Maximum number of iterations of newton-raphson before forcing finish of optimisation
    :type MAX_ITER: scalar
    :returns: f_hat, mode on which to make laplace approximation
    :rtype: Nx1 matrix
    """
    # Imported here so the block does not depend on some other module
    # having already loaded the scipy.optimize submodule
    from scipy.optimize import minimize_scalar

    if self.old_Ki_f is None:
        #Start f's at zero originally
        old_Ki_f = np.zeros((self.N, 1))
        f = np.dot(K, old_Ki_f)
    else:
        #Start at the old best point
        old_Ki_f = self.old_Ki_f.copy()
        f = self.f_hat.copy()

    # Bound up front so the method still leaves a consistent state when
    # the loop body never runs (e.g. MAX_ITER=0)
    Ki_f = old_Ki_f

    def obj(Ki_f, f):
        # Unnormalised log posterior; squeezed to a plain float because the
        # quadratic term is a (1, 1) array and the minimiser wants a scalar
        value = -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)
        return float(np.squeeze(value))

    def inner_obj(step_size, old_Ki_f, dKi_f, K):
        Ki_f = old_Ki_f + step_size*dKi_f
        f = np.dot(K, Ki_f)
        # This is nasty, need to set something within an optimization though:
        # the last point evaluated by the line search is read back afterwards
        self.tmp_Ki_f = Ki_f.copy()
        self.tmp_f = f.copy()
        return -obj(Ki_f, f)

    difference = np.inf
    epsilon = 1e-5
    i = 0

    while difference > epsilon and i < MAX_ITER:
        W = -self.noise_model.d2logpdf_df2(f, self.data, extra_data=self.extra_data)

        W_f = W*f
        grad = self.noise_model.dlogpdf_df(f, self.data, extra_data=self.extra_data)

        b = W_f + grad
        # W is copied because _compute_B_statistics may clip it in place
        W12BiW12Kb, _ = self._compute_B_statistics(K, W.copy(), np.dot(K, b))

        #Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
        full_step_Ki_f = b - W12BiW12Kb
        dKi_f = full_step_Ki_f - old_Ki_f

        i_o = partial_func(inner_obj, old_Ki_f=old_Ki_f, dKi_f=dKi_f, K=K)
        #Find the stepsize that minimizes the objective function using a brent line search
        #The tolerance and maxiter matter for speed! Seems to be best to keep them low and make more full
        #steps than get this exact then make a step, if B was bigger it might be the other way around though
        minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':5})
        f = self.tmp_f.copy()
        Ki_f = self.tmp_Ki_f.copy()

        # NOTE(review): signed differences can cancel in this sum; kept as in
        # the original so the convergence behaviour does not change
        difference = np.abs(np.sum(Ki_f - old_Ki_f))
        old_Ki_f = Ki_f.copy()
        i += 1

    self.old_Ki_f = old_Ki_f.copy()
    if difference > epsilon:
        # Single-argument call form works as both a python 2 print
        # statement and a python 3 function call
        print("Not perfect f_hat fit difference: {}".format(difference))

    self.Ki_f = Ki_f
    return f
|
||||||
|
|
@ -23,6 +23,7 @@ class likelihood(Parameterized):
|
||||||
"""
|
"""
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
Parameterized.__init__(self)
|
Parameterized.__init__(self)
|
||||||
|
self.dZ_dK = 0
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
@ -33,11 +34,36 @@ class likelihood(Parameterized):
|
||||||
def _set_params(self, x):
|
def _set_params(self, x):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def fit(self):
|
def fit_full(self, K):
|
||||||
raise NotImplementedError
|
"""
|
||||||
|
No approximations needed by default
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
def restart(self):
|
||||||
|
"""
|
||||||
|
No need to restart if not an approximation
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
def _gradients(self, partial):
|
def _gradients(self, partial):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def predictive_values(self, mu, var):
|
def predictive_values(self, mu, var):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def log_predictive_density(self, y_test, mu_star, var_star):
|
||||||
|
"""
|
||||||
|
Calculation of the predictive density
|
||||||
|
|
||||||
|
.. math:
|
||||||
|
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
||||||
|
|
||||||
|
:param y_test: test observations (y_{*})
|
||||||
|
:type y_test: (Nx1) array
|
||||||
|
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
|
||||||
|
:type mu_star: (Nx1) array
|
||||||
|
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
|
||||||
|
:type var_star: (Nx1) array
|
||||||
|
"""
|
||||||
|
raise NotImplementedError
|
||||||
|
|
|
||||||
|
|
@ -4,9 +4,9 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import noise_models
|
import noise_models
|
||||||
|
|
||||||
def binomial(gp_link=None):
|
def bernoulli(gp_link=None):
|
||||||
"""
|
"""
|
||||||
Construct a binomial likelihood
|
Construct a bernoulli likelihood
|
||||||
|
|
||||||
:param gp_link: a GPy gp_link function
|
:param gp_link: a GPy gp_link function
|
||||||
"""
|
"""
|
||||||
|
|
@ -27,16 +27,17 @@ def binomial(gp_link=None):
|
||||||
analytical_mean = False
|
analytical_mean = False
|
||||||
analytical_variance = False
|
analytical_variance = False
|
||||||
|
|
||||||
return noise_models.binomial_noise.Binomial(gp_link,analytical_mean,analytical_variance)
|
return noise_models.bernoulli_noise.Bernoulli(gp_link,analytical_mean,analytical_variance)
|
||||||
|
|
||||||
def exponential(gp_link=None):
|
def exponential(gp_link=None):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Construct a binomial likelihood
|
Construct a exponential likelihood
|
||||||
|
|
||||||
:param gp_link: a GPy gp_link function
|
:param gp_link: a GPy gp_link function
|
||||||
"""
|
"""
|
||||||
if gp_link is None:
|
if gp_link is None:
|
||||||
gp_link = noise_models.gp_transformations.Identity()
|
gp_link = noise_models.gp_transformations.Log_ex_1()
|
||||||
|
|
||||||
analytical_mean = False
|
analytical_mean = False
|
||||||
analytical_variance = False
|
analytical_variance = False
|
||||||
|
|
@ -85,4 +86,36 @@ def gamma(gp_link=None,beta=1.):
|
||||||
analytical_variance = False
|
analytical_variance = False
|
||||||
return noise_models.gamma_noise.Gamma(gp_link,analytical_mean,analytical_variance,beta)
|
return noise_models.gamma_noise.Gamma(gp_link,analytical_mean,analytical_variance,beta)
|
||||||
|
|
||||||
|
def gaussian(gp_link=None, variance=2, D=None, N=None):
    """
    Construct a Gaussian likelihood

    :param gp_link: a GPy gp_link function (an Identity transformation is used when None)
    :param variance: variance
    :type variance: scalar
    :param D: passed through to the Gaussian noise model
    :param N: passed through to the Gaussian noise model
    :returns: Gaussian noise model
    """
    link = gp_link
    if link is None:
        link = noise_models.gp_transformations.Identity()

    # Both moments are flagged as analytically available
    use_analytical_mean = True
    use_analytical_variance = True # ?
    noise_cls = noise_models.gaussian_noise.Gaussian
    return noise_cls(link, use_analytical_mean, use_analytical_variance,
                     variance=variance, D=D, N=N)
|
||||||
|
|
||||||
|
def student_t(gp_link=None, deg_free=5, sigma2=2):
    """
    Construct a Student t likelihood

    :param gp_link: a GPy gp_link function (an Identity transformation is used when None)
    :param deg_free: degrees of freedom of student-t
    :type deg_free: scalar
    :param sigma2: variance
    :type sigma2: scalar
    :returns: Student-T noise model
    """
    link = gp_link
    if link is None:
        link = noise_models.gp_transformations.Identity()

    # Both moments are flagged as analytically available
    use_analytical_mean = True
    use_analytical_variance = True
    noise_cls = noise_models.student_t_noise.StudentT
    return noise_cls(link, use_analytical_mean, use_analytical_variance,
                     deg_free, sigma2)
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,8 @@
|
||||||
import noise_distributions
|
import noise_distributions
|
||||||
import binomial_noise
|
import bernoulli_noise
|
||||||
import exponential_noise
|
import exponential_noise
|
||||||
import gaussian_noise
|
import gaussian_noise
|
||||||
import gamma_noise
|
import gamma_noise
|
||||||
import poisson_noise
|
import poisson_noise
|
||||||
|
import student_t_noise
|
||||||
import gp_transformations
|
import gp_transformations
|
||||||
|
|
|
||||||
216
GPy/likelihoods/noise_models/bernoulli_noise.py
Normal file
216
GPy/likelihoods/noise_models/bernoulli_noise.py
Normal file
|
|
@ -0,0 +1,216 @@
|
||||||
|
# Copyright (c) 2012, 2013 Ricardo Andrade
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy import stats,special
|
||||||
|
import scipy as sp
|
||||||
|
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||||
|
import gp_transformations
|
||||||
|
from noise_distributions import NoiseDistribution
|
||||||
|
|
||||||
|
class Bernoulli(NoiseDistribution):
|
||||||
|
"""
|
||||||
|
Bernoulli likelihood
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}}
|
||||||
|
|
||||||
|
.. Note::
|
||||||
|
Y is expected to take values in {-1,1}
|
||||||
|
Probit likelihood usually used
|
||||||
|
"""
|
||||||
|
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
    """
    Build a Bernoulli noise model.

    All three arguments are forwarded unchanged, in this order, to the
    parent class constructor.

    :param gp_link: link (transformation) applied to the latent function
    :param analytical_mean: forwarded to the parent constructor
    :param analytical_variance: forwarded to the parent constructor
    """
    super(Bernoulli, self).__init__(gp_link,analytical_mean,analytical_variance)
|
||||||
|
|
||||||
|
def _preprocess_values(self,Y):
|
||||||
|
"""
|
||||||
|
Check if the values of the observations correspond to the values
|
||||||
|
assumed by the likelihood function.
|
||||||
|
|
||||||
|
..Note:: Binary classification algorithm works better with classes {-1,1}
|
||||||
|
"""
|
||||||
|
Y_prep = Y.copy()
|
||||||
|
Y1 = Y[Y.flatten()==1].size
|
||||||
|
Y2 = Y[Y.flatten()==0].size
|
||||||
|
assert Y1 + Y2 == Y.size, 'Bernoulli likelihood is meant to be used only with outputs in {0,1}.'
|
||||||
|
Y_prep[Y.flatten() == 0] = -1
|
||||||
|
return Y_prep
|
||||||
|
|
||||||
|
def _moments_match_analytical(self,data_i,tau_i,v_i):
|
||||||
|
"""
|
||||||
|
Moments match of the marginal approximation in EP algorithm
|
||||||
|
|
||||||
|
:param i: number of observation (int)
|
||||||
|
:param tau_i: precision of the cavity distribution (float)
|
||||||
|
:param v_i: mean/variance of the cavity distribution (float)
|
||||||
|
"""
|
||||||
|
if data_i == 1:
|
||||||
|
sign = 1.
|
||||||
|
elif data_i == 0:
|
||||||
|
sign = -1
|
||||||
|
else:
|
||||||
|
raise ValueError("bad value for Bernouilli observation (0,1)")
|
||||||
|
if isinstance(self.gp_link,gp_transformations.Probit):
|
||||||
|
z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
|
||||||
|
Z_hat = std_norm_cdf(z)
|
||||||
|
phi = std_norm_pdf(z)
|
||||||
|
mu_hat = v_i/tau_i + sign*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
|
||||||
|
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
|
||||||
|
|
||||||
|
elif isinstance(self.gp_link,gp_transformations.Heaviside):
|
||||||
|
a = sign*v_i/np.sqrt(tau_i)
|
||||||
|
Z_hat = std_norm_cdf(a)
|
||||||
|
N = std_norm_pdf(a)
|
||||||
|
mu_hat = v_i/tau_i + sign*N/Z_hat/np.sqrt(tau_i)
|
||||||
|
sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i
|
||||||
|
if np.any(np.isnan([Z_hat, mu_hat, sigma2_hat])):
|
||||||
|
stop
|
||||||
|
else:
|
||||||
|
raise ValueError("Exact moment matching not available for link {}".format(self.gp_link.gp_transformations.__name__))
|
||||||
|
|
||||||
|
return Z_hat, mu_hat, sigma2_hat
|
||||||
|
|
||||||
|
def _predictive_mean_analytical(self,mu,sigma):
    """
    Closed-form predictive mean for the Probit and Heaviside links.

    :param mu: mean of the latent gaussian
    :param sigma: spread of the latent gaussian (enters as sigma**2 for Probit, sigma for Heaviside)
    :returns: standard normal cdf of the suitably scaled mean
    :raises NotImplementedError: for any other link
    """
    link = self.gp_link
    if isinstance(link,gp_transformations.Probit):
        scaled_mean = mu/np.sqrt(1+sigma**2)
    elif isinstance(link,gp_transformations.Heaviside):
        scaled_mean = mu/sigma
    else:
        raise NotImplementedError
    return stats.norm.cdf(scaled_mean)
|
||||||
|
|
||||||
|
def _predictive_variance_analytical(self,mu,sigma, pred_mean):
    """
    Closed-form predictive variance; only defined for the Heaviside link,
    where it is 0.

    :raises NotImplementedError: for any link other than Heaviside
    """
    if not isinstance(self.gp_link,gp_transformations.Heaviside):
        raise NotImplementedError
    return 0.
|
||||||
|
|
||||||
|
def pdf_link(self, link_f, y, extra_data=None):
    """
    Likelihood function given link(f)

    .. math::
        p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data not used in bernoulli
    :returns: likelihood evaluated for this point (product over all entries)
    :rtype: float

    .. Note:
        Each y_i must be in {0,1}
    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    success_term = link_f**y
    failure_term = (1.-link_f)**(1.-y)
    per_point = (success_term) * (failure_term)
    # Product taken in log space: exp(sum(log(.)))
    total_log = np.sum(np.log(per_point))
    return np.exp(total_log)
|
||||||
|
|
||||||
|
def logpdf_link(self, link_f, y, extra_data=None):
    """
    Log Likelihood function given link(f)

    .. math::
        \\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-\\lambda(f_{i}))

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data not used in bernoulli
    :returns: log likelihood evaluated at points link(f), summed over all entries
    :rtype: float
    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    # Entries with y == 1 contribute log(link_f), every other entry log(1 - link_f)
    log_success = np.log(link_f)
    log_failure = np.log(1-link_f)
    per_point = np.where(y==1, log_success, log_failure)
    return np.sum(per_point)
|
||||||
|
|
||||||
|
def dlogpdf_dlink(self, link_f, y, extra_data=None):
    """
    Gradient of the log pdf at y, given link(f) w.r.t link(f)

    .. math::
        \\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data not used in bernoulli
    :returns: gradient of log likelihood evaluated at points link(f)
    :rtype: Nx1 array
    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    success_part = (y/link_f)
    failure_part = (1.-y)/(1-link_f)
    return success_part - failure_part
|
||||||
|
|
||||||
|
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
    """
    Hessian at y, given link_f, w.r.t link_f. The hessian is 0 unless i == j,
    i.e. second derivative of logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)

    .. math::
        \\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data not used in bernoulli
    :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
    :rtype: Nx1 array

    .. Note::
        Will return diagonal of hessian, since everywhere else it is 0, as the likelihood factorizes over cases
        (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)))
    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    success_part = -y/(link_f**2)
    failure_part = (1-y)/((1-link_f)**2)
    return success_part - failure_part
|
||||||
|
|
||||||
|
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
    """
    Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

    .. math::
        \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i})}{(1-\\lambda(f))^{3}}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data not used in bernoulli
    :returns: third derivative of log likelihood evaluated at points link(f)
    :rtype: Nx1 array
    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    success_part = y/(link_f**3)
    failure_part = (1-y)/((1-link_f)**3)
    return 2*(success_part - failure_part)
|
||||||
|
|
||||||
|
def _mean(self,gp):
|
||||||
|
"""
|
||||||
|
Mass (or density) function
|
||||||
|
"""
|
||||||
|
return self.gp_link.transf(gp)
|
||||||
|
|
||||||
|
def _variance(self,gp):
|
||||||
|
"""
|
||||||
|
Mass (or density) function
|
||||||
|
"""
|
||||||
|
p = self.gp_link.transf(gp)
|
||||||
|
return p*(1.-p)
|
||||||
|
|
||||||
|
def samples(self, gp):
    """
    Returns a set of samples of observations based on a given value of the latent variable.

    One binomial draw with a single trial is taken per latent value, with
    success probability given by the link applied to that value. The
    result has the same shape as gp.

    :param gp: latent variable
    """
    shape_in = gp.shape
    flat_gp = gp.flatten()
    # A single trial per latent value
    single_trials = np.ones_like(flat_gp, dtype=int)
    success_prob = self.gp_link.transf(flat_gp)
    draws = np.random.binomial(single_trials, success_prob)
    return draws.reshape(shape_in)
|
||||||
|
|
@ -1,132 +0,0 @@
|
||||||
# Copyright (c) 2012, 2013 Ricardo Andrade
|
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
from scipy import stats,special
|
|
||||||
import scipy as sp
|
|
||||||
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
|
||||||
import gp_transformations
|
|
||||||
from noise_distributions import NoiseDistribution
|
|
||||||
|
|
||||||
class Binomial(NoiseDistribution):
|
|
||||||
"""
|
|
||||||
Probit likelihood
|
|
||||||
Y is expected to take values in {-1,1}
|
|
||||||
-----
|
|
||||||
$$
|
|
||||||
L(x) = \\Phi (Y_i*f_i)
|
|
||||||
$$
|
|
||||||
"""
|
|
||||||
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
|
|
||||||
super(Binomial, self).__init__(gp_link,analytical_mean,analytical_variance)
|
|
||||||
|
|
||||||
def _preprocess_values(self,Y):
|
|
||||||
"""
|
|
||||||
Check if the values of the observations correspond to the values
|
|
||||||
assumed by the likelihood function.
|
|
||||||
|
|
||||||
..Note:: Binary classification algorithm works better with classes {-1,1}
|
|
||||||
"""
|
|
||||||
Y_prep = Y.copy()
|
|
||||||
Y1 = Y[Y.flatten()==1].size
|
|
||||||
Y2 = Y[Y.flatten()==0].size
|
|
||||||
assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
|
|
||||||
Y_prep[Y.flatten() == 0] = -1
|
|
||||||
return Y_prep
|
|
||||||
|
|
||||||
def _moments_match_analytical(self,data_i,tau_i,v_i):
|
|
||||||
"""
|
|
||||||
Moments match of the marginal approximation in EP algorithm
|
|
||||||
|
|
||||||
:param i: number of observation (int)
|
|
||||||
:param tau_i: precision of the cavity distribution (float)
|
|
||||||
:param v_i: mean/variance of the cavity distribution (float)
|
|
||||||
"""
|
|
||||||
if isinstance(self.gp_link,gp_transformations.Probit):
|
|
||||||
z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
|
|
||||||
Z_hat = std_norm_cdf(z)
|
|
||||||
phi = std_norm_pdf(z)
|
|
||||||
mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
|
|
||||||
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
|
|
||||||
|
|
||||||
elif isinstance(self.gp_link,gp_transformations.Heaviside):
|
|
||||||
a = data_i*v_i/np.sqrt(tau_i)
|
|
||||||
Z_hat = std_norm_cdf(a)
|
|
||||||
N = std_norm_pdf(a)
|
|
||||||
mu_hat = v_i/tau_i + data_i*N/Z_hat/np.sqrt(tau_i)
|
|
||||||
sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i
|
|
||||||
if np.any(np.isnan([Z_hat, mu_hat, sigma2_hat])):
|
|
||||||
stop
|
|
||||||
|
|
||||||
return Z_hat, mu_hat, sigma2_hat
|
|
||||||
|
|
||||||
def _predictive_mean_analytical(self, mu, sigma):
    """
    Closed-form predictive mean for the Probit and Heaviside links.

    :param mu: first argument of the closed-form expressions below
    :param sigma: second argument of the closed-form expressions below
    :returns: ``stats.norm.cdf`` of the link-specific standardised value
    :raises NotImplementedError: for any other link
    """
    link = self.gp_link
    if isinstance(link, gp_transformations.Probit):
        return stats.norm.cdf(mu/np.sqrt(1+sigma**2))
    if isinstance(link, gp_transformations.Heaviside):
        return stats.norm.cdf(mu/sigma)
    raise NotImplementedError
|
|
||||||
|
|
||||||
def _predictive_variance_analytical(self, mu, sigma, pred_mean):
    """
    Closed-form predictive variance; only available for the Heaviside link.

    :param mu: unused
    :param sigma: unused
    :param pred_mean: unused
    :returns: 0. for the Heaviside link
    :raises NotImplementedError: for any other link
    """
    if not isinstance(self.gp_link, gp_transformations.Heaviside):
        raise NotImplementedError
    return 0.
|
|
||||||
|
|
||||||
def _mass(self,gp,obs):
|
|
||||||
#NOTE obs must be in {0,1}
|
|
||||||
p = self.gp_link.transf(gp)
|
|
||||||
return p**obs * (1.-p)**(1.-obs)
|
|
||||||
|
|
||||||
def _nlog_mass(self,gp,obs):
|
|
||||||
p = self.gp_link.transf(gp)
|
|
||||||
return obs*np.log(p) + (1.-obs)*np.log(1-p)
|
|
||||||
|
|
||||||
def _dnlog_mass_dgp(self,gp,obs):
|
|
||||||
p = self.gp_link.transf(gp)
|
|
||||||
dp = self.gp_link.dtransf_df(gp)
|
|
||||||
return obs/p * dp - (1.-obs)/(1.-p) * dp
|
|
||||||
|
|
||||||
def _d2nlog_mass_dgp2(self,gp,obs):
|
|
||||||
p = self.gp_link.transf(gp)
|
|
||||||
return (obs/p + (1.-obs)/(1.-p))*self.gp_link.d2transf_df2(gp) + ((1.-obs)/(1.-p)**2-obs/p**2)*self.gp_link.dtransf_df(gp)
|
|
||||||
|
|
||||||
def _mean(self,gp):
|
|
||||||
"""
|
|
||||||
Mass (or density) function
|
|
||||||
"""
|
|
||||||
return self.gp_link.transf(gp)
|
|
||||||
|
|
||||||
def _dmean_dgp(self,gp):
|
|
||||||
return self.gp_link.dtransf_df(gp)
|
|
||||||
|
|
||||||
def _d2mean_dgp2(self,gp):
|
|
||||||
return self.gp_link.d2transf_df2(gp)
|
|
||||||
|
|
||||||
def _variance(self,gp):
|
|
||||||
"""
|
|
||||||
Mass (or density) function
|
|
||||||
"""
|
|
||||||
p = self.gp_link.transf(gp)
|
|
||||||
return p*(1.-p)
|
|
||||||
|
|
||||||
def _dvariance_dgp(self,gp):
|
|
||||||
return self.gp_link.dtransf_df(gp)*(1. - 2.*self.gp_link.transf(gp))
|
|
||||||
|
|
||||||
def _d2variance_dgp2(self,gp):
|
|
||||||
return self.gp_link.d2transf_df2(gp)*(1. - 2.*self.gp_link.transf(gp)) - 2*self.gp_link.dtransf_df(gp)**2
|
|
||||||
|
|
||||||
|
|
||||||
def samples(self, gp):
    """
    Returns a set of samples of observations based on a given value of the latent variable.

    One single-trial binomial draw is made per entry of gp, with success
    probability ``self.gp_link.transf(entry)``.

    :param gp: latent variable (array)
    :returns: array of draws with the same shape as gp
    """
    out_shape = gp.shape
    draws = []
    for latent in gp.flatten():
        draws.append(np.random.binomial(1, self.gp_link.transf(latent), size=1))
    return np.array(draws).reshape(out_shape)
|
|
||||||
|
|
@ -24,24 +24,113 @@ class Exponential(NoiseDistribution):
|
||||||
def _preprocess_values(self,Y):
|
def _preprocess_values(self,Y):
|
||||||
return Y
|
return Y
|
||||||
|
|
||||||
def _mass(self,gp,obs):
|
def pdf_link(self, link_f, y, extra_data=None):
|
||||||
"""
|
"""
|
||||||
Mass (or density) function
|
Likelihood function given link(f)
|
||||||
"""
|
|
||||||
return np.exp(-obs/self.gp_link.transf(gp))/self.gp_link.transf(gp)
|
|
||||||
|
|
||||||
def _nlog_mass(self,gp,obs):
|
.. math::
|
||||||
"""
|
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})\\exp (-y\\lambda(f_{i}))
|
||||||
Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
|
|
||||||
"""
|
|
||||||
return obs/self.gp_link.transf(gp) + np.log(self.gp_link.transf(gp))
|
|
||||||
|
|
||||||
def _dnlog_mass_dgp(self,gp,obs):
|
:param link_f: latent variables link(f)
|
||||||
return ( 1./self.gp_link.transf(gp) - obs/self.gp_link.transf(gp)**2) * self.gp_link.dtransf_df(gp)
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in exponential distribution
|
||||||
|
:returns: likelihood evaluated for this point
|
||||||
|
:rtype: float
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
log_objective = link_f*np.exp(-y*link_f)
|
||||||
|
return np.exp(np.sum(np.log(log_objective)))
|
||||||
|
#return np.exp(np.sum(-y/link_f - np.log(link_f) ))
|
||||||
|
|
||||||
def _d2nlog_mass_dgp2(self,gp,obs):
|
def logpdf_link(self, link_f, y, extra_data=None):
|
||||||
fgp = self.gp_link.transf(gp)
|
"""
|
||||||
return (2*obs/fgp**3 - 1./fgp**2) * self.gp_link.dtransf_df(gp)**2 + ( 1./fgp - obs/fgp**2) * self.gp_link.d2transf_df2(gp)
|
Log Likelihood Function given link(f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\lambda(f_{i}) - y_{i}\\lambda(f_{i})
|
||||||
|
|
||||||
|
:param link_f: latent variables (link(f))
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in exponential distribution
|
||||||
|
:returns: likelihood evaluated for this point
|
||||||
|
:rtype: float
|
||||||
|
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
log_objective = np.log(link_f) - y*link_f
|
||||||
|
#logpdf_link = np.sum(-np.log(link_f) - y/link_f)
|
||||||
|
return np.sum(log_objective)
|
||||||
|
|
||||||
|
def dlogpdf_dlink(self, link_f, y, extra_data=None):
    r"""
    Gradient of the log likelihood function at y, given link(f) w.r.t link(f)

    .. math::
        \frac{d \ln p(y_{i}|\lambda(f_{i}))}{d\lambda(f)} = \frac{1}{\lambda(f)} - y_{i}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data which is not used in exponential distribution
    :returns: gradient of likelihood evaluated at points
    :rtype: Nx1 array
    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    return 1./link_f - y
|
||||||
|
|
||||||
|
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
    r"""
    Hessian at y, given link(f), w.r.t link(f)
    i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
    The hessian will be 0 unless i == j

    .. math::
        \frac{d^{2} \ln p(y_{i}|\lambda(f_{i}))}{d^{2}\lambda(f)} = -\frac{1}{\lambda(f_{i})^{2}}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data (only its shape is checked)
    :type y: Nx1 array
    :param extra_data: extra_data which is not used in exponential distribution
    :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
    :rtype: Nx1 array

    .. Note::
        Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
        (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    squared = link_f**2
    return -1./squared
|
||||||
|
|
||||||
|
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
    r"""
    Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

    .. math::
        \frac{d^{3} \ln p(y_{i}|\lambda(f_{i}))}{d^{3}\lambda(f)} = \frac{2}{\lambda(f_{i})^{3}}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data (only its shape is checked)
    :type y: Nx1 array
    :param extra_data: extra_data which is not used in exponential distribution
    :returns: third derivative of likelihood evaluated at points f
    :rtype: Nx1 array
    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    cubed = link_f**3
    return 2./cubed
|
||||||
|
|
||||||
def _mean(self,gp):
|
def _mean(self,gp):
|
||||||
"""
|
"""
|
||||||
|
|
@ -49,20 +138,19 @@ class Exponential(NoiseDistribution):
|
||||||
"""
|
"""
|
||||||
return self.gp_link.transf(gp)
|
return self.gp_link.transf(gp)
|
||||||
|
|
||||||
def _dmean_dgp(self,gp):
|
|
||||||
return self.gp_link.dtransf_df(gp)
|
|
||||||
|
|
||||||
def _d2mean_dgp2(self,gp):
|
|
||||||
return self.gp_link.d2transf_df2(gp)
|
|
||||||
|
|
||||||
def _variance(self,gp):
|
def _variance(self,gp):
|
||||||
"""
|
"""
|
||||||
Mass (or density) function
|
Mass (or density) function
|
||||||
"""
|
"""
|
||||||
return self.gp_link.transf(gp)**2
|
return self.gp_link.transf(gp)**2
|
||||||
|
|
||||||
def _dvariance_dgp(self,gp):
|
def samples(self, gp):
|
||||||
return 2*self.gp_link.transf(gp)*self.gp_link.dtransf_df(gp)
|
"""
|
||||||
|
Returns a set of samples of observations based on a given value of the latent variable.
|
||||||
|
|
||||||
def _d2variance_dgp2(self,gp):
|
:param gp: latent variable
|
||||||
return 2 * (self.gp_link.dtransf_df(gp)**2 + self.gp_link.transf(gp)*self.gp_link.d2transf_df2(gp))
|
"""
|
||||||
|
orig_shape = gp.shape
|
||||||
|
gp = gp.flatten()
|
||||||
|
Ysim = np.random.exponential(1.0/self.gp_link.transf(gp))
|
||||||
|
return Ysim.reshape(orig_shape)
|
||||||
|
|
|
||||||
|
|
@ -12,11 +12,11 @@ from noise_distributions import NoiseDistribution
|
||||||
class Gamma(NoiseDistribution):
|
class Gamma(NoiseDistribution):
|
||||||
"""
|
"""
|
||||||
Gamma likelihood
|
Gamma likelihood
|
||||||
Y is expected to take values in {0,1,2,...}
|
|
||||||
-----
|
.. math::
|
||||||
$$
|
p(y_{i}|\\lambda(f_{i})) = \\frac{\\beta^{\\alpha_{i}}}{\\Gamma(\\alpha_{i})}y_{i}^{\\alpha_{i}-1}e^{-\\beta y_{i}}\\\\
|
||||||
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
|
\\alpha_{i} = \\beta y_{i}
|
||||||
$$
|
|
||||||
"""
|
"""
|
||||||
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.):
|
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.):
|
||||||
self.beta = beta
|
self.beta = beta
|
||||||
|
|
@ -25,26 +25,122 @@ class Gamma(NoiseDistribution):
|
||||||
def _preprocess_values(self,Y):
|
def _preprocess_values(self,Y):
|
||||||
return Y
|
return Y
|
||||||
|
|
||||||
def _mass(self,gp,obs):
|
def pdf_link(self, link_f, y, extra_data=None):
|
||||||
"""
|
"""
|
||||||
Mass (or density) function
|
Likelihood function given link(f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
p(y_{i}|\\lambda(f_{i})) = \\frac{\\beta^{\\alpha_{i}}}{\\Gamma(\\alpha_{i})}y_{i}^{\\alpha_{i}-1}e^{-\\beta y_{i}}\\\\
|
||||||
|
\\alpha_{i} = \\beta y_{i}
|
||||||
|
|
||||||
|
:param link_f: latent variables link(f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in poisson distribution
|
||||||
|
:returns: likelihood evaluated for this point
|
||||||
|
:rtype: float
|
||||||
"""
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
#return stats.gamma.pdf(obs,a = self.gp_link.transf(gp)/self.variance,scale=self.variance)
|
#return stats.gamma.pdf(obs,a = self.gp_link.transf(gp)/self.variance,scale=self.variance)
|
||||||
alpha = self.gp_link.transf(gp)*self.beta
|
alpha = link_f*self.beta
|
||||||
return obs**(alpha - 1.) * np.exp(-self.beta*obs) * self.beta**alpha / special.gamma(alpha)
|
objective = (y**(alpha - 1.) * np.exp(-self.beta*y) * self.beta**alpha)/ special.gamma(alpha)
|
||||||
|
return np.exp(np.sum(np.log(objective)))
|
||||||
|
|
||||||
def _nlog_mass(self,gp,obs):
|
def logpdf_link(self, link_f, y, extra_data=None):
|
||||||
"""
|
"""
|
||||||
Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
|
Log Likelihood Function given link(f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\ln p(y_{i}|\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\
|
||||||
|
\\alpha_{i} = \\beta y_{i}
|
||||||
|
|
||||||
|
:param link_f: latent variables (link(f))
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in poisson distribution
|
||||||
|
:returns: likelihood evaluated for this point
|
||||||
|
:rtype: float
|
||||||
|
|
||||||
"""
|
"""
|
||||||
alpha = self.gp_link.transf(gp)*self.beta
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))
|
#alpha = self.gp_link.transf(gp)*self.beta
|
||||||
|
#return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))
|
||||||
|
alpha = link_f*self.beta
|
||||||
|
log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y
|
||||||
|
return np.sum(log_objective)
|
||||||
|
|
||||||
def _dnlog_mass_dgp(self,gp,obs):
|
def dlogpdf_dlink(self, link_f, y, extra_data=None):
|
||||||
return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
|
"""
|
||||||
|
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||||
|
|
||||||
def _d2nlog_mass_dgp2(self,gp,obs):
|
.. math::
|
||||||
return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
|
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\beta (\\log \\beta y_{i}) - \\Psi(\\alpha_{i})\\beta\\\\
|
||||||
|
\\alpha_{i} = \\beta y_{i}
|
||||||
|
|
||||||
|
:param link_f: latent variables (f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in gamma distribution
|
||||||
|
:returns: gradient of likelihood evaluated at points
|
||||||
|
:rtype: Nx1 array
|
||||||
|
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
grad = self.beta*np.log(self.beta*y) - special.psi(self.beta*link_f)*self.beta
|
||||||
|
#old
|
||||||
|
#return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
|
||||||
|
return grad
|
||||||
|
|
||||||
|
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
    r"""
    Hessian at y, given link(f), w.r.t link(f)
    i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
    The hessian will be 0 unless i == j

    .. math::
        \frac{d^{2} \ln p(y_{i}|\lambda(f_{i}))}{d^{2}\lambda(f)} = -\beta^{2}\frac{d\Psi(\alpha_{i})}{d\alpha_{i}}\\
        \alpha_{i} = \beta \lambda(f_{i})

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data (only its shape is checked)
    :type y: Nx1 array
    :param extra_data: extra_data which is not used in gamma distribution
    :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
    :rtype: Nx1 array

    .. Note::
        Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
        (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    trigamma = special.polygamma(1, self.beta*link_f)
    return -trigamma*(self.beta**2)
|
||||||
|
|
||||||
|
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
    r"""
    Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

    .. math::
        \frac{d^{3} \ln p(y_{i}|\lambda(f_{i}))}{d^{3}\lambda(f)} = -\beta^{3}\frac{d^{2}\Psi(\alpha_{i})}{d\alpha_{i}^{2}}\\
        \alpha_{i} = \beta \lambda(f_{i})

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data (only its shape is checked)
    :type y: Nx1 array
    :param extra_data: extra_data which is not used in gamma distribution
    :returns: third derivative of likelihood evaluated at points f
    :rtype: Nx1 array
    """
    assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
    tetragamma = special.polygamma(2, self.beta*link_f)
    return -tetragamma*(self.beta**3)
|
||||||
|
|
||||||
def _mean(self,gp):
|
def _mean(self,gp):
|
||||||
"""
|
"""
|
||||||
|
|
@ -52,20 +148,8 @@ class Gamma(NoiseDistribution):
|
||||||
"""
|
"""
|
||||||
return self.gp_link.transf(gp)
|
return self.gp_link.transf(gp)
|
||||||
|
|
||||||
def _dmean_dgp(self,gp):
|
|
||||||
return self.gp_link.dtransf_df(gp)
|
|
||||||
|
|
||||||
def _d2mean_dgp2(self,gp):
|
|
||||||
return self.gp_link.d2transf_df2(gp)
|
|
||||||
|
|
||||||
def _variance(self,gp):
|
def _variance(self,gp):
|
||||||
"""
|
"""
|
||||||
Mass (or density) function
|
Mass (or density) function
|
||||||
"""
|
"""
|
||||||
return self.gp_link.transf(gp)/self.beta
|
return self.gp_link.transf(gp)/self.beta
|
||||||
|
|
||||||
def _dvariance_dgp(self,gp):
|
|
||||||
return self.gp_link.dtransf_df(gp)/self.beta
|
|
||||||
|
|
||||||
def _d2variance_dgp2(self,gp):
|
|
||||||
return self.gp_link.d2transf_df2(gp)/self.beta
|
|
||||||
|
|
|
||||||
|
|
@ -12,11 +12,17 @@ class Gaussian(NoiseDistribution):
|
||||||
"""
|
"""
|
||||||
Gaussian likelihood
|
Gaussian likelihood
|
||||||
|
|
||||||
:param mean: mean value of the Gaussian distribution
|
.. math::
|
||||||
:param variance: mean value of the Gaussian distribution
|
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
|
||||||
|
|
||||||
|
:param variance: variance value of the Gaussian distribution
|
||||||
|
:param N: Number of data points
|
||||||
|
:type N: int
|
||||||
"""
|
"""
|
||||||
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1.):
|
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1., D=None, N=None):
|
||||||
self.variance = variance
|
self.variance = variance
|
||||||
|
self.N = N
|
||||||
|
self._set_params(np.asarray(variance))
|
||||||
super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance)
|
super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance)
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
|
|
@ -25,8 +31,13 @@ class Gaussian(NoiseDistribution):
|
||||||
def _get_param_names(self):
|
def _get_param_names(self):
|
||||||
return ['noise_model_variance']
|
return ['noise_model_variance']
|
||||||
|
|
||||||
def _set_params(self,p):
|
def _set_params(self, p):
|
||||||
self.variance = p
|
self.variance = float(p)
|
||||||
|
self.I = np.eye(self.N)
|
||||||
|
self.covariance_matrix = self.I * self.variance
|
||||||
|
self.Ki = self.I*(1.0 / self.variance)
|
||||||
|
#self.ln_det_K = np.sum(np.log(np.diag(self.covariance_matrix)))
|
||||||
|
self.ln_det_K = self.N*np.log(self.variance)
|
||||||
|
|
||||||
def _gradients(self,partial):
|
def _gradients(self,partial):
|
||||||
return np.zeros(1)
|
return np.zeros(1)
|
||||||
|
|
@ -57,42 +68,231 @@ class Gaussian(NoiseDistribution):
|
||||||
new_sigma2 = self.predictive_variance(mu,sigma)
|
new_sigma2 = self.predictive_variance(mu,sigma)
|
||||||
return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance)
|
return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance)
|
||||||
|
|
||||||
def _predictive_variance_analytical(self,mu,sigma):
|
def _predictive_variance_analytical(self,mu,sigma,predictive_mean=None):
|
||||||
return 1./(1./self.variance + 1./sigma**2)
|
return 1./(1./self.variance + 1./sigma**2)
|
||||||
|
|
||||||
def _mass(self,gp,obs):
|
def _mass(self, link_f, y, extra_data=None):
|
||||||
#return std_norm_pdf( (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) )
|
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
|
||||||
return stats.norm.pdf(obs,self.gp_link.transf(gp),np.sqrt(self.variance))
|
Please negate your function and use pdf in noise_model.py, if implementing a likelihood\
|
||||||
|
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
|
||||||
|
its derivatives")
|
||||||
|
def _nlog_mass(self, link_f, y, extra_data=None):
|
||||||
|
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
|
||||||
|
Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\
|
||||||
|
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
|
||||||
|
its derivatives")
|
||||||
|
|
||||||
def _nlog_mass(self,gp,obs):
|
def _dnlog_mass_dgp(self, link_f, y, extra_data=None):
|
||||||
return .5*((self.gp_link.transf(gp)-obs)**2/self.variance + np.log(2.*np.pi*self.variance))
|
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
|
||||||
|
Please negate your function and use dlogpdf_df in noise_model.py, if implementing a likelihood\
|
||||||
|
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
|
||||||
|
its derivatives")
|
||||||
|
|
||||||
def _dnlog_mass_dgp(self,gp,obs):
|
def _d2nlog_mass_dgp2(self, link_f, y, extra_data=None):
|
||||||
return (self.gp_link.transf(gp)-obs)/self.variance * self.gp_link.dtransf_df(gp)
|
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
|
||||||
|
Please negate your function and use d2logpdf_df2 in noise_model.py, if implementing a likelihood\
|
||||||
|
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
|
||||||
|
its derivatives")
|
||||||
|
|
||||||
def _d2nlog_mass_dgp2(self,gp,obs):
|
def pdf_link(self, link_f, y, extra_data=None):
|
||||||
return ((self.gp_link.transf(gp)-obs)*self.gp_link.d2transf_df2(gp) + self.gp_link.dtransf_df(gp)**2)/self.variance
|
"""
|
||||||
|
Likelihood function given link(f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
|
||||||
|
|
||||||
|
:param link_f: latent variables link(f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data not used in gaussian
|
||||||
|
:returns: likelihood evaluated for this point
|
||||||
|
:rtype: float
|
||||||
|
"""
|
||||||
|
#Assumes no covariance, exp, sum, log for numerical stability
|
||||||
|
return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance)))))
|
||||||
|
|
||||||
|
def logpdf_link(self, link_f, y, extra_data=None):
    r"""
    Log likelihood function given link(f)

    .. math::
        \ln p(y|\lambda(f)) = -\frac{N \ln 2\pi}{2} - \frac{\ln |K|}{2} - \frac{(y - \lambda(f))^{T}\sigma^{-2}(y - \lambda(f))}{2}

    Reads ``self.variance``, ``self.ln_det_K`` and ``self.N``.

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data not used in gaussian
    :returns: log likelihood summed over all points
    :rtype: float
    """
    assert np.asarray(link_f).shape == np.asarray(y).shape
    residual = y - link_f
    quadratic = np.sum(residual**2/self.variance)
    return -0.5*(quadratic + self.ln_det_K + self.N*np.log(2.*np.pi))
|
||||||
|
|
||||||
|
def dlogpdf_dlink(self, link_f, y, extra_data=None):
    r"""
    Gradient of the log pdf at y, given link(f) w.r.t link(f)

    .. math::
        \frac{d \ln p(y_{i}|\lambda(f_{i}))}{d\lambda(f)} = \frac{1}{\sigma^{2}}(y_{i} - \lambda(f_{i}))

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data not used in gaussian
    :returns: gradient of log likelihood evaluated at points link(f)
    :rtype: Nx1 array
    """
    assert np.asarray(link_f).shape == np.asarray(y).shape
    precision = (1.0/self.variance)
    return precision*y - precision*link_f
|
||||||
|
|
||||||
|
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
    r"""
    Hessian at y, given link_f, w.r.t link_f.
    i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)

    The hessian will be 0 unless i == j

    .. math::
        \frac{d^{2} \ln p(y_{i}|\lambda(f_{i}))}{d^{2}\lambda(f)} = -\frac{1}{\sigma^{2}}

    The output length comes from ``self.N``, not from the shape of link_f.

    :param link_f: latent variables link(f) (only its shape is checked)
    :type link_f: Nx1 array
    :param y: data (only its shape is checked)
    :type y: Nx1 array
    :param extra_data: extra_data not used in gaussian
    :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
    :rtype: Nx1 array

    .. Note::
        Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
        (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
    """
    assert np.asarray(link_f).shape == np.asarray(y).shape
    neg_precision = -(1.0/self.variance)
    return neg_precision*np.ones((self.N, 1))
|
||||||
|
|
||||||
|
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
    r"""
    Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

    .. math::
        \frac{d^{3} \ln p(y_{i}|\lambda(f_{i}))}{d^{3}\lambda(f)} = 0

    :param link_f: latent variables link(f) (only its shape is checked)
    :type link_f: Nx1 array
    :param y: data (only its shape is checked)
    :type y: Nx1 array
    :param extra_data: extra_data not used in gaussian
    :returns: third derivative of log likelihood evaluated at points link(f); all zeros
    :rtype: Nx1 array
    """
    assert np.asarray(link_f).shape == np.asarray(y).shape
    # Allocate the (N, 1) zero column directly instead of building an N x N
    # zero matrix only to read its diagonal.
    return np.zeros((self.N, 1))
|
||||||
|
|
||||||
|
def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
    r"""
    Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance)

    .. math::
        \frac{d \ln p(y|\lambda(f))}{d\sigma^{2}} = -\frac{N}{2\sigma^{2}} + \frac{\sum_{i}(y_{i} - \lambda(f_{i}))^{2}}{2\sigma^{4}}

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data not used in gaussian
    :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
    :rtype: float
    """
    assert np.asarray(link_f).shape == np.asarray(y).shape
    residual = y - link_f
    inv_var_sq = 1.0/(self.variance**2)
    squared_error = np.sum(np.square(residual))
    # squared_error is already reduced over all points, so the expression is a
    # scalar and needs no further summation.
    return -0.5*self.N/self.variance + 0.5*inv_var_sq*squared_error
|
||||||
|
|
||||||
|
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
    r"""
    Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance)

    .. math::
        \frac{d}{d\sigma^{2}}(\frac{d \ln p(y_{i}|\lambda(f_{i}))}{d\lambda(f)}) = \frac{1}{\sigma^{4}}(-y_{i} + \lambda(f_{i}))

    :param link_f: latent variables link(f)
    :type link_f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra_data not used in gaussian
    :returns: derivative of the gradient evaluated at points link(f) w.r.t variance parameter
    :rtype: Nx1 array
    """
    assert np.asarray(link_f).shape == np.asarray(y).shape
    inv_var_sq = 1.0/(self.variance**2)
    return -inv_var_sq*y + inv_var_sq*link_f
|
||||||
|
|
||||||
|
def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
    r"""
    Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance)

    .. math::
        \frac{d}{d\sigma^{2}}(\frac{d^{2} \ln p(y_{i}|\lambda(f_{i}))}{d^{2}\lambda(f)}) = \frac{1}{\sigma^{4}}

    :param link_f: latent variables link(f) (only its shape is checked)
    :type link_f: Nx1 array
    :param y: data (only its shape is checked)
    :type y: Nx1 array
    :param extra_data: extra_data not used in gaussian
    :returns: derivative of the hessian diagonal w.r.t variance parameter
    :rtype: Nx1 array
    """
    assert np.asarray(link_f).shape == np.asarray(y).shape
    s_4 = 1.0/(self.variance**2)
    # Build the constant (N, 1) column directly rather than scaling an N x N
    # identity matrix and extracting its diagonal.
    return s_4*np.ones((self.N, 1))
|
||||||
|
|
||||||
|
def dlogpdf_link_dtheta(self, f, y, extra_data=None):
    """
    Wrap ``dlogpdf_link_dvar`` (the only parameter gradient) in a 1x1 array.

    :param f: passed through to ``dlogpdf_link_dvar``
    :param y: passed through to ``dlogpdf_link_dvar``
    :param extra_data: passed through to ``dlogpdf_link_dvar``
    :returns: ``np.asarray([[dlogpdf_link_dvar(...)]])``
    """
    return np.asarray([[self.dlogpdf_link_dvar(f, y, extra_data=extra_data)]])
|
||||||
|
|
||||||
|
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
    """
    Return ``dlogpdf_dlink_dvar`` unchanged (variance is the only parameter).

    :param f: passed through to ``dlogpdf_dlink_dvar``
    :param y: passed through to ``dlogpdf_dlink_dvar``
    :param extra_data: passed through to ``dlogpdf_dlink_dvar``
    """
    return self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
|
||||||
|
|
||||||
|
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
    """
    Return ``d2logpdf_dlink2_dvar`` unchanged (variance is the only parameter).

    :param f: passed through to ``d2logpdf_dlink2_dvar``
    :param y: passed through to ``d2logpdf_dlink2_dvar``
    :param extra_data: passed through to ``d2logpdf_dlink2_dvar``
    """
    return self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
|
||||||
|
|
||||||
def _mean(self,gp):
|
def _mean(self,gp):
|
||||||
"""
|
"""
|
||||||
Mass (or density) function
|
Expected value of y under the Mass (or density) function p(y|f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
E_{p(y|f)}[y]
|
||||||
"""
|
"""
|
||||||
return self.gp_link.transf(gp)
|
return self.gp_link.transf(gp)
|
||||||
|
|
||||||
def _dmean_dgp(self,gp):
|
|
||||||
return self.gp_link.dtransf_df(gp)
|
|
||||||
|
|
||||||
def _d2mean_dgp2(self,gp):
|
|
||||||
return self.gp_link.d2transf_df2(gp)
|
|
||||||
|
|
||||||
def _variance(self,gp):
|
def _variance(self,gp):
|
||||||
"""
|
"""
|
||||||
Mass (or density) function
|
Variance of y under the Mass (or density) function p(y|f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
Var_{p(y|f)}[y]
|
||||||
"""
|
"""
|
||||||
return self.variance
|
return self.variance
|
||||||
|
|
||||||
def _dvariance_dgp(self,gp):
|
def samples(self, gp):
|
||||||
return 0
|
"""
|
||||||
|
Returns a set of samples of observations based on a given value of the latent variable.
|
||||||
|
|
||||||
def _d2variance_dgp2(self,gp):
|
:param gp: latent variable
|
||||||
return 0
|
"""
|
||||||
|
orig_shape = gp.shape
|
||||||
|
gp = gp.flatten()
|
||||||
|
Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp])
|
||||||
|
return Ysim.reshape(orig_shape)
|
||||||
|
|
|
||||||
|
|
@ -24,19 +24,25 @@ class GPTransformation(object):
|
||||||
"""
|
"""
|
||||||
Gaussian process tranformation function, latent space -> output space
|
Gaussian process tranformation function, latent space -> output space
|
||||||
"""
|
"""
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
def dtransf_df(self, f):
    """
    derivative of transf(f) w.r.t. f

    Placeholder: always raises ``NotImplementedError``.
    """
    raise NotImplementedError
|
||||||
|
|
||||||
def d2transf_df2(self, f):
    """
    second derivative of transf(f) w.r.t. f

    Placeholder: always raises ``NotImplementedError``.
    """
    raise NotImplementedError
|
||||||
|
|
||||||
|
def d3transf_df3(self, f):
    """
    third derivative of transf(f) w.r.t. f

    Placeholder: always raises ``NotImplementedError``.
    """
    raise NotImplementedError
|
||||||
|
|
||||||
class Identity(GPTransformation):
|
class Identity(GPTransformation):
|
||||||
"""
|
"""
|
||||||
|
|
@ -49,10 +55,13 @@ class Identity(GPTransformation):
|
||||||
return f
|
return f
|
||||||
|
|
||||||
def dtransf_df(self, f):
    """
    First derivative of the identity transformation: one for every entry of f.

    Returned via ``np.ones_like`` so the result carries the shape of ``f``
    rather than being a bare scalar.
    """
    return np.ones_like(f)
|
||||||
|
|
||||||
def d2transf_df2(self, f):
    """
    Second derivative of the identity transformation: zero for every entry of f.

    Returned via ``np.zeros_like`` so the result carries the shape of ``f``.
    """
    return np.zeros_like(f)
|
||||||
|
|
||||||
|
def d3transf_df3(self, f):
    """
    Third derivative of the identity transformation: zero for every entry of f.

    Returned via ``np.zeros_like`` so the result carries the shape of ``f``.
    """
    return np.zeros_like(f)
|
||||||
|
|
||||||
|
|
||||||
class Probit(GPTransformation):
|
class Probit(GPTransformation):
|
||||||
|
|
@ -71,6 +80,10 @@ class Probit(GPTransformation):
|
||||||
def d2transf_df2(self, f):
    """
    Second derivative of the transformation w.r.t. f.

    Computed as ``-f * std_norm_pdf(f)``.
    """
    return -f * std_norm_pdf(f)
|
||||||
|
|
||||||
|
def d3transf_df3(self, f):
    """
    Third derivative of the transformation w.r.t. f.

    Computed as ``-(1 - f**2) * exp(-f**2 / 2) / sqrt(2*pi)``.
    """
    f2 = f**2
    return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2)
|
||||||
|
|
||||||
class Log(GPTransformation):
|
class Log(GPTransformation):
|
||||||
"""
|
"""
|
||||||
.. math::
|
.. math::
|
||||||
|
|
@ -87,6 +100,9 @@ class Log(GPTransformation):
|
||||||
def d2transf_df2(self, f):
    """
    Second derivative of the transformation w.r.t. f, computed as ``exp(f)``.
    """
    return np.exp(f)
|
||||||
|
|
||||||
|
def d3transf_df3(self, f):
    """
    Third derivative of the transformation w.r.t. f, computed as ``exp(f)``.
    """
    return np.exp(f)
|
||||||
|
|
||||||
class Log_ex_1(GPTransformation):
|
class Log_ex_1(GPTransformation):
|
||||||
"""
|
"""
|
||||||
.. math::
|
.. math::
|
||||||
|
|
@ -104,15 +120,23 @@ class Log_ex_1(GPTransformation):
|
||||||
aux = np.exp(f)/(1.+np.exp(f))
|
aux = np.exp(f)/(1.+np.exp(f))
|
||||||
return aux*(1.-aux)
|
return aux*(1.-aux)
|
||||||
|
|
||||||
|
def d3transf_df3(self, f):
    """
    Third derivative of the transformation w.r.t. f.

    With ``aux = exp(f)/(1 + exp(f))`` and ``daux_df = aux*(1 - aux)`` the
    result is ``daux_df - 2*aux*daux_df``.
    """
    # 1/(1+exp(-f)) equals exp(f)/(1+exp(f)) but does not turn into inf/inf
    # (nan) once exp(f) overflows for large positive f.
    aux = 1./(1.+np.exp(-f))
    daux_df = aux*(1.-aux)
    return daux_df - (2.*aux*daux_df)
|
||||||
|
|
||||||
class Reciprocal(GPTransformation):
    """
    Reciprocal transformation, g(f) = 1/f, together with its first three
    derivatives w.r.t. f.
    """
    def transf(self, f):
        """Return 1/f."""
        return 1./f

    def dtransf_df(self, f):
        """First derivative: -1/f**2."""
        return -1./(f**2)

    def d2transf_df2(self, f):
        """Second derivative: 2/f**3."""
        return 2./(f**3)

    def d3transf_df3(self, f):
        """Third derivative: -6/f**4."""
        return -6./(f**4)
|
||||||
|
|
||||||
class Heaviside(GPTransformation):
|
class Heaviside(GPTransformation):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -9,15 +9,12 @@ import pylab as pb
|
||||||
from GPy.util.plot import gpplot
|
from GPy.util.plot import gpplot
|
||||||
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||||
import gp_transformations
|
import gp_transformations
|
||||||
|
from GPy.util.misc import chain_1, chain_2, chain_3
|
||||||
|
from scipy.integrate import quad
|
||||||
|
|
||||||
class NoiseDistribution(object):
|
class NoiseDistribution(object):
|
||||||
"""
|
"""
|
||||||
Likelihood class for doing Expectation propagation
|
Likelihood class for doing approximations
|
||||||
|
|
||||||
:param Y: observed output (Nx1 numpy.darray)
|
|
||||||
|
|
||||||
.. note:: Y values allowed depend on the LikelihoodFunction used
|
|
||||||
"""
|
"""
|
||||||
def __init__(self,gp_link,analytical_mean=False,analytical_variance=False):
|
def __init__(self,gp_link,analytical_mean=False,analytical_variance=False):
|
||||||
assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."
|
assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."
|
||||||
|
|
@ -35,6 +32,8 @@ class NoiseDistribution(object):
|
||||||
else:
|
else:
|
||||||
self.predictive_variance = self._predictive_variance_numerical
|
self.predictive_variance = self._predictive_variance_numerical
|
||||||
|
|
||||||
|
self.log_concave = True
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
return np.zeros(0)
|
return np.zeros(0)
|
||||||
|
|
||||||
|
|
@ -57,369 +56,379 @@ class NoiseDistribution(object):
|
||||||
"""
|
"""
|
||||||
return Y
|
return Y
|
||||||
|
|
||||||
def _product(self,gp,obs,mu,sigma):
|
|
||||||
"""
|
|
||||||
Product between the cavity distribution and a likelihood factor.
|
|
||||||
|
|
||||||
:param gp: latent variable
|
|
||||||
:param obs: observed output
|
|
||||||
:param mu: cavity distribution mean
|
|
||||||
:param sigma: cavity distribution standard deviation
|
|
||||||
|
|
||||||
"""
|
|
||||||
return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._mass(gp,obs)
|
|
||||||
|
|
||||||
def _nlog_product_scaled(self,gp,obs,mu,sigma):
|
|
||||||
"""
|
|
||||||
Negative log-product between the cavity distribution and a likelihood factor.
|
|
||||||
|
|
||||||
.. note:: The constant term in the Gaussian distribution is ignored.
|
|
||||||
|
|
||||||
:param gp: latent variable
|
|
||||||
:param obs: observed output
|
|
||||||
:param mu: cavity distribution mean
|
|
||||||
:param sigma: cavity distribution standard deviation
|
|
||||||
|
|
||||||
"""
|
|
||||||
return .5*((gp-mu)/sigma)**2 + self._nlog_mass(gp,obs)
|
|
||||||
|
|
||||||
def _dnlog_product_dgp(self,gp,obs,mu,sigma):
|
|
||||||
"""
|
|
||||||
Derivative wrt latent variable of the log-product between the cavity distribution and a likelihood factor.
|
|
||||||
|
|
||||||
:param gp: latent variable
|
|
||||||
:param obs: observed output
|
|
||||||
:param mu: cavity distribution mean
|
|
||||||
:param sigma: cavity distribution standard deviation
|
|
||||||
|
|
||||||
"""
|
|
||||||
return (gp - mu)/sigma**2 + self._dnlog_mass_dgp(gp,obs)
|
|
||||||
|
|
||||||
def _d2nlog_product_dgp2(self,gp,obs,mu,sigma):
|
|
||||||
"""
|
|
||||||
Second derivative wrt latent variable of the log-product between the cavity distribution and a likelihood factor.
|
|
||||||
|
|
||||||
:param gp: latent variable
|
|
||||||
:param obs: observed output
|
|
||||||
:param mu: cavity distribution mean
|
|
||||||
:param sigma: cavity distribution standard deviation
|
|
||||||
|
|
||||||
"""
|
|
||||||
return 1./sigma**2 + self._d2nlog_mass_dgp2(gp,obs)
|
|
||||||
|
|
||||||
def _product_mode(self,obs,mu,sigma):
|
|
||||||
"""
|
|
||||||
Newton's CG method to find the mode in _product (cavity x likelihood factor).
|
|
||||||
|
|
||||||
:param obs: observed output
|
|
||||||
:param mu: cavity distribution mean
|
|
||||||
:param sigma: cavity distribution standard deviation
|
|
||||||
|
|
||||||
"""
|
|
||||||
return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma),disp=False)
|
|
||||||
|
|
||||||
def _moments_match_analytical(self,obs,tau,v):
|
def _moments_match_analytical(self,obs,tau,v):
|
||||||
"""
|
"""
|
||||||
If available, this function computes the moments analytically.
|
If available, this function computes the moments analytically.
|
||||||
"""
|
"""
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def log_predictive_density(self, y_test, mu_star, var_star):
    """
    Calculation of the log predictive density

    .. math::
        p(y_{*}|D) = \\int p(y_{*}|f_{*}) p(f_{*}|\\mu_{*}, \\sigma^{2}_{*}) df_{*}

    The integral is evaluated by one-dimensional quadrature, one test point
    at a time.

    :param y_test: test observations (y_{*})
    :type y_test: (Nx1) array
    :param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
    :type mu_star: (Nx1) array
    :param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
    :type var_star: (Nx1) array
    :returns: (Nx1) array holding log p(y_{*}|D) for every test point
    """
    assert y_test.shape == mu_star.shape
    assert y_test.shape == var_star.shape
    assert y_test.shape[1] == 1

    def integral_generator(y, m, v):
        """Generate a function which can be integrated to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
        def f(f_star):
            # Gaussian weight without its 1/sqrt(2*pi*v) constant, which is
            # applied once after the integration.
            return self.pdf(f_star, y)*np.exp(-(1./(2*v))*np.square(m-f_star))
        return f

    # quad returns (value, abs_error); only the value is needed.  A list
    # comprehension (rather than zip(*...) unpacking) also copes with N == 0.
    scaled_p_ystar = [quad(integral_generator(y, m, v), -np.inf, np.inf)[0]
                      for y, m, v in zip(y_test.flatten(), mu_star.flatten(), var_star.flatten())]
    scaled_p_ystar = np.array(scaled_p_ystar).reshape(-1, 1)
    p_ystar = scaled_p_ystar/np.sqrt(2*np.pi*var_star)
    return np.log(p_ystar)
|
||||||
|
|
||||||
def _moments_match_numerical(self,obs,tau,v):
|
def _moments_match_numerical(self,obs,tau,v):
|
||||||
"""
|
"""
|
||||||
Lapace approximation to calculate the moments.
|
Calculation of moments using quadrature
|
||||||
|
|
||||||
:param obs: observed output
|
:param obs: observed output
|
||||||
:param tau: cavity distribution 1st natural parameter (precision)
|
:param tau: cavity distribution 1st natural parameter (precision)
|
||||||
:param v: cavity distribution 2nd natural paramenter (mu*precision)
|
:param v: cavity distribution 2nd natural paramenter (mu*precision)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
#Compute first integral for zeroth moment
|
||||||
mu = v/tau
|
mu = v/tau
|
||||||
mu_hat = self._product_mode(obs,mu,np.sqrt(1./tau))
|
def int_1(f):
|
||||||
sigma2_hat = 1./(tau + self._d2nlog_mass_dgp2(mu_hat,obs))
|
return self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
|
||||||
Z_hat = np.exp(-.5*tau*(mu_hat-mu)**2) * self._mass(mu_hat,obs)*np.sqrt(tau*sigma2_hat)
|
z, accuracy = quad(int_1, -np.inf, np.inf)
|
||||||
return Z_hat,mu_hat,sigma2_hat
|
z /= np.sqrt(2*np.pi/tau)
|
||||||
|
|
||||||
def _nlog_conditional_mean_scaled(self,gp,mu,sigma):
|
#Compute second integral for first moment
|
||||||
"""
|
def int_2(f):
|
||||||
Negative logarithm of the l.v.'s predictive distribution times the output's mean given the l.v.
|
return f*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
|
||||||
|
mean, accuracy = quad(int_2, -np.inf, np.inf)
|
||||||
|
mean /= np.sqrt(2*np.pi/tau)
|
||||||
|
mean /= z
|
||||||
|
|
||||||
:param gp: latent variable
|
#Compute integral for variance
|
||||||
:param mu: cavity distribution mean
|
def int_3(f):
|
||||||
:param sigma: cavity distribution standard deviation
|
return (f**2)*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
|
||||||
|
Ef2, accuracy = quad(int_3, -np.inf, np.inf)
|
||||||
|
Ef2 /= np.sqrt(2*np.pi/tau)
|
||||||
|
Ef2 /= z
|
||||||
|
variance = Ef2 - mean**2
|
||||||
|
|
||||||
.. note:: This function helps computing E(Y_star) = E(E(Y_star|f_star))
|
return z, mean, variance
|
||||||
|
|
||||||
"""
|
|
||||||
return .5*((gp - mu)/sigma)**2 - np.log(self._mean(gp))
|
|
||||||
|
|
||||||
def _dnlog_conditional_mean_dgp(self,gp,mu,sigma):
|
|
||||||
"""
|
|
||||||
Derivative of _nlog_conditional_mean_scaled wrt. l.v.
|
|
||||||
|
|
||||||
:param gp: latent variable
|
|
||||||
:param mu: cavity distribution mean
|
|
||||||
:param sigma: cavity distribution standard deviation
|
|
||||||
|
|
||||||
"""
|
|
||||||
return (gp - mu)/sigma**2 - self._dmean_dgp(gp)/self._mean(gp)
|
|
||||||
|
|
||||||
def _d2nlog_conditional_mean_dgp2(self,gp,mu,sigma):
|
|
||||||
"""
|
|
||||||
Second derivative of _nlog_conditional_mean_scaled wrt. l.v.
|
|
||||||
|
|
||||||
:param gp: latent variable
|
|
||||||
:param mu: cavity distribution mean
|
|
||||||
:param sigma: cavity distribution standard deviation
|
|
||||||
|
|
||||||
"""
|
|
||||||
return 1./sigma**2 - self._d2mean_dgp2(gp)/self._mean(gp) + (self._dmean_dgp(gp)/self._mean(gp))**2
|
|
||||||
|
|
||||||
def _nlog_exp_conditional_variance_scaled(self,gp,mu,sigma):
|
|
||||||
"""
|
|
||||||
Negative logarithm of the l.v.'s predictive distribution times the output's variance given the l.v.
|
|
||||||
|
|
||||||
:param gp: latent variable
|
|
||||||
:param mu: cavity distribution mean
|
|
||||||
:param sigma: cavity distribution standard deviation
|
|
||||||
|
|
||||||
.. note:: This function helps computing E(V(Y_star|f_star))
|
|
||||||
|
|
||||||
"""
|
|
||||||
return .5*((gp - mu)/sigma)**2 - np.log(self._variance(gp))
|
|
||||||
|
|
||||||
def _dnlog_exp_conditional_variance_dgp(self,gp,mu,sigma):
|
|
||||||
"""
|
|
||||||
Derivative of _nlog_exp_conditional_variance_scaled wrt. l.v.
|
|
||||||
|
|
||||||
:param gp: latent variable
|
|
||||||
:param mu: cavity distribution mean
|
|
||||||
:param sigma: cavity distribution standard deviation
|
|
||||||
|
|
||||||
"""
|
|
||||||
return (gp - mu)/sigma**2 - self._dvariance_dgp(gp)/self._variance(gp)
|
|
||||||
|
|
||||||
def _d2nlog_exp_conditional_variance_dgp2(self,gp,mu,sigma):
|
|
||||||
"""
|
|
||||||
Second derivative of _nlog_exp_conditional_variance_scaled wrt. l.v.
|
|
||||||
|
|
||||||
:param gp: latent variable
|
|
||||||
:param mu: cavity distribution mean
|
|
||||||
:param sigma: cavity distribution standard deviation
|
|
||||||
|
|
||||||
"""
|
|
||||||
return 1./sigma**2 - self._d2variance_dgp2(gp)/self._variance(gp) + (self._dvariance_dgp(gp)/self._variance(gp))**2
|
|
||||||
|
|
||||||
def _nlog_exp_conditional_mean_sq_scaled(self,gp,mu,sigma):
|
|
||||||
"""
|
|
||||||
Negative logarithm of the l.v.'s predictive distribution times the output's mean squared given the l.v.
|
|
||||||
|
|
||||||
:param gp: latent variable
|
|
||||||
:param mu: cavity distribution mean
|
|
||||||
:param sigma: cavity distribution standard deviation
|
|
||||||
|
|
||||||
.. note:: This function helps computing E( E(Y_star|f_star)**2 )
|
|
||||||
|
|
||||||
"""
|
|
||||||
return .5*((gp - mu)/sigma)**2 - 2*np.log(self._mean(gp))
|
|
||||||
|
|
||||||
def _dnlog_exp_conditional_mean_sq_dgp(self,gp,mu,sigma):
|
|
||||||
"""
|
|
||||||
Derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v.
|
|
||||||
|
|
||||||
:param gp: latent variable
|
|
||||||
:param mu: cavity distribution mean
|
|
||||||
:param sigma: cavity distribution standard deviation
|
|
||||||
|
|
||||||
"""
|
|
||||||
return (gp - mu)/sigma**2 - 2*self._dmean_dgp(gp)/self._mean(gp)
|
|
||||||
|
|
||||||
def _d2nlog_exp_conditional_mean_sq_dgp2(self,gp,mu,sigma):
|
|
||||||
"""
|
|
||||||
Second derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v.
|
|
||||||
|
|
||||||
:param gp: latent variable
|
|
||||||
:param mu: cavity distribution mean
|
|
||||||
:param sigma: cavity distribution standard deviation
|
|
||||||
|
|
||||||
"""
|
|
||||||
return 1./sigma**2 - 2*( self._d2mean_dgp2(gp)/self._mean(gp) - (self._dmean_dgp(gp)/self._mean(gp))**2 )
|
|
||||||
|
|
||||||
def _predictive_mean_analytical(self,mu,sigma):
|
def _predictive_mean_analytical(self,mu,sigma):
|
||||||
"""
|
"""
|
||||||
|
Predictive mean
|
||||||
|
.. math::
|
||||||
|
E(Y^{*}|Y) = E( E(Y^{*}|f^{*}, Y) )
|
||||||
|
|
||||||
If available, this function computes the predictive mean analytically.
|
If available, this function computes the predictive mean analytically.
|
||||||
"""
|
"""
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
def _predictive_variance_analytical(self,mu,sigma):
|
def _predictive_variance_analytical(self,mu,sigma):
|
||||||
"""
|
"""
|
||||||
|
Predictive variance
|
||||||
|
.. math::
|
||||||
|
V(Y^{*}| Y) = E( V(Y^{*}|f^{*}, Y) ) + V( E(Y^{*}|f^{*}, Y) )
|
||||||
|
|
||||||
If available, this function computes the predictive variance analytically.
|
If available, this function computes the predictive variance analytically.
|
||||||
"""
|
"""
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
def _predictive_mean_numerical(self,mu,sigma):
|
def _predictive_mean_numerical(self,mu,sigma):
|
||||||
"""
|
"""
|
||||||
Laplace approximation to the predictive mean: E(Y_star) = E( E(Y_star|f_star) )
|
Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) )
|
||||||
|
|
||||||
:param mu: cavity distribution mean
|
:param mu: mean of posterior
|
||||||
:param sigma: cavity distribution standard deviation
|
:param sigma: standard deviation of posterior
|
||||||
|
|
||||||
"""
|
"""
|
||||||
maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma),disp=False)
|
#FIXME: Quadrature does not work!
|
||||||
mean = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma))*sigma)
|
raise NotImplementedError
|
||||||
"""
|
sigma2 = sigma**2
|
||||||
|
#Compute first moment
|
||||||
|
def int_mean(f):
|
||||||
|
return self._mean(f)*np.exp(-(0.5/sigma2)*np.square(f - mu))
|
||||||
|
scaled_mean, accuracy = quad(int_mean, -np.inf, np.inf)
|
||||||
|
mean = scaled_mean / np.sqrt(2*np.pi*(sigma2))
|
||||||
|
|
||||||
pb.figure()
|
|
||||||
x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)])
|
|
||||||
f = np.array([np.exp(-self._nlog_conditional_mean_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x])
|
|
||||||
pb.plot(x,f,'b-')
|
|
||||||
sigma2 = 1./self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma)
|
|
||||||
f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2)
|
|
||||||
k = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2)
|
|
||||||
pb.plot(x,f2*mean,'r-')
|
|
||||||
pb.vlines(maximum,0,f.max())
|
|
||||||
"""
|
|
||||||
return mean
|
return mean
|
||||||
|
|
||||||
def _predictive_mean_sq(self,mu,sigma):
|
|
||||||
"""
|
|
||||||
Laplace approximation to the predictive mean squared: E(Y_star**2) = E( E(Y_star|f_star)**2 )
|
|
||||||
|
|
||||||
:param mu: cavity distribution mean
|
|
||||||
:param sigma: cavity distribution standard deviation
|
|
||||||
|
|
||||||
"""
|
|
||||||
maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_mean_sq_scaled,x0=self._mean(mu),fprime=self._dnlog_exp_conditional_mean_sq_dgp,fhess=self._d2nlog_exp_conditional_mean_sq_dgp2,args=(mu,sigma),disp=False)
|
|
||||||
mean_squared = np.exp(-self._nlog_exp_conditional_mean_sq_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_mean_sq_dgp2(maximum,mu,sigma))*sigma)
|
|
||||||
return mean_squared
|
|
||||||
|
|
||||||
def _predictive_variance_numerical(self,mu,sigma,predictive_mean=None):
|
def _predictive_variance_numerical(self,mu,sigma,predictive_mean=None):
|
||||||
"""
|
"""
|
||||||
Laplace approximation to the predictive variance: V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
|
Laplace approximation to the predictive variance: V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
|
||||||
|
|
||||||
:param mu: cavity distribution mean
|
:param mu: mean of posterior
|
||||||
:param sigma: cavity distribution standard deviation
|
:param sigma: standard deviation of posterior
|
||||||
:predictive_mean: output's predictive mean, if None _predictive_mean function will be called.
|
:predictive_mean: output's predictive mean, if None _predictive_mean function will be called.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
sigma2 = sigma**2
|
||||||
|
normalizer = np.sqrt(2*np.pi*sigma2)
|
||||||
|
|
||||||
# E( V(Y_star|f_star) )
|
# E( V(Y_star|f_star) )
|
||||||
maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma),disp=False)
|
#Compute expected value of variance
|
||||||
exp_var = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma))*sigma)
|
def int_var(f):
|
||||||
|
return self._variance(f)*np.exp(-(0.5/sigma2)*np.square(f - mu))
|
||||||
|
scaled_exp_variance, accuracy = quad(int_var, -np.inf, np.inf)
|
||||||
|
exp_var = scaled_exp_variance / normalizer
|
||||||
|
|
||||||
"""
|
#V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
|
||||||
pb.figure()
|
|
||||||
x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)])
|
|
||||||
f = np.array([np.exp(-self._nlog_exp_conditional_variance_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x])
|
|
||||||
pb.plot(x,f,'b-')
|
|
||||||
sigma2 = 1./self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma)
|
|
||||||
f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2)
|
|
||||||
k = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2)
|
|
||||||
pb.plot(x,f2*exp_var,'r--')
|
|
||||||
pb.vlines(maximum,0,f.max())
|
|
||||||
"""
|
|
||||||
|
|
||||||
#V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star)**2 )
|
|
||||||
exp_exp2 = self._predictive_mean_sq(mu,sigma)
|
|
||||||
if predictive_mean is None:
|
if predictive_mean is None:
|
||||||
predictive_mean = self.predictive_mean(mu,sigma)
|
predictive_mean = self.predictive_mean(mu,sigma)
|
||||||
|
|
||||||
|
predictive_mean_sq = predictive_mean**2
|
||||||
|
def int_pred_mean_sq(f):
|
||||||
|
return predictive_mean_sq*np.exp(-(0.5/(sigma2))*np.square(f - mu))
|
||||||
|
|
||||||
|
scaled_exp_exp2, accuracy = quad(int_pred_mean_sq, -np.inf, np.inf)
|
||||||
|
exp_exp2 = scaled_exp_exp2 / normalizer
|
||||||
|
|
||||||
var_exp = exp_exp2 - predictive_mean**2
|
var_exp = exp_exp2 - predictive_mean**2
|
||||||
|
# V(Y_star | f_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
|
||||||
return exp_var + var_exp
|
return exp_var + var_exp
|
||||||
|
|
||||||
def _predictive_percentiles(self,p,mu,sigma):
|
def pdf_link(self, link_f, y, extra_data=None):
    """
    Likelihood as a function of the transformed latent values.

    Placeholder: always raises ``NotImplementedError``.  ``pdf`` calls this
    with ``link_f = self.gp_link.transf(f)``.
    """
    raise NotImplementedError
|
||||||
|
|
||||||
|
def logpdf_link(self, link_f, y, extra_data=None):
    """
    Log likelihood as a function of the transformed latent values.

    Placeholder: always raises ``NotImplementedError``.  ``logpdf`` calls
    this with ``link_f = self.gp_link.transf(f)``.
    """
    raise NotImplementedError
|
||||||
|
|
||||||
|
def dlogpdf_dlink(self, link_f, y, extra_data=None):
    """
    First derivative of the log likelihood w.r.t. the transformed latent values.

    Placeholder: always raises ``NotImplementedError``.
    """
    raise NotImplementedError
|
||||||
|
|
||||||
|
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
    """
    Second derivative of the log likelihood w.r.t. the transformed latent values.

    Placeholder: always raises ``NotImplementedError``.
    """
    raise NotImplementedError
|
||||||
|
|
||||||
|
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
    """
    Third derivative of the log likelihood w.r.t. the transformed latent values.

    Placeholder: always raises ``NotImplementedError``.
    """
    raise NotImplementedError
|
||||||
|
|
||||||
|
def dlogpdf_link_dtheta(self, link_f, y, extra_data=None):
    """
    Derivative of the log likelihood w.r.t. the likelihood parameters.

    Placeholder: always raises ``NotImplementedError``.  ``dlogpdf_dtheta``
    calls this only when ``_get_param_names()`` is non-empty.
    """
    raise NotImplementedError
|
||||||
|
|
||||||
|
def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None):
    """
    Derivative of ``dlogpdf_dlink`` w.r.t. the likelihood parameters.

    Placeholder: always raises ``NotImplementedError``.
    """
    raise NotImplementedError
|
||||||
|
|
||||||
|
def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None):
    """
    Derivative of ``d2logpdf_dlink2`` w.r.t. the likelihood parameters.

    Placeholder: always raises ``NotImplementedError``.
    """
    raise NotImplementedError
|
||||||
|
|
||||||
|
def pdf(self, f, y, extra_data=None):
    """
    Evaluates the link function link(f) then computes the likelihood (pdf) using it

    .. math::
        p(y|\\lambda(f))

    :param f: latent variables f
    :type f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra data, forwarded unchanged to ``pdf_link``
    :returns: whatever ``self.pdf_link`` returns for the transformed latent values
    """
    link_f = self.gp_link.transf(f)
    return self.pdf_link(link_f, y, extra_data=extra_data)
|
||||||
|
|
||||||
def logpdf(self, f, y, extra_data=None):
    """
    Evaluates the link function link(f) then computes the log likelihood (log pdf) using it

    .. math::
        \\log p(y|\\lambda(f))

    :param f: latent variables f
    :type f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra data, forwarded unchanged to ``logpdf_link``
    :returns: whatever ``self.logpdf_link`` returns for the transformed latent values
    """
    link_f = self.gp_link.transf(f)
    return self.logpdf_link(link_f, y, extra_data=extra_data)
|
||||||
|
|
||||||
def dlogpdf_df(self, f, y, extra_data=None):
    """
    Evaluates the link function link(f) then computes the derivative of log likelihood using it
    Uses the Faa di Bruno's formula for the chain rule

    .. math::
        \\frac{d\\log p(y|\\lambda(f))}{df} = \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d\\lambda(f)}{df}

    :param f: latent variables f
    :type f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra data, forwarded unchanged to ``dlogpdf_dlink``
    :returns: ``chain_1`` applied to the link-space derivative and the derivative of the link
    """
    link_f = self.gp_link.transf(f)
    dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
    dlink_df = self.gp_link.dtransf_df(f)
    return chain_1(dlogpdf_dlink, dlink_df)
|
||||||
|
|
||||||
def d2logpdf_df2(self, f, y, extra_data=None):
    """
    Evaluates the link function link(f) then computes the second derivative of log likelihood using it
    Uses the Faa di Bruno's formula for the chain rule

    .. math::
        \\frac{d^{2}\\log p(y|\\lambda(f))}{df^{2}} = \\frac{d^{2}\\log p(y|\\lambda(f))}{d^{2}\\lambda(f)}\\left(\\frac{d\\lambda(f)}{df}\\right)^{2} + \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d^{2}\\lambda(f)}{df^{2}}

    :param f: latent variables f
    :type f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra data, forwarded unchanged to the ``*_dlink`` methods
    :returns: ``chain_2`` applied to the link-space derivatives and the derivatives of the link
    """
    link_f = self.gp_link.transf(f)
    d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
    dlink_df = self.gp_link.dtransf_df(f)
    dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
    d2link_df2 = self.gp_link.d2transf_df2(f)
    return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
|
||||||
|
|
||||||
def d3logpdf_df3(self, f, y, extra_data=None):
    """
    Evaluates the link function link(f) then computes the third derivative of log likelihood using it
    Uses the Faa di Bruno's formula for the chain rule

    .. math::
        \\frac{d^{3}\\log p(y|\\lambda(f))}{df^{3}} = \\frac{d^{3}\\log p(y|\\lambda(f))}{d\\lambda(f)^{3}}\\left(\\frac{d\\lambda(f)}{df}\\right)^{3} + 3\\frac{d^{2}\\log p(y|\\lambda(f))}{d\\lambda(f)^{2}}\\frac{d\\lambda(f)}{df}\\frac{d^{2}\\lambda(f)}{df^{2}} + \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d^{3}\\lambda(f)}{df^{3}}

    :param f: latent variables f
    :type f: Nx1 array
    :param y: data
    :type y: Nx1 array
    :param extra_data: extra data, forwarded unchanged to the ``*_dlink`` methods
    :returns: ``chain_3`` applied to the link-space derivatives and the derivatives of the link
    """
    link_f = self.gp_link.transf(f)
    d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, extra_data=extra_data)
    dlink_df = self.gp_link.dtransf_df(f)
    d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
    d2link_df2 = self.gp_link.d2transf_df2(f)
    dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
    d3link_df3 = self.gp_link.d3transf_df3(f)
    return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
|
||||||
|
|
||||||
def predictive_values(self,mu,var):
|
def dlogpdf_dtheta(self, f, y, extra_data=None):
    """
    Derivative of the log likelihood w.r.t. the likelihood parameters.

    :param f: latent variables f
    :param y: data
    :param extra_data: extra data, forwarded unchanged to ``dlogpdf_link_dtheta``
    :returns: ``self.dlogpdf_link_dtheta`` evaluated at the transformed latent
        values, or an empty array of shape (1, 0) when ``_get_param_names()``
        is empty
    """
    if len(self._get_param_names()) == 0:
        #Is no parameters so return an empty array for its derivatives
        return np.empty([1, 0])
    link_f = self.gp_link.transf(f)
    return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data)
|
||||||
|
|
||||||
|
def dlogpdf_df_dtheta(self, f, y, extra_data=None):
    """
    Derivative w.r.t. the likelihood parameters of ``dlogpdf_df``.

    :param f: latent variables f
    :param y: data
    :param extra_data: extra data, forwarded unchanged to ``dlogpdf_dlink_dtheta``
    :returns: ``chain_1`` of the link-space parameter derivative and the
        derivative of the link, or an empty array of shape (f.shape[0], 0)
        when ``_get_param_names()`` is empty
    """
    if len(self._get_param_names()) == 0:
        #Is no parameters so return an empty array for its derivatives
        return np.empty([f.shape[0], 0])
    link_f = self.gp_link.transf(f)
    dlink_df = self.gp_link.dtransf_df(f)
    dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
    return chain_1(dlogpdf_dlink_dtheta, dlink_df)
|
||||||
|
|
||||||
|
def d2logpdf_df2_dtheta(self, f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
TODO: Doc strings
|
||||||
|
"""
|
||||||
|
if len(self._get_param_names()) > 0:
|
||||||
|
link_f = self.gp_link.transf(f)
|
||||||
|
dlink_df = self.gp_link.dtransf_df(f)
|
||||||
|
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||||
|
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, extra_data=extra_data)
|
||||||
|
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
|
||||||
|
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
|
||||||
|
else:
|
||||||
|
#There are no parameters, so return an empty array for the derivatives
|
||||||
|
return np.empty([f.shape[0], 0])
|
||||||
|
|
||||||
|
def _laplace_gradients(self, f, y, extra_data=None):
|
||||||
|
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data)
|
||||||
|
dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, extra_data=extra_data)
|
||||||
|
d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, extra_data=extra_data)
|
||||||
|
|
||||||
|
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
|
||||||
|
# ensure we have gradients for every parameter we want to optimize
|
||||||
|
assert dlogpdf_dtheta.shape[1] == len(self._get_param_names())
|
||||||
|
assert dlogpdf_df_dtheta.shape[1] == len(self._get_param_names())
|
||||||
|
assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names())
|
||||||
|
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
|
||||||
|
|
||||||
|
def predictive_values(self, mu, var, full_cov=False, num_samples=30000,
|
||||||
|
sampling=False):
|
||||||
"""
|
"""
|
||||||
Compute mean, variance and confidence interval (percentiles 2.5 and 97.5) of the prediction.
|
Compute mean, variance and confidence interval (percentiles 2.5 and 97.5) of the prediction.
|
||||||
|
|
||||||
:param mu: mean of the latent variable
|
:param mu: mean of the latent variable, f, of posterior
|
||||||
:param var: variance of the latent variable
|
:param var: variance of the latent variable, f, of posterior
|
||||||
|
:param full_cov: whether to use the full covariance or just the diagonal
|
||||||
|
:type full_cov: Boolean
|
||||||
|
:param num_samples: number of samples to use in computing quantiles and
|
||||||
|
possibly mean variance
|
||||||
|
:type num_samples: integer
|
||||||
|
:param sampling: Whether to use samples for mean and variances anyway
|
||||||
|
:type sampling: Boolean
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if isinstance(mu,float) or isinstance(mu,int):
|
|
||||||
mu = [mu]
|
#Get gp_samples f* using posterior mean and variance
|
||||||
var = [var]
|
if not full_cov:
|
||||||
pred_mean = []
|
gp_samples = np.random.multivariate_normal(mu.flatten(), np.diag(var.flatten()),
|
||||||
pred_var = []
|
size=num_samples).T
|
||||||
q1 = []
|
else:
|
||||||
q3 = []
|
gp_samples = np.random.multivariate_normal(mu.flatten(), var,
|
||||||
for m,s in zip(mu,np.sqrt(var)):
|
size=num_samples).T
|
||||||
pred_mean.append(self.predictive_mean(m,s))
|
|
||||||
pred_var.append(self.predictive_variance(m,s,pred_mean[-1]))
|
#Push gp samples (f*) through likelihood to give p(y*|f*)
|
||||||
q1.append(self._predictive_percentiles(.025,m,s))
|
samples = self.samples(gp_samples)
|
||||||
q3.append(self._predictive_percentiles(.975,m,s))
|
axis=-1
|
||||||
|
|
||||||
|
if self.analytical_mean and not sampling:
|
||||||
|
pred_mean = self.predictive_mean(mu, np.sqrt(var))
|
||||||
|
else:
|
||||||
|
pred_mean = np.mean(samples, axis=axis)
|
||||||
|
|
||||||
|
if self.analytical_variance and not sampling:
|
||||||
|
pred_var = self.predictive_variance(mu, np.sqrt(var), pred_mean)
|
||||||
|
else:
|
||||||
|
pred_var = np.var(samples, axis=axis)
|
||||||
|
|
||||||
|
#Calculate quantiles from samples
|
||||||
|
q1 = np.percentile(samples, 2.5, axis=axis)
|
||||||
|
q3 = np.percentile(samples, 97.5, axis=axis)
|
||||||
|
print "WARNING: Using sampling to calculate predictive quantiles"
|
||||||
|
|
||||||
pred_mean = np.vstack(pred_mean)
|
pred_mean = np.vstack(pred_mean)
|
||||||
pred_var = np.vstack(pred_var)
|
pred_var = np.vstack(pred_var)
|
||||||
q1 = np.vstack(q1)
|
q1 = np.vstack(q1)
|
||||||
q3 = np.vstack(q3)
|
q3 = np.vstack(q3)
|
||||||
return pred_mean, pred_var, q1, q3
|
return pred_mean, pred_var, q1, q3
|
||||||
|
|
||||||
|
|
||||||
def samples(self, gp):
|
def samples(self, gp):
|
||||||
"""
|
"""
|
||||||
Returns a set of samples of observations based on a given value of the latent variable.
|
Returns a set of samples of observations based on a given value of the latent variable.
|
||||||
|
|
||||||
:param gp: latent variable
|
:param gp: latent variable
|
||||||
"""
|
"""
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
|
from __future__ import division
|
||||||
# Copyright (c) 2012, 2013 Ricardo Andrade
|
# Copyright (c) 2012, 2013 Ricardo Andrade
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy import stats,special
|
from scipy import stats,special
|
||||||
import scipy as sp
|
import scipy as sp
|
||||||
|
|
@ -14,9 +14,10 @@ class Poisson(NoiseDistribution):
|
||||||
Poisson likelihood
|
Poisson likelihood
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
L(x) = \\exp(\\lambda) * \\frac{\\lambda^Y_i}{Y_i!}
|
p(y_{i}|\\lambda(f_{i})) = \\frac{\\lambda(f_{i})^{y_{i}}}{y_{i}!}e^{-\\lambda(f_{i})}
|
||||||
|
|
||||||
..Note: Y is expected to take values in {0,1,2,...}
|
.. Note::
|
||||||
|
Y is expected to take values in {0,1,2,...}
|
||||||
"""
|
"""
|
||||||
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
|
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
|
||||||
super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance)
|
super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance)
|
||||||
|
|
@ -24,25 +25,108 @@ class Poisson(NoiseDistribution):
|
||||||
def _preprocess_values(self,Y): #TODO
|
def _preprocess_values(self,Y): #TODO
|
||||||
return Y
|
return Y
|
||||||
|
|
||||||
def _mass(self,gp,obs):
|
def pdf_link(self, link_f, y, extra_data=None):
|
||||||
"""
|
"""
|
||||||
Mass (or density) function
|
Likelihood function given link(f)
|
||||||
"""
|
|
||||||
return stats.poisson.pmf(obs,self.gp_link.transf(gp))
|
|
||||||
|
|
||||||
def _nlog_mass(self,gp,obs):
|
.. math::
|
||||||
"""
|
p(y_{i}|\\lambda(f_{i})) = \\frac{\\lambda(f_{i})^{y_{i}}}{y_{i}!}e^{-\\lambda(f_{i})}
|
||||||
Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
|
|
||||||
"""
|
|
||||||
return self.gp_link.transf(gp) - obs * np.log(self.gp_link.transf(gp)) + np.log(special.gamma(obs+1))
|
|
||||||
|
|
||||||
def _dnlog_mass_dgp(self,gp,obs):
|
:param link_f: latent variables link(f)
|
||||||
return self.gp_link.dtransf_df(gp) * (1. - obs/self.gp_link.transf(gp))
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in poisson distribution
|
||||||
|
:returns: likelihood evaluated for this point
|
||||||
|
:rtype: float
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
return np.prod(stats.poisson.pmf(y,link_f))
|
||||||
|
|
||||||
def _d2nlog_mass_dgp2(self,gp,obs):
|
def logpdf_link(self, link_f, y, extra_data=None):
|
||||||
d2_df = self.gp_link.d2transf_df2(gp)
|
"""
|
||||||
transf = self.gp_link.transf(gp)
|
Log Likelihood Function given link(f)
|
||||||
return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
|
|
||||||
|
.. math::
|
||||||
|
\\ln p(y_{i}|\lambda(f_{i})) = -\\lambda(f_{i}) + y_{i}\\log \\lambda(f_{i}) - \\log y_{i}!
|
||||||
|
|
||||||
|
:param link_f: latent variables (link(f))
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in poisson distribution
|
||||||
|
:returns: likelihood evaluated for this point
|
||||||
|
:rtype: float
|
||||||
|
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1))
|
||||||
|
|
||||||
|
def dlogpdf_dlink(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - 1
|
||||||
|
|
||||||
|
:param link_f: latent variables (f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in poisson distribution
|
||||||
|
:returns: gradient of likelihood evaluated at points
|
||||||
|
:rtype: Nx1 array
|
||||||
|
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
return y/link_f - 1
|
||||||
|
|
||||||
|
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Hessian at y, given link(f), w.r.t link(f)
|
||||||
|
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
|
||||||
|
The hessian will be 0 unless i == j
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{-y_{i}}{\\lambda(f_{i})^{2}}
|
||||||
|
|
||||||
|
:param link_f: latent variables link(f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in poisson distribution
|
||||||
|
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
|
||||||
|
:rtype: Nx1 array
|
||||||
|
|
||||||
|
.. Note::
|
||||||
|
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||||
|
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
hess = -y/(link_f**2)
|
||||||
|
return hess
|
||||||
|
#d2_df = self.gp_link.d2transf_df2(gp)
|
||||||
|
#transf = self.gp_link.transf(gp)
|
||||||
|
#return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
|
||||||
|
|
||||||
|
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f_{i})^{3}}
|
||||||
|
|
||||||
|
:param link_f: latent variables link(f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in poisson distribution
|
||||||
|
:returns: third derivative of likelihood evaluated at points f
|
||||||
|
:rtype: Nx1 array
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
d3lik_dlink3 = 2*y/(link_f)**3
|
||||||
|
return d3lik_dlink3
|
||||||
|
|
||||||
def _mean(self,gp):
|
def _mean(self,gp):
|
||||||
"""
|
"""
|
||||||
|
|
@ -50,20 +134,19 @@ class Poisson(NoiseDistribution):
|
||||||
"""
|
"""
|
||||||
return self.gp_link.transf(gp)
|
return self.gp_link.transf(gp)
|
||||||
|
|
||||||
def _dmean_dgp(self,gp):
|
|
||||||
return self.gp_link.dtransf_df(gp)
|
|
||||||
|
|
||||||
def _d2mean_dgp2(self,gp):
|
|
||||||
return self.gp_link.d2transf_df2(gp)
|
|
||||||
|
|
||||||
def _variance(self,gp):
|
def _variance(self,gp):
|
||||||
"""
|
"""
|
||||||
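Variance of the observations given the latent variable (the transformed latent value, equal to the mean)
|
Variance of the observations given the latent variable (the transformed latent value, equal to the mean)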
|
||||||
"""
|
"""
|
||||||
return self.gp_link.transf(gp)
|
return self.gp_link.transf(gp)
|
||||||
|
|
||||||
def _dvariance_dgp(self,gp):
|
def samples(self, gp):
|
||||||
return self.gp_link.dtransf_df(gp)
|
"""
|
||||||
|
Returns a set of samples of observations based on a given value of the latent variable.
|
||||||
|
|
||||||
def _d2variance_dgp2(self,gp):
|
:param gp: latent variable
|
||||||
return self.gp_link.d2transf_df2(gp)
|
"""
|
||||||
|
orig_shape = gp.shape
|
||||||
|
gp = gp.flatten()
|
||||||
|
Ysim = np.random.poisson(self.gp_link.transf(gp))
|
||||||
|
return Ysim.reshape(orig_shape)
|
||||||
|
|
|
||||||
277
GPy/likelihoods/noise_models/student_t_noise.py
Normal file
277
GPy/likelihoods/noise_models/student_t_noise.py
Normal file
|
|
@ -0,0 +1,277 @@
|
||||||
|
# Copyright (c) 2012, 2013 Ricardo Andrade
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy import stats, special
|
||||||
|
import scipy as sp
|
||||||
|
import gp_transformations
|
||||||
|
from noise_distributions import NoiseDistribution
|
||||||
|
from scipy import stats, integrate
|
||||||
|
from scipy.special import gammaln, gamma
|
||||||
|
|
||||||
|
class StudentT(NoiseDistribution):
|
||||||
|
"""
|
||||||
|
Student T likelihood
|
||||||
|
|
||||||
|
For nomenclature see Bayesian Data Analysis 2003 p576
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{-\\frac{v+1}{2}}
|
||||||
|
|
||||||
|
"""
|
||||||
|
def __init__(self,gp_link=None,analytical_mean=True,analytical_variance=True, deg_free=5, sigma2=2):
|
||||||
|
self.v = deg_free
|
||||||
|
self.sigma2 = sigma2
|
||||||
|
|
||||||
|
self._set_params(np.asarray(sigma2))
|
||||||
|
super(StudentT, self).__init__(gp_link,analytical_mean,analytical_variance)
|
||||||
|
self.log_concave = False
|
||||||
|
|
||||||
|
def _get_params(self):
|
||||||
|
return np.asarray(self.sigma2)
|
||||||
|
|
||||||
|
def _get_param_names(self):
|
||||||
|
return ["t_noise_std2"]
|
||||||
|
|
||||||
|
def _set_params(self, x):
|
||||||
|
self.sigma2 = float(x)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def variance(self, extra_data=None):
|
||||||
|
return (self.v / float(self.v - 2)) * self.sigma2
|
||||||
|
|
||||||
|
def pdf_link(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Likelihood function given link(f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{-\\frac{v+1}{2}}
|
||||||
|
|
||||||
|
:param link_f: latent variables link(f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in student t distribution
|
||||||
|
:returns: likelihood evaluated for this point
|
||||||
|
:rtype: float
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
e = y - link_f
|
||||||
|
#Careful gamma(big_number) is infinity!
|
||||||
|
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
|
||||||
|
/ (np.sqrt(self.v * np.pi * self.sigma2)))
|
||||||
|
* ((1 + (1./float(self.v))*((e**2)/float(self.sigma2)))**(-0.5*(self.v + 1)))
|
||||||
|
)
|
||||||
|
return np.prod(objective)
|
||||||
|
|
||||||
|
def logpdf_link(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Log Likelihood Function given link(f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
|
||||||
|
|
||||||
|
:param link_f: latent variables (link(f))
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in student t distribution
|
||||||
|
:returns: likelihood evaluated for this point
|
||||||
|
:rtype: float
|
||||||
|
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
e = y - link_f
|
||||||
|
objective = (+ gammaln((self.v + 1) * 0.5)
|
||||||
|
- gammaln(self.v * 0.5)
|
||||||
|
- 0.5*np.log(self.sigma2 * self.v * np.pi)
|
||||||
|
- 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
|
||||||
|
)
|
||||||
|
return np.sum(objective)
|
||||||
|
|
||||||
|
def dlogpdf_dlink(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v}
|
||||||
|
|
||||||
|
:param link_f: latent variables (f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in student t distribution
|
||||||
|
:returns: gradient of likelihood evaluated at points
|
||||||
|
:rtype: Nx1 array
|
||||||
|
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
e = y - link_f
|
||||||
|
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
|
||||||
|
return grad
|
||||||
|
|
||||||
|
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Hessian at y, given link(f), w.r.t link(f)
|
||||||
|
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
|
||||||
|
The hessian will be 0 unless i == j
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
|
||||||
|
|
||||||
|
:param link_f: latent variables link(f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in student t distribution
|
||||||
|
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
|
||||||
|
:rtype: Nx1 array
|
||||||
|
|
||||||
|
.. Note::
|
||||||
|
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||||
|
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
e = y - link_f
|
||||||
|
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
|
||||||
|
return hess
|
||||||
|
|
||||||
|
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v)}{((y_{i} - \lambda(f_{i}))^{2} + \\sigma^{2} v)^3}
|
||||||
|
|
||||||
|
:param link_f: latent variables link(f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in student t distribution
|
||||||
|
:returns: third derivative of likelihood evaluated at points f
|
||||||
|
:rtype: Nx1 array
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
e = y - link_f
|
||||||
|
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
|
||||||
|
((e**2 + self.sigma2*self.v)**3)
|
||||||
|
)
|
||||||
|
return d3lik_dlink3
|
||||||
|
|
||||||
|
def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})}
|
||||||
|
|
||||||
|
:param link_f: latent variables link(f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in student t distribution
|
||||||
|
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
||||||
|
:rtype: float
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
e = y - link_f
|
||||||
|
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
|
||||||
|
return np.sum(dlogpdf_dvar)
|
||||||
|
|
||||||
|
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-v(v + 1)(y_{i}-\lambda(f_{i}))}{((y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2}
|
||||||
|
|
||||||
|
:param link_f: latent variables link_f
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in student t distribution
|
||||||
|
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
||||||
|
:rtype: Nx1 array
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
e = y - link_f
|
||||||
|
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
|
||||||
|
return dlogpdf_dlink_dvar
|
||||||
|
|
||||||
|
def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}}
|
||||||
|
|
||||||
|
:param link_f: latent variables link(f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in student t distribution
|
||||||
|
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
|
||||||
|
:rtype: Nx1 array
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
e = y - link_f
|
||||||
|
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
|
||||||
|
/ ((self.sigma2*self.v + (e**2))**3)
|
||||||
|
)
|
||||||
|
return d2logpdf_dlink2_dvar
|
||||||
|
|
||||||
|
def dlogpdf_link_dtheta(self, f, y, extra_data=None):
|
||||||
|
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
|
||||||
|
return np.asarray([[dlogpdf_dvar]])
|
||||||
|
|
||||||
|
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
|
||||||
|
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
|
||||||
|
return dlogpdf_dlink_dvar
|
||||||
|
|
||||||
|
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
|
||||||
|
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
|
||||||
|
return d2logpdf_dlink2_dvar
|
||||||
|
|
||||||
|
def _predictive_variance_analytical(self, mu, sigma, predictive_mean=None):
|
||||||
|
"""
|
||||||
|
Compute predictive variance of student_t*normal p(y*|f*)p(f*)
|
||||||
|
|
||||||
|
Need to find what the variance is at the latent points for a student t*normal p(y*|f*)p(f*)
|
||||||
|
(((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))
|
||||||
|
*((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2)))
|
||||||
|
"""
|
||||||
|
|
||||||
|
#FIXME: Not correct
|
||||||
|
#We want the variance around test points y which comes from int p(y*|f*)p(f*) df*
|
||||||
|
#Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)]
|
||||||
|
#Since we are given f* (mu) which is our mean (expected) value of y*|f* then the variance is the variance around this
|
||||||
|
#Which was also given to us as (var)
|
||||||
|
#We also need to know the expected variance of y* around samples f*, this is the variance of the student t distribution
|
||||||
|
#However the variance of the student t distribution is not dependent on f, only on sigma and the degrees of freedom
|
||||||
|
true_var = 1/(1/sigma**2 + 1/self.variance)
|
||||||
|
|
||||||
|
return true_var
|
||||||
|
|
||||||
|
def _predictive_mean_analytical(self, mu, sigma):
|
||||||
|
"""
|
||||||
|
Compute mean of the prediction
|
||||||
|
"""
|
||||||
|
#FIXME: Not correct
|
||||||
|
return mu
|
||||||
|
|
||||||
|
def samples(self, gp):
|
||||||
|
"""
|
||||||
|
Returns a set of samples of observations based on a given value of the latent variable.
|
||||||
|
|
||||||
|
:param gp: latent variable
|
||||||
|
"""
|
||||||
|
orig_shape = gp.shape
|
||||||
|
gp = gp.flatten()
|
||||||
|
#FIXME: Very slow as we are computing a new random variable per input!
|
||||||
|
#Can't get it to sample all at the same time
|
||||||
|
#student_t_samples = np.array([stats.t.rvs(self.v, self.gp_link.transf(gpj),scale=np.sqrt(self.sigma2), size=1) for gpj in gp])
|
||||||
|
dfs = np.ones_like(gp)*self.v
|
||||||
|
scales = np.ones_like(gp)*np.sqrt(self.sigma2)
|
||||||
|
student_t_samples = stats.t.rvs(dfs, loc=self.gp_link.transf(gp),
|
||||||
|
scale=scales)
|
||||||
|
return student_t_samples.reshape(orig_shape)
|
||||||
|
|
@ -49,18 +49,6 @@ class BayesianGPLVM(SparseGP, GPLVM):
|
||||||
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs)
|
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs)
|
||||||
self.ensure_default_constraints()
|
self.ensure_default_constraints()
|
||||||
|
|
||||||
def getstate(self):
|
|
||||||
"""
|
|
||||||
Get the current state of the class,
|
|
||||||
here just all the indices, rest can get recomputed
|
|
||||||
"""
|
|
||||||
return SparseGP.getstate(self) + [self.init]
|
|
||||||
|
|
||||||
def setstate(self, state):
|
|
||||||
self._const_jitter = None
|
|
||||||
self.init = state.pop()
|
|
||||||
SparseGP.setstate(self, state)
|
|
||||||
|
|
||||||
def _get_param_names(self):
|
def _get_param_names(self):
|
||||||
X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
|
X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
|
||||||
S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
|
S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
|
||||||
|
|
@ -285,6 +273,19 @@ class BayesianGPLVM(SparseGP, GPLVM):
|
||||||
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
|
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
def getstate(self):
|
||||||
|
"""
|
||||||
|
Get the current state of the class,
|
||||||
|
here just all the indices, rest can get recomputed
|
||||||
|
"""
|
||||||
|
return SparseGP.getstate(self) + [self.init]
|
||||||
|
|
||||||
|
def setstate(self, state):
|
||||||
|
self._const_jitter = None
|
||||||
|
self.init = state.pop()
|
||||||
|
SparseGP.setstate(self, state)
|
||||||
|
|
||||||
|
|
||||||
def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
|
def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
|
||||||
"""
|
"""
|
||||||
objective function for fitting the latent variables for test points
|
objective function for fitting the latent variables for test points
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ import pylab as pb
|
||||||
import sys, pdb
|
import sys, pdb
|
||||||
from ..core import GP
|
from ..core import GP
|
||||||
from ..models import GPLVM
|
from ..models import GPLVM
|
||||||
from ..mappings import *
|
from ..mappings import Kernel
|
||||||
|
|
||||||
|
|
||||||
class BCGPLVM(GPLVM):
|
class BCGPLVM(GPLVM):
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ class FITCClassification(FITC):
|
||||||
|
|
||||||
:param X: input observations
|
:param X: input observations
|
||||||
:param Y: observed values
|
:param Y: observed values
|
||||||
:param likelihood: a GPy likelihood, defaults to Binomial with probit link function
|
:param likelihood: a GPy likelihood, defaults to Bernoulli with probit link function
|
||||||
:param kernel: a GPy kernel, defaults to rbf+white
|
:param kernel: a GPy kernel, defaults to rbf+white
|
||||||
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
|
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
|
||||||
:type normalize_X: False|True
|
:type normalize_X: False|True
|
||||||
|
|
@ -31,7 +31,7 @@ class FITCClassification(FITC):
|
||||||
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
|
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
|
||||||
|
|
||||||
if likelihood is None:
|
if likelihood is None:
|
||||||
noise_model = likelihoods.binomial()
|
noise_model = likelihoods.bernoulli()
|
||||||
likelihood = likelihoods.EP(Y, noise_model)
|
likelihood = likelihoods.EP(Y, noise_model)
|
||||||
elif Y is not None:
|
elif Y is not None:
|
||||||
if not all(Y.flatten() == likelihood.data.flatten()):
|
if not all(Y.flatten() == likelihood.data.flatten()):
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ class GPClassification(GP):
|
||||||
|
|
||||||
:param X: input observations
|
:param X: input observations
|
||||||
:param Y: observed values, can be None if likelihood is not None
|
:param Y: observed values, can be None if likelihood is not None
|
||||||
:param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
|
:param likelihood: a GPy likelihood, defaults to Bernoulli with Probit link_function
|
||||||
:param kernel: a GPy kernel, defaults to rbf
|
:param kernel: a GPy kernel, defaults to rbf
|
||||||
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
|
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
|
||||||
:type normalize_X: False|True
|
:type normalize_X: False|True
|
||||||
|
|
@ -31,7 +31,7 @@ class GPClassification(GP):
|
||||||
kernel = kern.rbf(X.shape[1])
|
kernel = kern.rbf(X.shape[1])
|
||||||
|
|
||||||
if likelihood is None:
|
if likelihood is None:
|
||||||
noise_model = likelihoods.binomial()
|
noise_model = likelihoods.bernoulli()
|
||||||
likelihood = likelihoods.EP(Y, noise_model)
|
likelihood = likelihoods.EP(Y, noise_model)
|
||||||
elif Y is not None:
|
elif Y is not None:
|
||||||
if not all(Y.flatten() == likelihood.data.flatten()):
|
if not all(Y.flatten() == likelihood.data.flatten()):
|
||||||
|
|
|
||||||
|
|
@ -25,11 +25,12 @@ class GPRegression(GP):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False):
|
def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, likelihood=None):
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
kernel = kern.rbf(X.shape[1])
|
kernel = kern.rbf(X.shape[1])
|
||||||
|
|
||||||
likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)
|
if likelihood is None:
|
||||||
|
likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)
|
||||||
|
|
||||||
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
||||||
self.ensure_default_constraints()
|
self.ensure_default_constraints()
|
||||||
|
|
@ -39,5 +40,3 @@ class GPRegression(GP):
|
||||||
|
|
||||||
def setstate(self, state):
|
def setstate(self, state):
|
||||||
return GP.setstate(self, state)
|
return GP.setstate(self, state)
|
||||||
|
|
||||||
pass
|
|
||||||
|
|
|
||||||
|
|
@ -44,12 +44,6 @@ class GPLVM(GP):
|
||||||
Xr[:PC.shape[0], :PC.shape[1]] = PC
|
Xr[:PC.shape[0], :PC.shape[1]] = PC
|
||||||
return Xr
|
return Xr
|
||||||
|
|
||||||
def getstate(self):
|
|
||||||
return GP.getstate(self)
|
|
||||||
|
|
||||||
def setstate(self, state):
|
|
||||||
GP.setstate(self, state)
|
|
||||||
|
|
||||||
def _get_param_names(self):
|
def _get_param_names(self):
|
||||||
return sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) + GP._get_param_names(self)
|
return sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) + GP._get_param_names(self)
|
||||||
|
|
||||||
|
|
@ -68,7 +62,7 @@ class GPLVM(GP):
|
||||||
def jacobian(self,X):
|
def jacobian(self,X):
|
||||||
target = np.zeros((X.shape[0],X.shape[1],self.output_dim))
|
target = np.zeros((X.shape[0],X.shape[1],self.output_dim))
|
||||||
for i in range(self.output_dim):
|
for i in range(self.output_dim):
|
||||||
target[:,:,i]=self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
|
target[:,:,i] = self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def magnification(self,X):
|
def magnification(self,X):
|
||||||
|
|
@ -91,3 +85,11 @@ class GPLVM(GP):
|
||||||
|
|
||||||
def plot_magnification(self, *args, **kwargs):
|
def plot_magnification(self, *args, **kwargs):
|
||||||
return util.plot_latent.plot_magnification(self, *args, **kwargs)
|
return util.plot_latent.plot_magnification(self, *args, **kwargs)
|
||||||
|
|
||||||
|
def getstate(self):
|
||||||
|
return GP.getstate(self)
|
||||||
|
|
||||||
|
def setstate(self, state):
|
||||||
|
GP.setstate(self, state)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -75,14 +75,14 @@ class GradientChecker(Model):
|
||||||
self.names = names
|
self.names = names
|
||||||
self.shapes = [get_shape(x0)]
|
self.shapes = [get_shape(x0)]
|
||||||
for name, xi in zip(self.names, at_least_one_element(x0)):
|
for name, xi in zip(self.names, at_least_one_element(x0)):
|
||||||
self.__setattr__(name, xi)
|
self.__setattr__(name, numpy.float_(xi))
|
||||||
# self._param_names = []
|
# self._param_names = []
|
||||||
# for name, shape in zip(self.names, self.shapes):
|
# for name, shape in zip(self.names, self.shapes):
|
||||||
# self._param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
|
# self._param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
|
||||||
self.args = args
|
self.args = args
|
||||||
self.kwargs = kwargs
|
self.kwargs = kwargs
|
||||||
self.f = f
|
self._f = f
|
||||||
self.df = df
|
self._df = df
|
||||||
|
|
||||||
def _get_x(self):
|
def _get_x(self):
|
||||||
if len(self.names) > 1:
|
if len(self.names) > 1:
|
||||||
|
|
@ -90,10 +90,10 @@ class GradientChecker(Model):
|
||||||
return [self.__getattribute__(self.names[0])] + list(self.args)
|
return [self.__getattribute__(self.names[0])] + list(self.args)
|
||||||
|
|
||||||
def log_likelihood(self):
|
def log_likelihood(self):
|
||||||
return float(numpy.sum(self.f(*self._get_x(), **self.kwargs)))
|
return float(numpy.sum(self._f(*self._get_x(), **self.kwargs)))
|
||||||
|
|
||||||
def _log_likelihood_gradients(self):
|
def _log_likelihood_gradients(self):
|
||||||
return numpy.atleast_1d(self.df(*self._get_x(), **self.kwargs)).flatten()
|
return numpy.atleast_1d(self._df(*self._get_x(), **self.kwargs)).flatten()
|
||||||
|
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
|
|
|
||||||
|
|
@ -81,29 +81,6 @@ class MRD(Model):
|
||||||
Model.__init__(self)
|
Model.__init__(self)
|
||||||
self.ensure_default_constraints()
|
self.ensure_default_constraints()
|
||||||
|
|
||||||
def getstate(self):
|
|
||||||
return Model.getstate(self) + [self.names,
|
|
||||||
self.bgplvms,
|
|
||||||
self.gref,
|
|
||||||
self.nparams,
|
|
||||||
self.input_dim,
|
|
||||||
self.num_inducing,
|
|
||||||
self.num_data,
|
|
||||||
self.NQ,
|
|
||||||
self.MQ]
|
|
||||||
|
|
||||||
def setstate(self, state):
|
|
||||||
self.MQ = state.pop()
|
|
||||||
self.NQ = state.pop()
|
|
||||||
self.num_data = state.pop()
|
|
||||||
self.num_inducing = state.pop()
|
|
||||||
self.input_dim = state.pop()
|
|
||||||
self.nparams = state.pop()
|
|
||||||
self.gref = state.pop()
|
|
||||||
self.bgplvms = state.pop()
|
|
||||||
self.names = state.pop()
|
|
||||||
Model.setstate(self, state)
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def X(self):
|
def X(self):
|
||||||
return self.gref.X
|
return self.gref.X
|
||||||
|
|
@ -211,8 +188,8 @@ class MRD(Model):
|
||||||
# g.Z = Z.reshape(self.num_inducing, self.input_dim)
|
# g.Z = Z.reshape(self.num_inducing, self.input_dim)
|
||||||
#
|
#
|
||||||
# def _set_kern_params(self, g, p):
|
# def _set_kern_params(self, g, p):
|
||||||
# g.kern._set_params(p[:g.kern.Nparam])
|
# g.kern._set_params(p[:g.kern.num_params])
|
||||||
# g.likelihood._set_params(p[g.kern.Nparam:])
|
# g.likelihood._set_params(p[g.kern.num_params:])
|
||||||
|
|
||||||
def _set_params(self, x):
|
def _set_params(self, x):
|
||||||
start = 0; end = self.NQ
|
start = 0; end = self.NQ
|
||||||
|
|
@ -371,4 +348,28 @@ class MRD(Model):
|
||||||
pylab.draw()
|
pylab.draw()
|
||||||
fig.tight_layout()
|
fig.tight_layout()
|
||||||
|
|
||||||
|
def getstate(self):
|
||||||
|
return Model.getstate(self) + [self.names,
|
||||||
|
self.bgplvms,
|
||||||
|
self.gref,
|
||||||
|
self.nparams,
|
||||||
|
self.input_dim,
|
||||||
|
self.num_inducing,
|
||||||
|
self.num_data,
|
||||||
|
self.NQ,
|
||||||
|
self.MQ]
|
||||||
|
|
||||||
|
def setstate(self, state):
|
||||||
|
self.MQ = state.pop()
|
||||||
|
self.NQ = state.pop()
|
||||||
|
self.num_data = state.pop()
|
||||||
|
self.num_inducing = state.pop()
|
||||||
|
self.input_dim = state.pop()
|
||||||
|
self.nparams = state.pop()
|
||||||
|
self.gref = state.pop()
|
||||||
|
self.bgplvms = state.pop()
|
||||||
|
self.names = state.pop()
|
||||||
|
Model.setstate(self, state)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ class SparseGPClassification(SparseGP):
|
||||||
|
|
||||||
:param X: input observations
|
:param X: input observations
|
||||||
:param Y: observed values
|
:param Y: observed values
|
||||||
:param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
|
:param likelihood: a GPy likelihood, defaults to Bernoulli with probit link_function
|
||||||
:param kernel: a GPy kernel, defaults to rbf+white
|
:param kernel: a GPy kernel, defaults to rbf+white
|
||||||
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
|
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
|
||||||
:type normalize_X: False|True
|
:type normalize_X: False|True
|
||||||
|
|
@ -31,7 +31,7 @@ class SparseGPClassification(SparseGP):
|
||||||
kernel = kern.rbf(X.shape[1])# + kern.white(X.shape[1],1e-3)
|
kernel = kern.rbf(X.shape[1])# + kern.white(X.shape[1],1e-3)
|
||||||
|
|
||||||
if likelihood is None:
|
if likelihood is None:
|
||||||
noise_model = likelihoods.binomial()
|
noise_model = likelihoods.bernoulli()
|
||||||
likelihood = likelihoods.EP(Y, noise_model)
|
likelihood = likelihoods.EP(Y, noise_model)
|
||||||
elif Y is not None:
|
elif Y is not None:
|
||||||
if not all(Y.flatten() == likelihood.data.flatten()):
|
if not all(Y.flatten() == likelihood.data.flatten()):
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ class SVIGPRegression(SVIGP):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, q_u=None, batchsize=10):
|
def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, q_u=None, batchsize=10, normalize_Y=False):
|
||||||
# kern defaults to rbf (plus white for stability)
|
# kern defaults to rbf (plus white for stability)
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
kernel = kern.rbf(X.shape[1], variance=1., lengthscale=4.) + kern.white(X.shape[1], 1e-3)
|
kernel = kern.rbf(X.shape[1], variance=1., lengthscale=4.) + kern.white(X.shape[1], 1e-3)
|
||||||
|
|
@ -38,7 +38,7 @@ class SVIGPRegression(SVIGP):
|
||||||
assert Z.shape[1] == X.shape[1]
|
assert Z.shape[1] == X.shape[1]
|
||||||
|
|
||||||
# likelihood defaults to Gaussian
|
# likelihood defaults to Gaussian
|
||||||
likelihood = likelihoods.Gaussian(Y, normalize=False)
|
likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)
|
||||||
|
|
||||||
SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize)
|
SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize)
|
||||||
self.load_batch()
|
self.load_batch()
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,7 @@ def model_checkgrads(model):
|
||||||
|
|
||||||
def model_instance(model):
|
def model_instance(model):
|
||||||
#assert isinstance(model, GPy.core.model)
|
#assert isinstance(model, GPy.core.model)
|
||||||
return isinstance(model, GPy.core.model)
|
return isinstance(model, GPy.core.model.Model)
|
||||||
|
|
||||||
@nottest
|
@nottest
|
||||||
def test_models():
|
def test_models():
|
||||||
|
|
@ -54,7 +54,7 @@ def test_models():
|
||||||
print "After"
|
print "After"
|
||||||
print functions
|
print functions
|
||||||
for example in functions:
|
for example in functions:
|
||||||
if example[0] in ['oil', 'silhouette', 'GPLVM_oil_100']:
|
if example[0] in ['oil', 'silhouette', 'GPLVM_oil_100', 'brendan_faces']:
|
||||||
print "SKIPPING"
|
print "SKIPPING"
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
|
||||||
61
GPy/testing/gp_transformation_tests.py
Normal file
61
GPy/testing/gp_transformation_tests.py
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
from nose.tools import with_setup
|
||||||
|
from GPy.models import GradientChecker
|
||||||
|
from GPy.likelihoods.noise_models import gp_transformations
|
||||||
|
import inspect
|
||||||
|
import unittest
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
class TestTransformations(object):
    """
    Generic transformations checker.

    For every gp_transformation listed in ``test_transformations`` the
    first, second and third derivatives are compared against numerical
    gradients using ``GradientChecker``.
    """
    def setUp(self):
        N = 30
        # One column vector and one plain scalar input. ``np.random.rand()``
        # (no size argument) already returns a Python float, which avoids the
        # deprecated float() conversion of a 1-element array.
        self.fs = [np.random.rand(N, 1), np.random.rand()]

    def tearDown(self):
        self.fs = None

    def test_transformations(self):
        """Yield one check per (transformation, input, derivative order)."""
        self.setUp()
        transformations = [gp_transformations.Identity(),
                           gp_transformations.Log(),
                           gp_transformations.Probit(),
                           gp_transformations.Log_ex_1(),
                           gp_transformations.Reciprocal(),
                           ]

        for transformation in transformations:
            for f in self.fs:
                yield self.t_dtransf_df, transformation, f
                yield self.t_d2transf_df2, transformation, f
                yield self.t_d3transf_df3, transformation, f

    def _check_derivative(self, label, func, dfunc, f):
        """Print ``label`` and assert that ``dfunc`` is the gradient of ``func`` around ``f``."""
        print("\n{}".format(label))
        grad = GradientChecker(func, dfunc, f, 'f')
        grad.randomize()
        # First call is only for the verbose report, second one is asserted on.
        grad.checkgrad(verbose=1)
        assert grad.checkgrad()

    @with_setup(setUp, tearDown)
    def t_dtransf_df(self, transformation, f):
        """First derivative of the transformation."""
        self._check_derivative("t_dtransf_df",
                               transformation.transf, transformation.dtransf_df, f)

    @with_setup(setUp, tearDown)
    def t_d2transf_df2(self, transformation, f):
        """Second derivative of the transformation."""
        self._check_derivative("t_d2transf_df2",
                               transformation.dtransf_df, transformation.d2transf_df2, f)

    @with_setup(setUp, tearDown)
    def t_d3transf_df3(self, transformation, f):
        """Third derivative of the transformation."""
        self._check_derivative("t_d3transf_df3",
                               transformation.d2transf_df2, transformation.d3transf_df3, f)
|
||||||
|
|
||||||
|
#if __name__ == "__main__":
|
||||||
|
#print "Running unit tests"
|
||||||
|
#unittest.main()
|
||||||
|
|
@ -7,6 +7,13 @@ import GPy
|
||||||
|
|
||||||
verbose = False
|
verbose = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
import sympy
|
||||||
|
SYMPY_AVAILABLE=True
|
||||||
|
except ImportError:
|
||||||
|
SYMPY_AVAILABLE=False
|
||||||
|
|
||||||
|
|
||||||
class KernelTests(unittest.TestCase):
|
class KernelTests(unittest.TestCase):
|
||||||
def test_kerneltie(self):
|
def test_kerneltie(self):
|
||||||
K = GPy.kern.rbf(5, ARD=True)
|
K = GPy.kern.rbf(5, ARD=True)
|
||||||
|
|
@ -22,7 +29,16 @@ class KernelTests(unittest.TestCase):
|
||||||
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
|
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
|
||||||
|
|
||||||
def test_rbf_sympykernel(self):
|
def test_rbf_sympykernel(self):
|
||||||
kern = GPy.kern.rbf_sympy(5)
|
if SYMPY_AVAILABLE:
|
||||||
|
kern = GPy.kern.rbf_sympy(5)
|
||||||
|
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
|
||||||
|
|
||||||
|
def test_eq_sympykernel(self):
|
||||||
|
kern = GPy.kern.eq_sympy(5, 3, output_ind=4)
|
||||||
|
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
|
||||||
|
|
||||||
|
def test_sinckernel(self):
|
||||||
|
kern = GPy.kern.sinc(5)
|
||||||
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
|
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
|
||||||
|
|
||||||
def test_rbf_invkernel(self):
|
def test_rbf_invkernel(self):
|
||||||
|
|
|
||||||
577
GPy/testing/likelihoods_tests.py
Normal file
577
GPy/testing/likelihoods_tests.py
Normal file
|
|
@ -0,0 +1,577 @@
|
||||||
|
import numpy as np
|
||||||
|
import unittest
|
||||||
|
import GPy
|
||||||
|
from GPy.models import GradientChecker
|
||||||
|
import functools
|
||||||
|
import inspect
|
||||||
|
from GPy.likelihoods.noise_models import gp_transformations
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
|
def dparam_partial(inst_func, *args):
    """
    If we have an instance method that needs to be called but that doesn't
    take the parameter we wish to change to checkgrad, then this function
    will change the variable using _set_params.

    inst_func: should be a bound method of the object whose parameter we
               would like to change
    args: anything else that needs to be given to the function (for example
          the f or Y that are being used in the function whilst we tweak the
          param)

    Returns a callable taking a single ``param``: it hands ``param`` to the
    owning instance's ``_set_params`` and then returns ``inst_func(*args)``.
    """
    def param_func(param, inst_func, args):
        # __self__ is the bound instance on both Python 2.6+ and Python 3
        # (im_self only exists on Python 2).
        inst_func.__self__._set_params(param)
        return inst_func(*args)
    return functools.partial(param_func, inst_func=inst_func, args=args)
|
||||||
|
|
||||||
|
def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=False, verbose=False):
    """
    Gradient-check ``dfunc`` against ``func`` while moving a single parameter.

    checkgrad expects a f: R^N -> R^1 and df: R^N -> R^N.
    Here the other inputs (f and y for example) are held fixed in ``args``
    and only the parameter moves, so func may give back R^N and dfunc may
    give back R^NxM (M parameters, N data). One slice of func and one slice
    of dfunc is therefore checked at a time.

    Returns True only if every individual check passed.
    """
    #print "\n{} likelihood: {} vs {}".format(func.im_self.__class__.__name__,
    #func.__name__, dfunc.__name__)
    f_of_param = dparam_partial(func, *args)
    df_of_param = dparam_partial(dfunc, *args)
    all_passed = True
    for param in params:
        n_f = np.atleast_1d(f_of_param(param)).shape[0]
        n_df = np.atleast_1d(df_of_param(param)).shape[0]
        for df_row in range(n_df):
            # dlik and dlik_dvar give back 1 value for each, so clamp the
            # index into func's output to its last entry.
            f_row = min(n_f, df_row+1) - 1
            print("fnum: {} dfnum: {} f_ind: {} fixed_val: {}".format(n_f, n_df, f_row, df_row))

            def f_slice(x):
                return np.atleast_1d(f_of_param(x))[f_row]

            def df_slice(x):
                return np.atleast_1d(df_of_param(x))[df_row]

            # Make grad checker with this param moving, note that set_params is NOT being called
            # The parameter is being set directly with __setattr__
            checker = GradientChecker(f_slice, df_slice, param, 'p')
            # This is not general for more than one param...
            if constraints is not None:
                for apply_constraint in constraints:
                    apply_constraint('p', checker)
            if randomize:
                checker.randomize()
            if verbose:
                print(checker)
                checker.checkgrad(verbose=1)
            if not checker.checkgrad():
                all_passed = False

    return all_passed
|
||||||
|
|
||||||
|
|
||||||
|
from nose.tools import with_setup
|
||||||
|
class TestNoiseModels(object):
|
||||||
|
"""
|
||||||
|
Generic model checker
|
||||||
|
"""
|
||||||
|
def setUp(self):
    """Create the random inputs, targets and settings shared by all checks."""
    self.N = 5
    self.D = 3
    self.X = np.random.rand(self.N, self.D)*10

    self.real_std = 0.1
    noise = np.random.randn(*self.X[:, 0].shape)*self.real_std
    # Real-valued targets: noisy sine of the first input column
    self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None]
    self.f = np.random.rand(self.N, 1)
    # Targets for likelihoods with restricted support.
    # (builtin int is what the removed np.int alias pointed to)
    self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=int)[:, None]
    self.positive_Y = np.exp(self.Y.copy())
    tmp = np.round(self.X[:, 0]*3-3)[:, None] + np.random.randint(0,3, self.X.shape[0])[:, None]
    self.integer_Y = np.where(tmp > 0, tmp, 0)

    # Random noise variance (the former fixed 0.2 was always overwritten)
    self.var = np.random.rand(1)

    #Make a bigger step as lower bound can be quite curved
    self.step = 1e-3
|
||||||
|
|
||||||
|
def tearDown(self):
    """Release the data fixtures by resetting Y, f and X to None."""
    for fixture in ("Y", "f", "X"):
        setattr(self, fixture, None)
|
||||||
|
|
||||||
|
def test_noise_models(self):
    """
    Test generator: for every configured noise model, yield the derivative
    checks (and, where enabled, the Laplace / EP fit checks) to run on it.
    """
    self.setUp()

    ####################################################
    # Constraint wrappers so we can just list them off #
    ####################################################
    def constrain_negative(regex, model):
        model.constrain_negative(regex)

    def constrain_positive(regex, model):
        model.constrain_positive(regex)

    def constrain_bounded(regex, model, lower, upper):
        """
        Used like: partial(constrain_bounded, lower=0, upper=1)
        """
        model.constrain_bounded(regex, lower, upper)

    # Dictionary where we nest models we would like to check
    #     Name: {
    #         "model": model_instance,
    #         "grad_params": {
    #             "names": [names_of_params_we_want, to_grad_check],
    #             "vals": [values_of_params, to_start_at],
    #             "constraints": [constraint_wrappers, listed_here]
    #             },
    #         "laplace": boolean_of_whether_model_should_work_for_laplace,
    #         "ep": boolean_of_whether_model_should_work_for_ep,
    #         "link_f_constraints": [constraint_wrappers, listed_here],
    #         "Y": optional_targets_to_use_instead_of_self.Y,
    #         "f": optional_latent_values_to_use_instead_of_self.f
    #         }
    noise_models = {
        "Student_t_default": {
            "model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
            "grad_params": {
                "names": ["t_noise"],
                "vals": [self.var],
                "constraints": [constrain_positive]
            },
            "laplace": True
        },
        "Student_t_1_var": {
            "model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
            "grad_params": {
                "names": ["t_noise"],
                "vals": [1],
                "constraints": [constrain_positive]
            },
            "laplace": True
        },
        "Student_t_small_var": {
            "model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
            "grad_params": {
                "names": ["t_noise"],
                "vals": [0.01],
                "constraints": [constrain_positive]
            },
            "laplace": True
        },
        "Student_t_approx_gauss": {
            "model": GPy.likelihoods.student_t(deg_free=1000, sigma2=self.var),
            "grad_params": {
                "names": ["t_noise"],
                "vals": [self.var],
                "constraints": [constrain_positive]
            },
            "laplace": True
        },
        "Student_t_log": {
            "model": GPy.likelihoods.student_t(gp_link=gp_transformations.Log(), deg_free=5, sigma2=self.var),
            "grad_params": {
                "names": ["t_noise"],
                "vals": [self.var],
                "constraints": [constrain_positive]
            },
            "laplace": True
        },
        "Gaussian_default": {
            "model": GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N),
            "grad_params": {
                "names": ["noise_model_variance"],
                "vals": [self.var],
                "constraints": [constrain_positive]
            },
            "laplace": True,
            "ep": True
        },
        "Gaussian_log": {
            "model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log(), variance=self.var, D=self.D, N=self.N),
            "grad_params": {
                "names": ["noise_model_variance"],
                "vals": [self.var],
                "constraints": [constrain_positive]
            },
            "laplace": True
        },
        "Gaussian_probit": {
            "model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Probit(), variance=self.var, D=self.D, N=self.N),
            "grad_params": {
                "names": ["noise_model_variance"],
                "vals": [self.var],
                "constraints": [constrain_positive]
            },
            "laplace": True
        },
        "Gaussian_log_ex": {
            "model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log_ex_1(), variance=self.var, D=self.D, N=self.N),
            "grad_params": {
                "names": ["noise_model_variance"],
                "vals": [self.var],
                "constraints": [constrain_positive]
            },
            "laplace": True
        },
        "Bernoulli_default": {
            "model": GPy.likelihoods.bernoulli(),
            "link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)],
            "laplace": True,
            "Y": self.binary_Y,
            "ep": True
        },
        "Exponential_default": {
            "model": GPy.likelihoods.exponential(),
            "link_f_constraints": [constrain_positive],
            "Y": self.positive_Y,
            "laplace": True,
        },
        "Poisson_default": {
            "model": GPy.likelihoods.poisson(),
            "link_f_constraints": [constrain_positive],
            "Y": self.integer_Y,
            "laplace": True,
            "ep": False #Should work though...
        },
        "Gamma_default": {
            "model": GPy.likelihoods.gamma(),
            "link_f_constraints": [constrain_positive],
            "Y": self.positive_Y,
            "laplace": True
        }
    }

    for attributes in noise_models.values():
        model = attributes["model"]
        if "grad_params" in attributes:
            grad_params = attributes["grad_params"]
            param_vals = grad_params["vals"]
            param_names = grad_params["names"]
            param_constraints = grad_params["constraints"]
        else:
            param_vals = []
            param_names = []
            # This used to assign `constrain_positive = []`, which left
            # param_constraints undefined (or carrying the previous model's
            # constraints) for models without parameters.
            param_constraints = []
        link_f_constraints = attributes.get("link_f_constraints", [])
        Y = attributes.get("Y", self.Y).copy()
        f = attributes.get("f", self.f).copy()
        laplace = attributes.get("laplace", False)
        ep = attributes.get("ep", False)

        if len(param_vals) > 1:
            raise NotImplementedError("Cannot support multiple params in likelihood yet!")

        #Required by all
        #Normal derivatives
        yield self.t_logpdf, model, Y, f
        yield self.t_dlogpdf_df, model, Y, f
        yield self.t_d2logpdf_df2, model, Y, f
        #Link derivatives
        yield self.t_dlogpdf_dlink, model, Y, f, link_f_constraints
        yield self.t_d2logpdf_dlink2, model, Y, f, link_f_constraints
        if laplace:
            #Laplace only derivatives
            yield self.t_d3logpdf_df3, model, Y, f
            yield self.t_d3logpdf_dlink3, model, Y, f, link_f_constraints
            #Params
            yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_constraints
            yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_constraints
            yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_constraints
            #Link params
            yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_constraints
            yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_constraints
            yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_constraints

            #laplace likelihood gradcheck
            yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
        if ep:
            #ep likelihood gradcheck
            yield self.t_ep_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints

    self.tearDown()
|
||||||
|
|
||||||
|
#############
|
||||||
|
# dpdf_df's #
|
||||||
|
#############
|
||||||
|
@with_setup(setUp, tearDown)
def t_logpdf(self, model, Y, f):
    """Check that log(model.pdf) agrees with model.logpdf at (f, Y)."""
    print("\n{}".format(inspect.stack()[0][3]))
    print(model)
    log_of_pdf = np.log(model.pdf(f.copy(), Y.copy()))
    direct_logpdf = model.logpdf(f.copy(), Y.copy())
    np.testing.assert_almost_equal(log_of_pdf, direct_logpdf)
|
||||||
|
|
||||||
|
@with_setup(setUp, tearDown)
def t_dlogpdf_df(self, model, Y, f):
    """Gradient-check dlogpdf_df against logpdf with respect to f."""
    banner = "\n{}".format(inspect.stack()[0][3])
    print(banner)
    self.description = banner
    checker = GradientChecker(functools.partial(model.logpdf, y=Y),
                              functools.partial(model.dlogpdf_df, y=Y),
                              f.copy(), 'g')
    checker.randomize()
    # verbose run is for the report only; the assertion uses a second run
    checker.checkgrad(verbose=1)
    print(model)
    assert checker.checkgrad()
|
||||||
|
|
||||||
|
@with_setup(setUp, tearDown)
def t_d2logpdf_df2(self, model, Y, f):
    """Gradient-check d2logpdf_df2 against dlogpdf_df with respect to f."""
    print("\n{}".format(inspect.stack()[0][3]))
    checker = GradientChecker(functools.partial(model.dlogpdf_df, y=Y),
                              functools.partial(model.d2logpdf_df2, y=Y),
                              f.copy(), 'g')
    checker.randomize()
    # verbose run is for the report only; the assertion uses a second run
    checker.checkgrad(verbose=1)
    print(model)
    assert checker.checkgrad()
|
||||||
|
|
||||||
|
@with_setup(setUp, tearDown)
def t_d3logpdf_df3(self, model, Y, f):
    """Gradient-check d3logpdf_df3 against d2logpdf_df2 with respect to f."""
    print("\n{}".format(inspect.stack()[0][3]))
    checker = GradientChecker(functools.partial(model.d2logpdf_df2, y=Y),
                              functools.partial(model.d3logpdf_df3, y=Y),
                              f.copy(), 'g')
    checker.randomize()
    # verbose run is for the report only; the assertion uses a second run
    checker.checkgrad(verbose=1)
    print(model)
    assert checker.checkgrad()
|
||||||
|
|
||||||
|
##############
|
||||||
|
# df_dparams #
|
||||||
|
##############
|
||||||
|
@with_setup(setUp, tearDown)
def t_dlogpdf_dparams(self, model, Y, f, params, param_constraints):
    """Check dlogpdf_dtheta against logpdf while moving the likelihood parameter."""
    print("\n{}".format(inspect.stack()[0][3]))
    print(model)
    passed = dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta,
                              params, args=(f, Y), constraints=param_constraints,
                              randomize=False, verbose=True)
    assert passed
|
||||||
|
|
||||||
|
@with_setup(setUp, tearDown)
def t_dlogpdf_df_dparams(self, model, Y, f, params, param_constraints):
    """Check dlogpdf_df_dtheta against dlogpdf_df while moving the likelihood parameter."""
    print("\n{}".format(inspect.stack()[0][3]))
    print(model)
    passed = dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta,
                              params, args=(f, Y), constraints=param_constraints,
                              randomize=False, verbose=True)
    assert passed
|
||||||
|
|
||||||
|
@with_setup(setUp, tearDown)
def t_d2logpdf2_df2_dparams(self, model, Y, f, params, param_constraints):
    """Check d2logpdf_df2_dtheta against d2logpdf_df2 while moving the likelihood parameter."""
    print("\n{}".format(inspect.stack()[0][3]))
    print(model)
    passed = dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta,
                              params, args=(f, Y), constraints=param_constraints,
                              randomize=False, verbose=True)
    assert passed
|
||||||
|
|
||||||
|
################
|
||||||
|
# dpdf_dlink's #
|
||||||
|
################
|
||||||
|
@with_setup(setUp, tearDown)
def t_dlogpdf_dlink(self, model, Y, f, link_f_constraints):
    """Gradient-check dlogpdf_dlink against logpdf_link with respect to link_f."""
    print("\n{}".format(inspect.stack()[0][3]))
    checker = GradientChecker(functools.partial(model.logpdf_link, y=Y),
                              functools.partial(model.dlogpdf_dlink, y=Y),
                              f.copy(), 'g')

    #Apply constraints to link_f values
    for apply_constraint in link_f_constraints:
        apply_constraint('g', checker)

    checker.randomize()
    print(checker)
    # verbose run is for the report only; the assertion uses a second run
    checker.checkgrad(verbose=1)
    assert checker.checkgrad()
|
||||||
|
|
||||||
|
@with_setup(setUp, tearDown)
def t_d2logpdf_dlink2(self, model, Y, f, link_f_constraints):
    """Gradient-check d2logpdf_dlink2 against dlogpdf_dlink with respect to link_f."""
    print("\n{}".format(inspect.stack()[0][3]))
    checker = GradientChecker(functools.partial(model.dlogpdf_dlink, y=Y),
                              functools.partial(model.d2logpdf_dlink2, y=Y),
                              f.copy(), 'g')

    #Apply constraints to link_f values
    for apply_constraint in link_f_constraints:
        apply_constraint('g', checker)

    checker.randomize()
    # verbose run is for the report only; the assertion uses a second run
    checker.checkgrad(verbose=1)
    print(checker)
    assert checker.checkgrad()
|
||||||
|
|
||||||
|
@with_setup(setUp, tearDown)
def t_d3logpdf_dlink3(self, model, Y, f, link_f_constraints):
    """Gradient-check d3logpdf_dlink3 against d2logpdf_dlink2 with respect to link_f."""
    print("\n{}".format(inspect.stack()[0][3]))
    checker = GradientChecker(functools.partial(model.d2logpdf_dlink2, y=Y),
                              functools.partial(model.d3logpdf_dlink3, y=Y),
                              f.copy(), 'g')

    #Apply constraints to link_f values
    for apply_constraint in link_f_constraints:
        apply_constraint('g', checker)

    checker.randomize()
    # verbose run is for the report only; the assertion uses a second run
    checker.checkgrad(verbose=1)
    print(checker)
    assert checker.checkgrad()
|
||||||
|
|
||||||
|
#################
|
||||||
|
# dlink_dparams #
|
||||||
|
#################
|
||||||
|
@with_setup(setUp, tearDown)
def t_dlogpdf_link_dparams(self, model, Y, f, params, param_constraints):
    """Check dlogpdf_link_dtheta against logpdf_link while moving the likelihood parameter."""
    print("\n{}".format(inspect.stack()[0][3]))
    print(model)
    passed = dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta,
                              params, args=(f, Y), constraints=param_constraints,
                              randomize=False, verbose=True)
    assert passed
|
||||||
|
|
||||||
|
@with_setup(setUp, tearDown)
def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_constraints):
    """
    Assert that ``dparam_checkgrad`` accepts ``model.dlogpdf_dlink_dtheta``
    as the parameter derivative of ``model.dlogpdf_dlink``.

    :param model: object providing ``dlogpdf_dlink`` and ``dlogpdf_dlink_dtheta``
    :param Y: second element of the ``args`` tuple given to the checker
    :param f: first element of the ``args`` tuple given to the checker
    :param params: parameter values forwarded to ``dparam_checkgrad``
    :param param_constraints: forwarded as the ``constraints`` keyword
    """
    # Announce the running test by the name of this very function.
    print("\n{}".format(inspect.stack()[0][3]))
    print(model)

    passed = dparam_checkgrad(
        model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta, params,
        args=(f, Y),
        constraints=param_constraints,
        randomize=False,
        verbose=True)
    assert passed
|
||||||
|
|
||||||
|
@with_setup(setUp, tearDown)
def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_constraints):
    """
    Assert that ``dparam_checkgrad`` accepts ``model.d2logpdf_dlink2_dtheta``
    as the parameter derivative of ``model.d2logpdf_dlink2``.

    :param model: object providing ``d2logpdf_dlink2`` and ``d2logpdf_dlink2_dtheta``
    :param Y: second element of the ``args`` tuple given to the checker
    :param f: first element of the ``args`` tuple given to the checker
    :param params: parameter values forwarded to ``dparam_checkgrad``
    :param param_constraints: forwarded as the ``constraints`` keyword
    """
    # Announce the running test by the name of this very function.
    print("\n{}".format(inspect.stack()[0][3]))
    print(model)

    passed = dparam_checkgrad(
        model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta, params,
        args=(f, Y),
        constraints=param_constraints,
        randomize=False,
        verbose=True)
    assert passed
|
||||||
|
|
||||||
|
################
|
||||||
|
# laplace test #
|
||||||
|
################
|
||||||
|
@with_setup(setUp, tearDown)
def t_laplace_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints):
    """
    Build a GPRegression model with a Laplace likelihood on an rbf + white
    kernel and assert that its ``checkgrad`` passes.

    :param model: noise model handed to ``GPy.likelihoods.Laplace``
    :param X: inputs; ``X.shape[1]`` sets the kernel input dimension
    :param Y: targets; divided by their maximum before use
    :param f: not used by this test
    :param step: step forwarded to ``m.checkgrad``
    :param param_vals: values assigned to the named model parameters
    :param param_names: names of the model parameters to set
    :param constraints: callables, one per name, invoked as ``constraint(name, m)``
    """
    # Announce the running test by the name of this very function.
    print("\n{}".format(inspect.stack()[0][3]))

    # Normalize
    Y = Y/Y.max()
    white_var = 0.001
    kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
    laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), model)
    m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel,
                                likelihood=laplace_likelihood)
    m.ensure_default_constraints()
    m.constrain_fixed('white', white_var)

    # Set each named parameter, then apply the constraint at the same index.
    for idx, name in enumerate(param_names):
        m[name] = param_vals[idx]
        constraints[idx](name, m)

    m.randomize()
    m.checkgrad(verbose=1, step=step)
    print(m)
    assert m.checkgrad(step=step)
|
||||||
|
|
||||||
|
###########
|
||||||
|
# EP test #
|
||||||
|
###########
|
||||||
|
@with_setup(setUp, tearDown)
def t_ep_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints):
    """
    Build a GPRegression model with an EP likelihood on an rbf + white
    kernel and assert that its ``checkgrad`` passes.

    :param model: noise model handed to ``GPy.likelihoods.EP``
    :param X: inputs; ``X.shape[1]`` sets the kernel input dimension
    :param Y: targets; divided by their maximum before use
    :param f: not used by this test
    :param step: step forwarded to ``m.checkgrad``
    :param param_vals: values assigned to the named model parameters
    :param param_names: names of the model parameters to set
    :param constraints: callables, one per name, invoked as ``constraint(name, m)``
    """
    # Announce the running test by the name of this very function.
    print("\n{}".format(inspect.stack()[0][3]))

    # Normalize
    Y = Y/Y.max()
    white_var = 0.001
    kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
    ep_likelihood = GPy.likelihoods.EP(Y.copy(), model)
    m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel,
                                likelihood=ep_likelihood)
    m.ensure_default_constraints()
    m.constrain_fixed('white', white_var)

    # Set each named parameter, then apply the constraint at the same index.
    for idx, name in enumerate(param_names):
        m[name] = param_vals[idx]
        constraints[idx](name, m)

    m.randomize()
    m.checkgrad(verbose=1, step=step)
    print(m)
    assert m.checkgrad(step=step)
|
||||||
|
|
||||||
|
|
||||||
|
class LaplaceTests(unittest.TestCase):
    """
    Specific likelihood tests, not general enough for the above tests
    """

    def setUp(self):
        """Create random inputs, noisy sine targets and two likelihoods."""
        self.N = 5
        self.D = 3
        self.X = np.random.rand(self.N, self.D)*10

        self.real_std = 0.1
        noise = np.random.randn(*self.X[:, 0].shape)*self.real_std
        self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None]
        self.f = np.random.rand(self.N, 1)

        # The variance is drawn at random; an earlier fixed value of 0.2 was
        # overwritten immediately and has been removed as a dead store.
        self.var = np.random.rand(1)
        self.stu_t = GPy.likelihoods.student_t(deg_free=5, sigma2=self.var)
        self.gauss = GPy.likelihoods.gaussian(gp_transformations.Log(), variance=self.var, D=self.D, N=self.N)

        #Make a bigger step as lower bound can be quite curved
        self.step = 1e-6

    def tearDown(self):
        """Drop the references created in setUp."""
        self.stu_t = None
        self.gauss = None
        self.Y = None
        self.f = None
        self.X = None

    def test_gaussian_d2logpdf_df2_2(self):
        """
        Gradient-check the gaussian likelihood's ``d2logpdf_df2`` against its
        ``dlogpdf_df`` on a two-point, one-dimensional data set that replaces
        the fixtures built in setUp (``self.var`` is reused).
        """
        # Announce the running test by the name of this very function.
        print("\n{}".format(inspect.stack()[0][3]))
        self.Y = None
        self.gauss = None

        self.N = 2
        self.D = 1
        self.X = np.linspace(0, self.D, self.N)[:, None]
        self.real_std = 0.2
        noise = np.random.randn(*self.X.shape)*self.real_std
        self.Y = np.sin(self.X*2*np.pi) + noise
        self.f = np.random.rand(self.N, 1)
        self.gauss = GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N)

        dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y)
        d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y)
        grad = GradientChecker(dlogpdf_df, d2logpdf_df2, self.f.copy(), 'g')
        grad.randomize()
        grad.checkgrad(verbose=1)
        self.assertTrue(grad.checkgrad())
|
||||||
|
|
||||||
|
# Script entry point: announce the run, then let unittest discover and
# execute the test cases defined in this module.
if __name__ == "__main__":
    print("Running unit tests")
    unittest.main()
|
||||||
|
|
@ -28,8 +28,8 @@ def ard(p):
|
||||||
class Test(unittest.TestCase):
|
class Test(unittest.TestCase):
|
||||||
input_dim = 9
|
input_dim = 9
|
||||||
num_inducing = 4
|
num_inducing = 4
|
||||||
N = 3
|
N = 30
|
||||||
Nsamples = 5e6
|
Nsamples = 9e6
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
i_s_dim_list = [2,4,3]
|
i_s_dim_list = [2,4,3]
|
||||||
|
|
@ -45,20 +45,26 @@ class Test(unittest.TestCase):
|
||||||
input_slices = input_slices
|
input_slices = input_slices
|
||||||
)
|
)
|
||||||
self.kerns = (
|
self.kerns = (
|
||||||
input_slice_kern,
|
# input_slice_kern,
|
||||||
# (GPy.kern.rbf(self.input_dim, ARD=True) +
|
# (GPy.kern.rbf(self.input_dim, ARD=True) +
|
||||||
# GPy.kern.linear(self.input_dim, ARD=True) +
|
# GPy.kern.linear(self.input_dim, ARD=True) +
|
||||||
# GPy.kern.bias(self.input_dim) +
|
# GPy.kern.bias(self.input_dim) +
|
||||||
# GPy.kern.white(self.input_dim)),
|
# GPy.kern.white(self.input_dim)),
|
||||||
# (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
|
# (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
|
||||||
# GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
|
# GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
|
||||||
# GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) +
|
# GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) +
|
||||||
# GPy.kern.bias(self.input_dim) +
|
# GPy.kern.bias(self.input_dim) +
|
||||||
# GPy.kern.white(self.input_dim)),
|
# GPy.kern.white(self.input_dim)),
|
||||||
# GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True),
|
(GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) +
|
||||||
|
GPy.kern.bias(self.input_dim, np.random.rand()) +
|
||||||
|
GPy.kern.white(self.input_dim, np.random.rand())),
|
||||||
|
(GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
|
||||||
|
GPy.kern.bias(self.input_dim, np.random.rand()) +
|
||||||
|
GPy.kern.white(self.input_dim, np.random.rand())),
|
||||||
|
# GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True),
|
||||||
# GPy.kern.linear(self.input_dim, ARD=False), GPy.kern.linear(self.input_dim, ARD=True),
|
# GPy.kern.linear(self.input_dim, ARD=False), GPy.kern.linear(self.input_dim, ARD=True),
|
||||||
# GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim),
|
# GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim),
|
||||||
# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim),
|
# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim),
|
||||||
# GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim),
|
# GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim),
|
||||||
# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim),
|
# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim),
|
||||||
# GPy.kern.bias(self.input_dim), GPy.kern.white(self.input_dim),
|
# GPy.kern.bias(self.input_dim), GPy.kern.white(self.input_dim),
|
||||||
|
|
@ -79,7 +85,7 @@ class Test(unittest.TestCase):
|
||||||
|
|
||||||
def test_psi1(self):
|
def test_psi1(self):
|
||||||
for kern in self.kerns:
|
for kern in self.kerns:
|
||||||
Nsamples = np.floor(self.Nsamples/300.)
|
Nsamples = np.floor(self.Nsamples/self.N)
|
||||||
psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance)
|
psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance)
|
||||||
K_ = np.zeros((Nsamples, self.num_inducing))
|
K_ = np.zeros((Nsamples, self.num_inducing))
|
||||||
diffs = []
|
diffs = []
|
||||||
|
|
@ -105,7 +111,7 @@ class Test(unittest.TestCase):
|
||||||
|
|
||||||
def test_psi2(self):
|
def test_psi2(self):
|
||||||
for kern in self.kerns:
|
for kern in self.kerns:
|
||||||
Nsamples = self.Nsamples/300.
|
Nsamples = int(np.floor(self.Nsamples/self.N))
|
||||||
psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
|
psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
|
||||||
K_ = np.zeros((self.num_inducing, self.num_inducing))
|
K_ = np.zeros((self.num_inducing, self.num_inducing))
|
||||||
diffs = []
|
diffs = []
|
||||||
|
|
@ -119,10 +125,10 @@ class Test(unittest.TestCase):
|
||||||
try:
|
try:
|
||||||
import pylab
|
import pylab
|
||||||
pylab.figure(msg)
|
pylab.figure(msg)
|
||||||
pylab.plot(diffs)
|
pylab.plot(diffs, marker='x', mew=1.3)
|
||||||
# print msg, np.allclose(psi2.squeeze(), K_, rtol=1e-1, atol=.1)
|
# print msg, np.allclose(psi2.squeeze(), K_, rtol=1e-1, atol=.1)
|
||||||
self.assertTrue(np.allclose(psi2.squeeze(), K_,
|
self.assertTrue(np.allclose(psi2.squeeze(), K_),
|
||||||
rtol=1e-1, atol=.1),
|
#rtol=1e-1, atol=.1),
|
||||||
msg=msg + ": not matching")
|
msg=msg + ": not matching")
|
||||||
# sys.stdout.write(".")
|
# sys.stdout.write(".")
|
||||||
except:
|
except:
|
||||||
|
|
@ -135,7 +141,7 @@ class Test(unittest.TestCase):
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
sys.argv = ['',
|
sys.argv = ['',
|
||||||
#'Test.test_psi0',
|
#'Test.test_psi0',
|
||||||
'Test.test_psi1',
|
#'Test.test_psi1',
|
||||||
'Test.test_psi2',
|
'Test.test_psi2',
|
||||||
]
|
]
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
||||||
|
|
@ -209,7 +209,7 @@ class GradientTests(unittest.TestCase):
|
||||||
Z = np.linspace(0, 15, 4)[:, None]
|
Z = np.linspace(0, 15, 4)[:, None]
|
||||||
kernel = GPy.kern.rbf(1)
|
kernel = GPy.kern.rbf(1)
|
||||||
m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z)
|
m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z)
|
||||||
#distribution = GPy.likelihoods.likelihood_functions.Binomial()
|
#distribution = GPy.likelihoods.likelihood_functions.Bernoulli()
|
||||||
#likelihood = GPy.likelihoods.EP(Y, distribution)
|
#likelihood = GPy.likelihoods.EP(Y, distribution)
|
||||||
#m = GPy.core.SparseGP(X, likelihood, kernel, Z)
|
#m = GPy.core.SparseGP(X, likelihood, kernel, Z)
|
||||||
#m.ensure_default_constraints()
|
#m.ensure_default_constraints()
|
||||||
|
|
|
||||||
|
|
@ -14,3 +14,5 @@ import visualize
|
||||||
import decorators
|
import decorators
|
||||||
import classification
|
import classification
|
||||||
import latent_space_visualizations
|
import latent_space_visualizations
|
||||||
|
|
||||||
|
import netpbmfile
|
||||||
|
|
|
||||||
17
GPy/util/config.py
Normal file
17
GPy/util/config.py
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
#
|
||||||
|
# This loads the configuration
|
||||||
|
#
|
||||||
|
import ConfigParser
|
||||||
|
import os
|
||||||
|
# Module-level parser that holds the loaded configuration.
config = ConfigParser.ConfigParser()

# os.getenv('HOME') returns None when HOME is unset (e.g. on Windows), which
# makes os.path.join fail at import time; expanduser('~') still honours HOME
# when it is set and falls back to the platform's home directory otherwise.
user_file = os.path.join(os.path.expanduser('~'), '.gpy_config.cfg')
# Path of the packaged default, relative to this module's directory.
default_file = os.path.join('..', 'gpy_config.cfg')

# 1. check if the user has a ~/.gpy_config.cfg
if os.path.isfile(user_file):
    config.read(user_file)
else:
    # 2. if not, use the default one
    path = os.path.dirname(__file__)
    config.read(os.path.join(path, default_file))
|
||||||
|
|
@ -8,17 +8,12 @@ import zipfile
|
||||||
import tarfile
|
import tarfile
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
ipython_notebook = False
|
ipython_available=True
|
||||||
if ipython_notebook:
|
try:
|
||||||
import IPython.core.display
|
import IPython
|
||||||
def ipynb_input(varname, prompt=''):
|
except ImportError:
|
||||||
"""Prompt user for input and assign string val to given variable name."""
|
ipython_available=False
|
||||||
js_code = ("""
|
|
||||||
var value = prompt("{prompt}","");
|
|
||||||
var py_code = "{varname} = '" + value + "'";
|
|
||||||
IPython.notebook.kernel.execute(py_code);
|
|
||||||
""").format(prompt=prompt, varname=varname)
|
|
||||||
return IPython.core.display.Javascript(js_code)
|
|
||||||
|
|
||||||
import sys, urllib
|
import sys, urllib
|
||||||
|
|
||||||
|
|
@ -34,8 +29,11 @@ data_path = os.path.join(os.path.dirname(__file__), 'datasets')
|
||||||
default_seed = 10000
|
default_seed = 10000
|
||||||
overide_manual_authorize=False
|
overide_manual_authorize=False
|
||||||
neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
|
neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
|
||||||
|
sam_url = 'http://www.cs.nyu.edu/~roweis/data/'
|
||||||
cmu_url = 'http://mocap.cs.cmu.edu/subjects/'
|
cmu_url = 'http://mocap.cs.cmu.edu/subjects/'
|
||||||
# Note: there may be a better way of storing data resources. One of the pythonistas will need to take a look.
|
|
||||||
|
# Note: there may be a better way of storing data resources, for the
|
||||||
|
# moment we are storing them in a dictionary.
|
||||||
data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'],
|
data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'],
|
||||||
'files' : [['ankurDataPoseSilhouette.mat']],
|
'files' : [['ankurDataPoseSilhouette.mat']],
|
||||||
'license' : None,
|
'license' : None,
|
||||||
|
|
@ -49,7 +47,7 @@ data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'],
|
||||||
'license' : None,
|
'license' : None,
|
||||||
'size' : 51276
|
'size' : 51276
|
||||||
},
|
},
|
||||||
'brendan_faces' : {'urls' : ['http://www.cs.nyu.edu/~roweis/data/'],
|
'brendan_faces' : {'urls' : [sam_url],
|
||||||
'files': [['frey_rawface.mat']],
|
'files': [['frey_rawface.mat']],
|
||||||
'citation' : 'Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.',
|
'citation' : 'Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.',
|
||||||
'details' : """A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.""",
|
'details' : """A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.""",
|
||||||
|
|
@ -93,6 +91,12 @@ The database was created with funding from NSF EIA-0196217.""",
|
||||||
'details' : """Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.""",
|
'details' : """Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.""",
|
||||||
'license' : None,
|
'license' : None,
|
||||||
'size' : 21949154},
|
'size' : 21949154},
|
||||||
|
'olivetti_faces' : {'urls' : [neil_url + 'olivetti_faces/', sam_url],
|
||||||
|
'files' : [['att_faces.zip'], ['olivettifaces.mat']],
|
||||||
|
'citation' : 'Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994',
|
||||||
|
'details' : """Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. """,
|
||||||
|
'license': None,
|
||||||
|
'size' : 8561331},
|
||||||
'olympic_marathon_men' : {'urls' : [neil_url + 'olympic_marathon_men/'],
|
'olympic_marathon_men' : {'urls' : [neil_url + 'olympic_marathon_men/'],
|
||||||
'files' : [['olympicMarathonTimes.csv']],
|
'files' : [['olympicMarathonTimes.csv']],
|
||||||
'citation' : None,
|
'citation' : None,
|
||||||
|
|
@ -141,26 +145,41 @@ The database was created with funding from NSF EIA-0196217.""",
|
||||||
'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000',
|
'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000',
|
||||||
'license' : None,
|
'license' : None,
|
||||||
'size' : 24229368},
|
'size' : 24229368},
|
||||||
|
'xw_pen' : {'urls' : [neil_url + 'xw_pen/'],
|
||||||
|
'files' : [['xw_pen_15.csv']],
|
||||||
|
'details' : """Accelerometer pen data used for robust regression by Tipping and Lawrence.""",
|
||||||
|
'citation' : 'Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005',
|
||||||
|
'license' : None,
|
||||||
|
'size' : 3410}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def prompt_user():
|
def prompt_user(prompt):
|
||||||
"""Ask user for agreeing to data set licenses."""
|
"""Ask user for agreeing to data set licenses."""
|
||||||
# raw_input returns the empty string for "enter"
|
# raw_input returns the empty string for "enter"
|
||||||
yes = set(['yes', 'y'])
|
yes = set(['yes', 'y'])
|
||||||
no = set(['no','n'])
|
no = set(['no','n'])
|
||||||
choice = ''
|
|
||||||
if ipython_notebook:
|
try:
|
||||||
ipynb_input(choice, prompt='provide your answer here')
|
print(prompt)
|
||||||
else:
|
|
||||||
choice = raw_input().lower()
|
choice = raw_input().lower()
|
||||||
|
# would like to test for exception here, but not sure if we can do that without importing IPython
|
||||||
|
except:
|
||||||
|
print('Stdin is not implemented.')
|
||||||
|
print('You need to set')
|
||||||
|
print('overide_manual_authorize=True')
|
||||||
|
print('to proceed with the download. Please set that variable and continue.')
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
if choice in yes:
|
if choice in yes:
|
||||||
return True
|
return True
|
||||||
elif choice in no:
|
elif choice in no:
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
sys.stdout.write("Please respond with 'yes', 'y' or 'no', 'n'")
|
print("Your response was a " + choice)
|
||||||
return prompt_user()
|
print("Please respond with 'yes', 'y' or 'no', 'n'")
|
||||||
|
#return prompt_user()
|
||||||
|
|
||||||
|
|
||||||
def data_available(dataset_name=None):
|
def data_available(dataset_name=None):
|
||||||
|
|
@ -212,15 +231,14 @@ def authorize_download(dataset_name=None):
|
||||||
print('You must also agree to the following license:')
|
print('You must also agree to the following license:')
|
||||||
print(dr['license'])
|
print(dr['license'])
|
||||||
print('')
|
print('')
|
||||||
print('Do you wish to proceed with the download? [yes/no]')
|
return prompt_user('Do you wish to proceed with the download? [yes/no]')
|
||||||
return prompt_user()
|
|
||||||
|
|
||||||
def download_data(dataset_name=None):
|
def download_data(dataset_name=None):
|
||||||
"""Check with the user that the are happy with terms and conditions for the data set, then download it."""
|
"""Check with the user that the are happy with terms and conditions for the data set, then download it."""
|
||||||
|
|
||||||
dr = data_resources[dataset_name]
|
dr = data_resources[dataset_name]
|
||||||
if not authorize_download(dataset_name):
|
if not authorize_download(dataset_name):
|
||||||
return False
|
raise Exception("Permission to download data set denied.")
|
||||||
|
|
||||||
if dr.has_key('suffices'):
|
if dr.has_key('suffices'):
|
||||||
for url, files, suffices in zip(dr['urls'], dr['files'], dr['suffices']):
|
for url, files, suffices in zip(dr['urls'], dr['files'], dr['suffices']):
|
||||||
|
|
@ -489,13 +507,13 @@ def ripley_synth(data_set='ripley_prnn_data'):
|
||||||
return data_details_return({'X': X, 'y': y, 'Xtest': Xtest, 'ytest': ytest, 'info': 'Synthetic data generated by Ripley for a two class classification problem.'}, data_set)
|
return data_details_return({'X': X, 'y': y, 'Xtest': Xtest, 'ytest': ytest, 'info': 'Synthetic data generated by Ripley for a two class classification problem.'}, data_set)
|
||||||
|
|
||||||
def osu_run1(data_set='osu_run1', sample_every=4):
|
def osu_run1(data_set='osu_run1', sample_every=4):
|
||||||
|
path = os.path.join(data_path, data_set)
|
||||||
if not data_available(data_set):
|
if not data_available(data_set):
|
||||||
download_data(data_set)
|
download_data(data_set)
|
||||||
zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'sprintTXT.ZIP'), 'r')
|
zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'run1TXT.ZIP'), 'r')
|
||||||
path = os.path.join(data_path, data_set)
|
for name in zip.namelist():
|
||||||
for name in zip.namelist():
|
zip.extract(name, path)
|
||||||
zip.extract(name, path)
|
Y, connect = GPy.util.mocap.load_text_data('Aug210106', path)
|
||||||
Y, connect = GPy.util.mocap.load_text_data('Aug210107', path)
|
|
||||||
Y = Y[0:-1:sample_every, :]
|
Y = Y[0:-1:sample_every, :]
|
||||||
return data_details_return({'Y': Y, 'connect' : connect}, data_set)
|
return data_details_return({'Y': Y, 'connect' : connect}, data_set)
|
||||||
|
|
||||||
|
|
@ -579,8 +597,34 @@ def toy_linear_1d_classification(seed=default_seed):
|
||||||
X = (np.r_[x1, x2])[:, None]
|
X = (np.r_[x1, x2])[:, None]
|
||||||
return {'X': X, 'Y': sample_class(2.*X), 'F': 2.*X, 'seed' : seed}
|
return {'X': X, 'Y': sample_class(2.*X), 'F': 2.*X, 'seed' : seed}
|
||||||
|
|
||||||
def olympic_100m_men(data_set='rogers_girolami_data'):
|
def olivetti_faces(data_set='olivetti_faces'):
|
||||||
|
path = os.path.join(data_path, data_set)
|
||||||
if not data_available(data_set):
|
if not data_available(data_set):
|
||||||
|
download_data(data_set)
|
||||||
|
zip = zipfile.ZipFile(os.path.join(path, 'att_faces.zip'), 'r')
|
||||||
|
for name in zip.namelist():
|
||||||
|
zip.extract(name, path)
|
||||||
|
Y = []
|
||||||
|
lbls = []
|
||||||
|
for subject in range(40):
|
||||||
|
for image in range(10):
|
||||||
|
image_path = os.path.join(path, 'orl_faces', 's'+str(subject+1), str(image+1) + '.pgm')
|
||||||
|
Y.append(GPy.util.netpbmfile.imread(image_path).flatten())
|
||||||
|
lbls.append(subject)
|
||||||
|
Y = np.asarray(Y)
|
||||||
|
lbls = np.asarray(lbls)[:, None]
|
||||||
|
return data_details_return({'Y': Y, 'lbls' : lbls, 'info': "ORL Faces processed to 64x64 images."}, data_set)
|
||||||
|
|
||||||
|
def xw_pen(data_set='xw_pen'):
    """
    Load the 'xw_pen_15.csv' file of the given data set, downloading the
    data set first when it is not available locally.

    :param data_set: name of the data set directory and resource entry
    :returns: result of ``data_details_return`` for a dict holding 'Y' (the
        comma-separated file contents), 'X' (a 485 x 1 column of the indices
        0..484) and an 'info' string
    """
    if not data_available(data_set):
        download_data(data_set)

    csv_file = os.path.join(data_path, data_set, 'xw_pen_15.csv')
    Y = np.loadtxt(csv_file, delimiter=',')
    X = np.arange(485)[:, None]
    info = "Tilt data from a personalized digital assistant pen. Plot in original paper showed regression between time steps 175 and 275."
    return data_details_return({'Y': Y, 'X': X, 'info': info}, data_set)
|
||||||
|
|
||||||
|
|
||||||
|
def download_rogers_girolami_data():
|
||||||
|
if not data_available('rogers_girolami_data'):
|
||||||
download_data(data_set)
|
download_data(data_set)
|
||||||
path = os.path.join(data_path, data_set)
|
path = os.path.join(data_path, data_set)
|
||||||
tar_file = os.path.join(path, 'firstcoursemldata.tar.gz')
|
tar_file = os.path.join(path, 'firstcoursemldata.tar.gz')
|
||||||
|
|
@ -588,6 +632,9 @@ def olympic_100m_men(data_set='rogers_girolami_data'):
|
||||||
print('Extracting file.')
|
print('Extracting file.')
|
||||||
tar.extractall(path=path)
|
tar.extractall(path=path)
|
||||||
tar.close()
|
tar.close()
|
||||||
|
|
||||||
|
def olympic_100m_men(data_set='rogers_girolami_data'):
|
||||||
|
download_rogers_girolami_data()
|
||||||
olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male100']
|
olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male100']
|
||||||
|
|
||||||
X = olympic_data[:, 0][:, None]
|
X = olympic_data[:, 0][:, None]
|
||||||
|
|
@ -595,20 +642,45 @@ def olympic_100m_men(data_set='rogers_girolami_data'):
|
||||||
return data_details_return({'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m men from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
|
return data_details_return({'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m men from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
|
||||||
|
|
||||||
def olympic_100m_women(data_set='rogers_girolami_data'):
|
def olympic_100m_women(data_set='rogers_girolami_data'):
|
||||||
if not data_available(data_set):
|
download_rogers_girolami_data()
|
||||||
download_data(data_set)
|
|
||||||
path = os.path.join(data_path, data_set)
|
|
||||||
tar_file = os.path.join(path, 'firstcoursemldata.tar.gz')
|
|
||||||
tar = tarfile.open(tar_file)
|
|
||||||
print('Extracting file.')
|
|
||||||
tar.extractall(path=path)
|
|
||||||
tar.close()
|
|
||||||
olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['female100']
|
olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['female100']
|
||||||
|
|
||||||
X = olympic_data[:, 0][:, None]
|
X = olympic_data[:, 0][:, None]
|
||||||
Y = olympic_data[:, 1][:, None]
|
Y = olympic_data[:, 1][:, None]
|
||||||
return data_details_return({'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m women from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
|
return data_details_return({'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m women from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
|
||||||
|
|
||||||
|
def olympic_200m_women(data_set='rogers_girolami_data'):
    """
    Load the 'female200' table from olympics.mat of the given data set.

    :param data_set: name of the data set directory and resource entry
    :returns: result of ``data_details_return`` for a dict with 'X' (first
        column of the table), 'Y' (second column), both as column vectors,
        and an 'info' string
    """
    download_rogers_girolami_data()
    mat_file = os.path.join(data_path, data_set, 'data', 'olympics.mat')
    table = scipy.io.loadmat(mat_file)['female200']

    # Keep both columns two-dimensional (n x 1).
    X = table[:, 0:1]
    Y = table[:, 1:2]
    info = "Olympic 200 m winning times for women from 1896 until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."
    return data_details_return({'X': X, 'Y': Y, 'info': info}, data_set)
|
||||||
|
|
||||||
|
def olympic_200m_men(data_set='rogers_girolami_data'):
    """
    Load the 'male200' table from olympics.mat of the given data set.

    :param data_set: name of the data set directory and resource entry
    :returns: result of ``data_details_return`` for a dict with 'X' (first
        column of the table), 'Y' (second column), both as column vectors,
        and an 'info' string
    """
    download_rogers_girolami_data()
    olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male200']

    X = olympic_data[:, 0][:, None]
    Y = olympic_data[:, 1][:, None]
    # The description previously read "Male 200 m winning times for women";
    # this loader returns the men's table, so the text now says so.
    return data_details_return({'X': X, 'Y': Y, 'info': "Olympic 200 m winning times for men from 1896 until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
|
||||||
|
|
||||||
|
def olympic_400m_women(data_set='rogers_girolami_data'):
    """
    Load the 'female400' table from olympics.mat of the given data set.

    :param data_set: name of the data set directory and resource entry
    :returns: result of ``data_details_return`` for a dict with 'X' (first
        column of the table), 'Y' (second column), both as column vectors,
        and an 'info' string
    """
    download_rogers_girolami_data()
    mat_file = os.path.join(data_path, data_set, 'data', 'olympics.mat')
    table = scipy.io.loadmat(mat_file)['female400']

    # Keep both columns two-dimensional (n x 1).
    X = table[:, 0:1]
    Y = table[:, 1:2]
    info = "Olympic 400 m winning times for women until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."
    return data_details_return({'X': X, 'Y': Y, 'info': info}, data_set)
|
||||||
|
|
||||||
|
def olympic_400m_men(data_set='rogers_girolami_data'):
    """
    Load the 'male400' table from olympics.mat of the given data set.

    :param data_set: name of the data set directory and resource entry
    :returns: result of ``data_details_return`` for a dict with 'X' (first
        column of the table), 'Y' (second column), both as column vectors,
        and an 'info' string
    """
    download_rogers_girolami_data()
    olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male400']

    X = olympic_data[:, 0][:, None]
    Y = olympic_data[:, 1][:, None]
    # The description previously read "Male 400 m winning times for women";
    # this loader returns the men's table, so the text now says so.
    return data_details_return({'X': X, 'Y': Y, 'info': "Olympic 400 m winning times for men until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
|
||||||
|
|
||||||
def olympic_marathon_men(data_set='olympic_marathon_men'):
|
def olympic_marathon_men(data_set='olympic_marathon_men'):
|
||||||
if not data_available(data_set):
|
if not data_available(data_set):
|
||||||
download_data(data_set)
|
download_data(data_set)
|
||||||
|
|
@ -617,6 +689,26 @@ def olympic_marathon_men(data_set='olympic_marathon_men'):
|
||||||
Y = olympics[:, 1:2]
|
Y = olympics[:, 1:2]
|
||||||
return data_details_return({'X': X, 'Y': Y}, data_set)
|
return data_details_return({'X': X, 'Y': Y}, data_set)
|
||||||
|
|
||||||
|
def olympics():
    """
    All olympics sprint winning times for multiple output prediction.

    Calls the six 100 m / 200 m / 400 m loaders in turn and stacks their
    results. Each row of the stacked 'X' holds the loader's 'X' value followed
    by the position of that loader in the list (0..5); 'Y' holds the matching
    'Y' rows.

    :returns: the dict returned by the last loader, with its 'X', 'Y' and
        'info' entries replaced by the stacked arrays and a combined
        description
    """
    loaders = [olympic_100m_men,
               olympic_100m_women,
               olympic_200m_men,
               olympic_200m_women,
               olympic_400m_men,
               olympic_400m_women]

    # Start from empty float arrays so the stacked result has the same
    # shape and dtype as growing the arrays one event at a time.
    x_blocks = [np.zeros((0, 2))]
    y_blocks = [np.zeros((0, 1))]
    for index, loader in enumerate(loaders):
        data = loader()
        year = data['X']
        x_blocks.append(np.hstack((year, np.ones_like(year)*index)))
        y_blocks.append(data['Y'])

    # `data` is the dict of the last loader; it is reused as the result.
    data['X'] = np.vstack(x_blocks)
    data['Y'] = np.vstack(y_blocks)
    data['info'] = "Olympics sprint event winning for men and women to 2008. Data is from Rogers and Girolami's First Course in Machine Learning."
    return data
|
||||||
|
|
||||||
# def movielens_small(partNo=1,seed=default_seed):
|
# def movielens_small(partNo=1,seed=default_seed):
|
||||||
# np.random.seed(seed=seed)
|
# np.random.seed(seed=seed)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -61,6 +61,14 @@ def dpotri(A, lower=0):
|
||||||
"""
|
"""
|
||||||
return lapack.dpotri(A, lower=lower)
|
return lapack.dpotri(A, lower=lower)
|
||||||
|
|
||||||
|
def pddet(A):
    """
    Log-determinant of a symmetric positive definite matrix.

    Despite the function name, the value returned is the logarithm of the
    determinant, not the determinant itself: it is computed as twice the sum
    of the logs of the diagonal of the factor returned by ``jitchol(A)``.

    :param A: the matrix handed to ``jitchol``
    :returns: ``2 * sum(log(diag(L)))`` with ``L = jitchol(A)``
    """
    L = jitchol(A)
    # np.sum reduces the array in one vectorised call instead of iterating
    # over its elements with the builtin sum.
    logdetA = 2*np.sum(np.log(np.diag(L)))
    return logdetA
|
||||||
|
|
||||||
def trace_dot(a, b):
|
def trace_dot(a, b):
|
||||||
"""
|
"""
|
||||||
Efficiently compute the trace of the matrix product of a and b
|
Efficiently compute the trace of the matrix product of a and b
|
||||||
|
|
@ -325,6 +333,7 @@ def symmetrify(A, upper=False):
|
||||||
"""
|
"""
|
||||||
N, M = A.shape
|
N, M = A.shape
|
||||||
assert N == M
|
assert N == M
|
||||||
|
|
||||||
c_contig_code = """
|
c_contig_code = """
|
||||||
int iN;
|
int iN;
|
||||||
for (int i=1; i<N; i++){
|
for (int i=1; i<N; i++){
|
||||||
|
|
@ -343,6 +352,8 @@ def symmetrify(A, upper=False):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
N = int(N) # for safe type casting
|
||||||
if A.flags['C_CONTIGUOUS'] and upper:
|
if A.flags['C_CONTIGUOUS'] and upper:
|
||||||
weave.inline(f_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
|
weave.inline(f_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
|
||||||
elif A.flags['C_CONTIGUOUS'] and not upper:
|
elif A.flags['C_CONTIGUOUS'] and not upper:
|
||||||
|
|
@ -403,4 +414,3 @@ def backsub_both_sides(L, X, transpose='left'):
|
||||||
else:
|
else:
|
||||||
tmp, _ = lapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=0)
|
tmp, _ = lapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=0)
|
||||||
return lapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=0)[0].T
|
return lapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=0)[0].T
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,34 @@
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy import weave
|
from scipy import weave
|
||||||
|
from config import *
|
||||||
|
|
||||||
|
def chain_1(df_dg, dg_dx):
    """
    Chain rule for the first derivative of a composition f(g(x)).

    .. math::
        \\frac{d(f . g)}{dx} = \\frac{df}{dg} \\frac{dg}{dx}

    :param df_dg: derivative of f with respect to g
    :param dg_dx: derivative of g with respect to x
    :returns: the product of the two derivatives
    """
    first_derivative = df_dg * dg_dx
    return first_derivative
|
||||||
|
|
||||||
|
def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
    """
    Chain rule for the second derivative of a composition f(g(x)).

    .. math::
        \\frac{d^{2}(f . g)}{dx^{2}} = \\frac{d^{2}f}{dg^{2}}(\\frac{dg}{dx})^{2} + \\frac{df}{dg}\\frac{d^{2}g}{dx^{2}}

    :param d2f_dg2: second derivative of f with respect to g
    :param dg_dx: first derivative of g with respect to x
    :param df_dg: first derivative of f with respect to g
    :param d2g_dx2: second derivative of g with respect to x
    :returns: the sum of the two terms above
    """
    curvature_term = d2f_dg2*(dg_dx**2)
    slope_term = df_dg*d2g_dx2
    return curvature_term + slope_term
|
||||||
|
|
||||||
|
def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
    """
    Chain rule for the third derivative of a composition f(g(x)).

    .. math::
        \\frac{d^{3}(f . g)}{dx^{3}} = \\frac{d^{3}f}{dg^{3}}(\\frac{dg}{dx})^{3} + 3\\frac{d^{2}f}{dg^{2}}\\frac{dg}{dx}\\frac{d^{2}g}{dx^{2}} + \\frac{df}{dg}\\frac{d^{3}g}{dx^{3}}

    :param d3f_dg3: third derivative of f with respect to g
    :param dg_dx: first derivative of g with respect to x
    :param d2f_dg2: second derivative of f with respect to g
    :param d2g_dx2: second derivative of g with respect to x
    :param df_dg: first derivative of f with respect to g
    :param d3g_dx3: third derivative of g with respect to x
    :returns: the sum of the three terms above
    """
    cubic_term = d3f_dg3*(dg_dx**3)
    mixed_term = 3*d2f_dg2*dg_dx*d2g_dx2
    linear_term = df_dg*d3g_dx3
    return cubic_term + mixed_term + linear_term
|
||||||
|
|
||||||
def opt_wrapper(m, **kwargs):
|
def opt_wrapper(m, **kwargs):
|
||||||
"""
|
"""
|
||||||
|
|
@ -57,11 +85,18 @@ def kmm_init(X, m = 10):
|
||||||
return X[inducing]
|
return X[inducing]
|
||||||
|
|
||||||
def fast_array_equal(A, B):
|
def fast_array_equal(A, B):
|
||||||
|
|
||||||
|
|
||||||
|
if config.getboolean('parallel', 'openmp'):
|
||||||
|
pragma_string = '#pragma omp parallel for private(i, j)'
|
||||||
|
else:
|
||||||
|
pragma_string = ''
|
||||||
|
|
||||||
code2="""
|
code2="""
|
||||||
int i, j;
|
int i, j;
|
||||||
return_val = 1;
|
return_val = 1;
|
||||||
|
|
||||||
#pragma omp parallel for private(i, j)
|
%s
|
||||||
for(i=0;i<N;i++){
|
for(i=0;i<N;i++){
|
||||||
for(j=0;j<D;j++){
|
for(j=0;j<D;j++){
|
||||||
if(A(i, j) != B(i, j)){
|
if(A(i, j) != B(i, j)){
|
||||||
|
|
@ -70,13 +105,18 @@ def fast_array_equal(A, B):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
""" % pragma_string
|
||||||
|
|
||||||
|
if config.getboolean('parallel', 'openmp'):
|
||||||
|
pragma_string = '#pragma omp parallel for private(i, j, z)'
|
||||||
|
else:
|
||||||
|
pragma_string = ''
|
||||||
|
|
||||||
code3="""
|
code3="""
|
||||||
int i, j, z;
|
int i, j, z;
|
||||||
return_val = 1;
|
return_val = 1;
|
||||||
|
|
||||||
#pragma omp parallel for private(i, j, z)
|
%s
|
||||||
for(i=0;i<N;i++){
|
for(i=0;i<N;i++){
|
||||||
for(j=0;j<D;j++){
|
for(j=0;j<D;j++){
|
||||||
for(z=0;z<Q;z++){
|
for(z=0;z<Q;z++){
|
||||||
|
|
@ -87,35 +127,48 @@ def fast_array_equal(A, B):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
""" % pragma_string
|
||||||
|
|
||||||
|
if config.getboolean('parallel', 'openmp'):
|
||||||
|
pragma_string = '#include <omp.h>'
|
||||||
|
else:
|
||||||
|
pragma_string = ''
|
||||||
|
|
||||||
support_code = """
|
support_code = """
|
||||||
#include <omp.h>
|
%s
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
"""
|
""" % pragma_string
|
||||||
|
|
||||||
weave_options = {'headers' : ['<omp.h>'],
|
|
||||||
'extra_compile_args': ['-fopenmp -O3'],
|
|
||||||
'extra_link_args' : ['-lgomp']}
|
|
||||||
|
|
||||||
|
weave_options_openmp = {'headers' : ['<omp.h>'],
|
||||||
|
'extra_compile_args': ['-fopenmp -O3'],
|
||||||
|
'extra_link_args' : ['-lgomp'],
|
||||||
|
'libraries': ['gomp']}
|
||||||
|
weave_options_noopenmp = {'extra_compile_args': ['-O3']}
|
||||||
|
|
||||||
|
if config.getboolean('parallel', 'openmp'):
|
||||||
|
weave_options = weave_options_openmp
|
||||||
|
else:
|
||||||
|
weave_options = weave_options_noopenmp
|
||||||
|
|
||||||
value = False
|
value = False
|
||||||
|
|
||||||
|
|
||||||
if (A == None) and (B == None):
|
if (A == None) and (B == None):
|
||||||
return True
|
return True
|
||||||
elif ((A == None) and (B != None)) or ((A != None) and (B == None)):
|
elif ((A == None) and (B != None)) or ((A != None) and (B == None)):
|
||||||
return False
|
return False
|
||||||
elif A.shape == B.shape:
|
elif A.shape == B.shape:
|
||||||
if A.ndim == 2:
|
if A.ndim == 2:
|
||||||
N, D = A.shape
|
N, D = [int(i) for i in A.shape]
|
||||||
value = weave.inline(code2, support_code=support_code, libraries=['gomp'],
|
value = weave.inline(code2, support_code=support_code,
|
||||||
arg_names=['A', 'B', 'N', 'D'],
|
arg_names=['A', 'B', 'N', 'D'],
|
||||||
type_converters=weave.converters.blitz,**weave_options)
|
type_converters=weave.converters.blitz, **weave_options)
|
||||||
elif A.ndim == 3:
|
elif A.ndim == 3:
|
||||||
N, D, Q = A.shape
|
N, D, Q = [int(i) for i in A.shape]
|
||||||
value = weave.inline(code3, support_code=support_code, libraries=['gomp'],
|
value = weave.inline(code3, support_code=support_code,
|
||||||
arg_names=['A', 'B', 'N', 'D', 'Q'],
|
arg_names=['A', 'B', 'N', 'D', 'Q'],
|
||||||
type_converters=weave.converters.blitz,**weave_options)
|
type_converters=weave.converters.blitz, **weave_options)
|
||||||
else:
|
else:
|
||||||
value = np.array_equal(A,B)
|
value = np.array_equal(A,B)
|
||||||
|
|
||||||
|
|
|
||||||
331
GPy/util/netpbmfile.py
Normal file
331
GPy/util/netpbmfile.py
Normal file
|
|
@ -0,0 +1,331 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# netpbmfile.py
|
||||||
|
|
||||||
|
# Copyright (c) 2011-2013, Christoph Gohlke
|
||||||
|
# Copyright (c) 2011-2013, The Regents of the University of California
|
||||||
|
# Produced at the Laboratory for Fluorescence Dynamics.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions are met:
|
||||||
|
#
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in the
|
||||||
|
# documentation and/or other materials provided with the distribution.
|
||||||
|
# * Neither the name of the copyright holders nor the names of any
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
"""Read and write image data from respectively to Netpbm files.
|
||||||
|
|
||||||
|
This implementation follows the Netpbm format specifications at
|
||||||
|
http://netpbm.sourceforge.net/doc/. No gamma correction is performed.
|
||||||
|
|
||||||
|
The following image formats are supported: PBM (bi-level), PGM (grayscale),
|
||||||
|
PPM (color), PAM (arbitrary), XV thumbnail (RGB332, read-only).
|
||||||
|
|
||||||
|
:Author:
|
||||||
|
`Christoph Gohlke <http://www.lfd.uci.edu/~gohlke/>`_
|
||||||
|
|
||||||
|
:Organization:
|
||||||
|
Laboratory for Fluorescence Dynamics, University of California, Irvine
|
||||||
|
|
||||||
|
:Version: 2013.01.18
|
||||||
|
|
||||||
|
Requirements
|
||||||
|
------------
|
||||||
|
* `CPython 2.7, 3.2 or 3.3 <http://www.python.org>`_
|
||||||
|
* `Numpy 1.7 <http://www.numpy.org>`_
|
||||||
|
* `Matplotlib 1.2 <http://www.matplotlib.org>`_ (optional for plotting)
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> im1 = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16)
|
||||||
|
>>> imsave('_tmp.pgm', im1)
|
||||||
|
>>> im2 = imread('_tmp.pgm')
|
||||||
|
>>> assert numpy.all(im1 == im2)
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import division, print_function
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
import math
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
|
import numpy
|
||||||
|
|
||||||
|
__version__ = '2013.01.18'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
__all__ = ['imread', 'imsave', 'NetpbmFile']
|
||||||
|
|
||||||
|
|
||||||
|
def imread(filename, *args, **kwargs):
    """Return image data from Netpbm file as numpy array.

    `args` and `kwargs` are arguments to NetpbmFile.asarray().

    Examples
    --------
    >>> image = imread('_tmp.pgm')

    """
    # Construct outside the try block: if the constructor raises there is
    # nothing to close, and referencing the unbound name in `finally` would
    # replace the real error with a NameError.
    netpbm = NetpbmFile(filename)
    try:
        # Forward the extra arguments as documented above.
        image = netpbm.asarray(*args, **kwargs)
    finally:
        netpbm.close()
    return image
|
||||||
|
|
||||||
|
|
||||||
|
def imsave(filename, data, maxval=None, pam=False):
    """Write image data to Netpbm file.

    `filename` is passed to NetpbmFile.write() and may be a file name or an
    open file.  `data` and `maxval` are passed to the NetpbmFile constructor.
    If `pam` is true the PAM (P7) header layout is written.

    Examples
    --------
    >>> image = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16)
    >>> imsave('_tmp.pgm', image)

    """
    # Construct outside the try block: when the data is rejected (ValueError)
    # there is no instance to close, and touching the unbound name in
    # `finally` would hide that error behind a NameError.
    netpbm = NetpbmFile(data, maxval=maxval)
    try:
        netpbm.write(filename, pam=pam)
    finally:
        netpbm.close()
|
||||||
|
|
||||||
|
|
||||||
|
class NetpbmFile(object):
    """Read and write Netpbm PAM, PBM, PGM, PPM, files.

    An instance is created from a file name, an open binary file, or array
    data.  After construction the attributes `header`, `magicnum`, `width`,
    `height`, `maxval`, `depth` and `tupltypes` describe the image.
    """

    # Tuple type reported for each magic number.
    _types = {b'P1': b'BLACKANDWHITE', b'P2': b'GRAYSCALE', b'P3': b'RGB',
              b'P4': b'BLACKANDWHITE', b'P5': b'GRAYSCALE', b'P6': b'RGB',
              b'P7 332': b'RGB', b'P7': b'RGB_ALPHA'}

    def __init__(self, arg=None, **kwargs):
        """Initialize instance from filename, open file, or numpy array."""
        # Define every attribute up front so close()/__del__ are always safe.
        for attr in ('header', 'magicnum', 'width', 'height', 'maxval',
                     'depth', 'tupltypes', '_filename', '_fh', '_data'):
            setattr(self, attr, None)
        if arg is None:
            self._fromdata([], **kwargs)
        elif isinstance(arg, basestring):
            # File opened here: remembered via _filename so close() closes it.
            self._fh = open(arg, 'rb')
            self._filename = arg
            self._fromfile(self._fh, **kwargs)
        elif hasattr(arg, 'seek'):
            # Caller-owned file object: used for reading, never closed here.
            self._fromfile(arg, **kwargs)
            self._fh = arg
        else:
            self._fromdata(arg, **kwargs)

    def asarray(self, copy=True, cache=False, **kwargs):
        """Return image data from file as numpy array.

        Data held in memory is returned as a deep copy unless `copy` is
        false.  Data read from the file is returned as read; with `cache` it
        is additionally kept on the instance (and then subject to `copy`).
        `kwargs` are passed to the file reader (`byteorder`).
        """
        data = self._data
        if data is None:
            data = self._read_data(self._fh, **kwargs)
            if cache:
                self._data = data
            else:
                return data
        return deepcopy(data) if copy else data

    def write(self, arg, **kwargs):
        """Write instance to file.

        `arg` is an object with a `seek` method (written to directly) or a
        file name (opened in binary mode and closed afterwards).
        """
        if hasattr(arg, 'seek'):
            self._tofile(arg, **kwargs)
        else:
            with open(arg, 'wb') as fid:
                self._tofile(fid, **kwargs)

    def close(self):
        """Close open file. Future asarray calls might fail."""
        # Only files opened by this instance (from a file name) are closed.
        if self._filename and self._fh:
            self._fh.close()
            self._fh = None

    def __del__(self):
        self.close()

    def _fromfile(self, fh):
        """Initialize instance from open file."""
        fh.seek(0)
        data = fh.read(4096)
        # The second byte of the magic number must be a digit from 1 to 7.
        if (len(data) < 7) or not (b'0' < data[1:2] < b'8'):
            raise ValueError("Not a Netpbm file:\n%s" % data[:32])
        try:
            self._read_pam_header(data)
        except Exception:
            # Not a PAM header; fall back to the PNM / XV thumbnail layout.
            try:
                self._read_pnm_header(data)
            except Exception:
                raise ValueError("Not a Netpbm file:\n%s" % data[:32])

    def _read_pam_header(self, data):
        """Read PAM header and initialize instance."""
        # Raw byte literals: \s, \d and \w must reach the regex engine
        # untouched; in non-raw literals they are invalid escape sequences.
        regroups = re.search(
            br"(^P7[\n\r]+(?:(?:[\n\r]+)|(?:#.*)|"
            br"(HEIGHT\s+\d+)|(WIDTH\s+\d+)|(DEPTH\s+\d+)|(MAXVAL\s+\d+)|"
            br"(?:TUPLTYPE\s+\w+))*ENDHDR\n)", data).groups()
        self.header = regroups[0]
        self.magicnum = b'P7'
        for group in regroups[1:]:
            # A missing field leaves its group as None; the resulting error
            # is handled by the caller as "not a PAM header".
            key, value = group.split()
            setattr(self, unicode(key).lower(), int(value))
        matches = re.findall(br"(TUPLTYPE\s+\w+)", self.header)
        self.tupltypes = [s.split(None, 1)[1] for s in matches]

    def _read_pnm_header(self, data):
        """Read PNM header and initialize instance."""
        # Bitmap formats (P1, P4) carry no maxval field; 1 is appended below.
        bpm = data[1:2] in b"14"
        regroups = re.search(b"".join((
            br"(^(P[123456]|P7 332)\s+(?:#.*[\r\n])*",
            br"\s*(\d+)\s+(?:#.*[\r\n])*",
            br"\s*(\d+)\s+(?:#.*[\r\n])*" * (not bpm),
            br"\s*(\d+)\s(?:\s*#.*[\r\n]\s)*)")), data).groups() + (1, ) * bpm
        self.header = regroups[0]
        self.magicnum = regroups[1]
        self.width = int(regroups[2])
        self.height = int(regroups[3])
        self.maxval = int(regroups[4])
        self.depth = 3 if self.magicnum in b"P3P6P7 332" else 1
        self.tupltypes = [self._types[self.magicnum]]

    def _read_data(self, fh, byteorder='>'):
        """Return image data from open file as numpy array.

        `byteorder` is the numpy byte-order character used for 16 bit
        samples.
        """
        fh.seek(len(self.header))
        data = fh.read()
        dtype = 'u1' if self.maxval < 256 else byteorder + 'u2'
        # XV thumbnails store one RGB332 byte per pixel, expanded further down.
        depth = 1 if self.magicnum == b"P7 332" else self.depth
        shape = [-1, self.height, self.width, depth]
        size = numpy.prod(shape[1:])
        if self.magicnum in b"P1P2P3":
            # Plain (ASCII) formats: whitespace separated sample values.
            data = numpy.array(data.split(None, size)[:size], dtype)
            data = data.reshape(shape)
        elif self.maxval == 1:
            # Packed bitmap: each row is padded to a whole number of bytes.
            shape[2] = (self.width + 7) // 8
            data = numpy.frombuffer(data, dtype).reshape(shape)
            data = numpy.unpackbits(data, axis=-2)[:, :, :self.width, :]
        else:
            data = numpy.frombuffer(data, dtype)
            # Drop trailing bytes that do not make up a complete image.
            data = data[:size * (data.size // size)].reshape(shape)
        # Squeeze the image-count and sample axes when they have length one.
        if data.shape[0] < 2:
            data = data.reshape(data.shape[1:])
        if data.shape[-1] < 2:
            data = data.reshape(data.shape[:-1])
        if self.magicnum == b"P7 332":
            # Look-up table from an RGB332 byte to an (R, G, B) triple.
            rgb332 = numpy.array(list(numpy.ndindex(8, 8, 4)), numpy.uint8)
            # The multiplier must itself be uint8: an in-place multiply by a
            # plain list of ints is refused by numpy's casting rules.
            rgb332 *= numpy.array([36, 36, 85], numpy.uint8)
            data = numpy.take(rgb332, data, axis=0)
        return data

    def _fromdata(self, data, maxval=None):
        """Initialize instance from numpy array.

        Raises ValueError if the data is not of an integer or boolean type,
        contains negative values, or `maxval` is outside 0..65535.
        """
        data = numpy.array(data, ndmin=2, copy=True)
        if data.dtype.kind not in "uib":
            raise ValueError("not an integer type: %s" % data.dtype)
        if data.dtype.kind == 'i' and numpy.min(data) < 0:
            raise ValueError("data out of range: %i" % numpy.min(data))
        if maxval is None:
            maxval = numpy.max(data)
            maxval = 255 if maxval < 256 else 65535
        if maxval < 0 or maxval > 65535:
            raise ValueError("data out of range: %i" % maxval)
        # 16 bit samples are stored big-endian.
        data = data.astype('u1' if maxval < 256 else '>u2')
        self._data = data
        if data.ndim > 2 and data.shape[-1] in (3, 4):
            # Trailing axis of length 3 or 4 is taken as the sample axis.
            self.depth = data.shape[-1]
            self.width = data.shape[-2]
            self.height = data.shape[-3]
            self.magicnum = b'P7' if self.depth == 4 else b'P6'
        else:
            self.depth = 1
            self.width = data.shape[-1]
            self.height = data.shape[-2]
            self.magicnum = b'P5' if maxval > 1 else b'P4'
        self.maxval = maxval
        self.tupltypes = [self._types[self.magicnum]]
        self.header = self._header()

    def _tofile(self, fh, pam=False):
        """Write Netpbm file."""
        fh.seek(0)
        fh.write(self._header(pam))
        data = self.asarray(copy=False)
        if self.maxval == 1:
            data = numpy.packbits(data, axis=-1)
        # Write through the file object's own write() so that in-memory
        # streams work too; ndarray.tofile() needs a real OS-level file.
        fh.write(data.tobytes())

    def _header(self, pam=False):
        """Return file header as byte string."""
        if pam or self.magicnum == b'P7':
            header = "\n".join((
                "P7",
                "HEIGHT %i" % self.height,
                "WIDTH %i" % self.width,
                "DEPTH %i" % self.depth,
                "MAXVAL %i" % self.maxval,
                "\n".join("TUPLTYPE %s" % unicode(i) for i in self.tupltypes),
                "ENDHDR\n"))
        elif self.maxval == 1:
            header = "P4 %i %i\n" % (self.width, self.height)
        elif self.depth == 1:
            header = "P5 %i %i %i\n" % (self.width, self.height, self.maxval)
        else:
            header = "P6 %i %i %i\n" % (self.width, self.height, self.maxval)
        if sys.version_info[0] > 2:
            header = bytes(header, 'ascii')
        return header

    def __str__(self):
        """Return information about instance."""
        return unicode(self.header)
|
||||||
|
|
||||||
|
|
||||||
|
if sys.version_info[0] > 2:
|
||||||
|
basestring = str
|
||||||
|
unicode = lambda x: str(x, 'ascii')
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# Show images specified on command line or all images in current directory
|
||||||
|
from glob import glob
|
||||||
|
from matplotlib import pyplot
|
||||||
|
files = sys.argv[1:] if len(sys.argv) > 1 else glob('*.p*m')
|
||||||
|
for fname in files:
|
||||||
|
try:
|
||||||
|
pam = NetpbmFile(fname)
|
||||||
|
img = pam.asarray(copy=False)
|
||||||
|
if False:
|
||||||
|
pam.write('_tmp.pgm.out', pam=True)
|
||||||
|
img2 = imread('_tmp.pgm.out')
|
||||||
|
assert numpy.all(img == img2)
|
||||||
|
imsave('_tmp.pgm.out', img)
|
||||||
|
img2 = imread('_tmp.pgm.out')
|
||||||
|
assert numpy.all(img == img2)
|
||||||
|
pam.close()
|
||||||
|
except ValueError as e:
|
||||||
|
print(fname, e)
|
||||||
|
continue
|
||||||
|
_shape = img.shape
|
||||||
|
if img.ndim > 3 or (img.ndim > 2 and img.shape[-1] not in (3, 4)):
|
||||||
|
img = img[0]
|
||||||
|
cmap = 'gray' if pam.maxval > 1 else 'binary'
|
||||||
|
pyplot.imshow(img, cmap, interpolation='nearest')
|
||||||
|
pyplot.title("%s %s %s %s" % (fname, unicode(pam.magicnum),
|
||||||
|
_shape, img.dtype))
|
||||||
|
pyplot.show()
|
||||||
|
|
@ -1,32 +1,113 @@
|
||||||
from sympy import Function, S, oo, I, cos, sin
|
from sympy import Function, S, oo, I, cos, sin, asin, log, erf,pi,exp
|
||||||
|
|
||||||
|
|
||||||
|
class ln_diff_erf(Function):
    """Symbolic function log(erf(x0) - erf(x1)).

    The function is only evaluated when both arguments are numbers;
    otherwise it stays symbolic and is differentiated through fdiff.
    """
    nargs = 2

    def fdiff(self, argindex=2):
        """Return the partial derivative with respect to argument `argindex`.

        Raises ArgumentIndexError for an index other than 1 or 2.
        """
        # Imported here because the module-level sympy import does not
        # bring either name into scope.
        from sympy import sqrt
        from sympy.core.function import ArgumentIndexError
        if argindex not in (1, 2):
            raise ArgumentIndexError(self, argindex)
        x0, x1 = self.args
        if argindex == 2:
            return -2*exp(-x1**2)/(sqrt(pi)*(erf(x0)-erf(x1)))
        return 2*exp(-x0**2)/(sqrt(pi)*(erf(x0)-erf(x1)))

    @classmethod
    def eval(cls, x0, x1):
        # Returning None (implicitly) keeps the expression unevaluated.
        if x0.is_Number and x1.is_Number:
            return log(erf(x0)-erf(x1))
|
||||||
|
|
||||||
|
class sim_h(Function):
    """Symbolic helper h(t, tprime) with decays d_i, d_j and length scale l.

    The closed form is only substituted when every argument is a number.
    """
    nargs = 5

    def fdiff(self, argindex=1):
        # Derivatives are not implemented; this returns None.
        pass

    @classmethod
    def eval(cls, t, tprime, d_i, d_j, l):
        # putting in the is_Number stuff forces it to look for a fdiff method for derivative.
        arguments = (t, tprime, d_i, d_j, l)
        if all(argument.is_Number for argument in arguments):
            if any(argument is S.NaN for argument in arguments):
                return S.NaN
            else:
                # NOTE(review): the second exponential previously read
                # exp(-(d_j*tprime + d_i)); the published expression for this
                # kernel has d_i multiplied by t -- confirm against the
                # derivation used for this kernel.
                return (exp((d_j/2*l)**2)/(d_i+d_j)
                        *(exp(-d_j*(tprime - t))
                          *(erf((tprime-t)/l - d_j/2*l)
                            + erf(t/l + d_j/2*l))
                          - exp(-(d_j*tprime + d_i*t))
                          *(erf(tprime/l - d_j/2*l)
                            + erf(d_j/2*l))))
|
||||||
|
|
||||||
|
class erfc(Function):
    """Complementary error function, rewritten on creation as 1 - erf(arg)."""
    nargs = 1

    @classmethod
    def eval(cls, arg):
        error_function = erf(arg)
        return 1-error_function
|
||||||
|
|
||||||
|
class erfcx(Function):
    """Scaled complementary error function, erfc(arg)*exp(arg**2)."""
    nargs = 1

    @classmethod
    def eval(cls, arg):
        complement = erfc(arg)
        scaling = exp(arg*arg)
        return complement*scaling
|
||||||
|
|
||||||
class sinc_grad(Function):
    """Derivative of sin(x)/x, i.e. (x*cos(x) - sin(x))/x**2, with value 0 at 0."""
    nargs = 1

    def fdiff(self, argindex=1):
        """Return the derivative of this function (the second derivative of sinc).

        Raises ArgumentIndexError for an index other than 1.
        """
        # Imported here because the module-level sympy import does not
        # bring this name into scope.
        from sympy.core.function import ArgumentIndexError
        if argindex==1:
            # Strictly speaking this should be computed separately, as it won't work when x=0. See http://calculus.subwiki.org/wiki/Sinc_function
            x = self.args[0]  # previously an undefined name inside this method
            return ((2-x*x)*sin(x) - 2*x*cos(x))/(x*x*x)
        else:
            raise ArgumentIndexError(self, argindex)

    @classmethod
    def eval(cls, x):
        # Only numeric arguments are evaluated; anything else stays symbolic.
        if x.is_Number:
            if x is S.NaN:
                return S.NaN
            elif x is S.Zero:
                return S.Zero
            else:
                return (x*cos(x) - sin(x))/(x*x)
|
||||||
|
|
||||||
class sinc(Function):
    """Unnormalised sinc function sin(x)/x, with value 1 at 0."""

    nargs = 1

    def fdiff(self, argindex=1):
        """Return the derivative as a sinc_grad expression.

        Raises ArgumentIndexError for an index other than 1.
        """
        # Imported here because the module-level sympy import does not
        # bring this name into scope.
        from sympy.core.function import ArgumentIndexError
        if argindex==1:
            return sinc_grad(self.args[0])
        else:
            raise ArgumentIndexError(self, argindex)

    @classmethod
    def eval(cls, arg):
        # Numeric arguments are evaluated directly.
        if arg.is_Number:
            if arg is S.NaN:
                return S.NaN
            elif arg is S.Zero:
                return S.One
            else:
                return sin(arg)/arg

        # sinc(asin(x)) simplifies to x/asin(x).
        if arg.func is asin:
            x = arg.args[0]
            return x / arg

    def _eval_is_real(self):
        return self.args[0].is_real
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,24 +13,32 @@ def std_norm_cdf(x):
|
||||||
Cumulative standard Gaussian distribution
|
Cumulative standard Gaussian distribution
|
||||||
Based on Abramowitz, M. and Stegun, I. (1970)
|
Based on Abramowitz, M. and Stegun, I. (1970)
|
||||||
"""
|
"""
|
||||||
|
#Generalize for many x
|
||||||
|
x = np.asarray(x).copy()
|
||||||
|
cdf_x = np.zeros_like(x)
|
||||||
|
N = x.size
|
||||||
support_code = "#include <math.h>"
|
support_code = "#include <math.h>"
|
||||||
code = """
|
code = """
|
||||||
|
|
||||||
double sign = 1.0;
|
double sign, t, erf;
|
||||||
if (x < 0.0){
|
for (int i=0; i<N; i++){
|
||||||
sign = -1.0;
|
sign = 1.0;
|
||||||
x = -x;
|
if (x[i] < 0.0){
|
||||||
|
sign = -1.0;
|
||||||
|
x[i] = -x[i];
|
||||||
|
}
|
||||||
|
x[i] = x[i]/sqrt(2.0);
|
||||||
|
|
||||||
|
t = 1.0/(1.0 + 0.3275911*x[i]);
|
||||||
|
|
||||||
|
erf = 1. - exp(-x[i]*x[i])*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429)))));
|
||||||
|
|
||||||
|
//return_val = 0.5*(1.0 + sign*erf);
|
||||||
|
cdf_x[i] = 0.5*(1.0 + sign*erf);
|
||||||
}
|
}
|
||||||
x = x/sqrt(2.0);
|
|
||||||
|
|
||||||
double t = 1.0/(1.0 + 0.3275911*x);
|
|
||||||
|
|
||||||
double erf = 1. - exp(-x*x)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429)))));
|
|
||||||
|
|
||||||
return_val = 0.5*(1.0 + sign*erf);
|
|
||||||
"""
|
"""
|
||||||
x = float(x)
|
weave.inline(code, arg_names=['x', 'cdf_x', 'N'], support_code=support_code)
|
||||||
return weave.inline(code,arg_names=['x'],support_code=support_code)
|
return cdf_x
|
||||||
|
|
||||||
def inv_std_norm_cdf(x):
|
def inv_std_norm_cdf(x):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -246,17 +246,36 @@ class lvm_dimselect(lvm):
|
||||||
|
|
||||||
|
|
||||||
class image_show(matplotlib_show):
|
class image_show(matplotlib_show):
|
||||||
"""Show a data vector as an image."""
|
"""Show a data vector as an image. This visualizer rehapes the output vector and displays it as an image.
|
||||||
def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, invert=False, scale=False, palette=[], presetMean = 0., presetSTD = -1., selectImage=0):
|
|
||||||
|
:param vals: the values of the output to display.
|
||||||
|
:type vals: ndarray
|
||||||
|
:param axes: the axes to show the output on.
|
||||||
|
:type axes: axes handle
|
||||||
|
:param dimensions: the dimensions that the image needs to be transposed to for display.
|
||||||
|
:type dimensions: tuple
|
||||||
|
:param transpose: whether to transpose the image before display.
|
||||||
|
:type transpose: bool (default is False)
|
||||||
|
:param order: whether array is in Fortran ordering ('F') or Python ordering ('C'). Default is python ('C').
|
||||||
|
:type order: string
|
||||||
|
:param invert: whether to invert the pixels or not (default False).
|
||||||
|
:type invert: bool
|
||||||
|
:param palette: a palette to use for the image.
|
||||||
|
:param preset_mean: the preset mean of a scaled image.
|
||||||
|
:type preset_mean: double
|
||||||
|
:param preset_std: the preset standard deviation of a scaled image.
|
||||||
|
:type preset_std: double"""
|
||||||
|
def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, order='C', invert=False, scale=False, palette=[], preset_mean = 0., preset_std = -1., select_image=0):
|
||||||
matplotlib_show.__init__(self, vals, axes)
|
matplotlib_show.__init__(self, vals, axes)
|
||||||
self.dimensions = dimensions
|
self.dimensions = dimensions
|
||||||
self.transpose = transpose
|
self.transpose = transpose
|
||||||
|
self.order = order
|
||||||
self.invert = invert
|
self.invert = invert
|
||||||
self.scale = scale
|
self.scale = scale
|
||||||
self.palette = palette
|
self.palette = palette
|
||||||
self.presetMean = presetMean
|
self.preset_mean = preset_mean
|
||||||
self.presetSTD = presetSTD
|
self.preset_std = preset_std
|
||||||
self.selectImage = selectImage # This is used when the y vector contains multiple images concatenated.
|
self.select_image = select_image # This is used when the y vector contains multiple images concatenated.
|
||||||
|
|
||||||
self.set_image(self.vals)
|
self.set_image(self.vals)
|
||||||
if not self.palette == []: # Can just show the image (self.set_image() took care of setting the palette)
|
if not self.palette == []: # Can just show the image (self.set_image() took care of setting the palette)
|
||||||
|
|
@ -272,22 +291,22 @@ class image_show(matplotlib_show):
|
||||||
|
|
||||||
def set_image(self, vals):
|
def set_image(self, vals):
|
||||||
dim = self.dimensions[0] * self.dimensions[1]
|
dim = self.dimensions[0] * self.dimensions[1]
|
||||||
nImg = np.sqrt(vals[0,].size/dim)
|
num_images = np.sqrt(vals[0,].size/dim)
|
||||||
if nImg > 1 and nImg.is_integer(): # Show a mosaic of images
|
if num_images > 1 and num_images.is_integer(): # Show a mosaic of images
|
||||||
nImg = np.int(nImg)
|
num_images = np.int(num_images)
|
||||||
self.vals = np.zeros((self.dimensions[0]*nImg, self.dimensions[1]*nImg))
|
self.vals = np.zeros((self.dimensions[0]*num_images, self.dimensions[1]*num_images))
|
||||||
for iR in range(nImg):
|
for iR in range(num_images):
|
||||||
for iC in range(nImg):
|
for iC in range(num_images):
|
||||||
currImgId = iR*nImg + iC
|
cur_img_id = iR*num_images + iC
|
||||||
currImg = np.reshape(vals[0,dim*currImgId+np.array(range(dim))], self.dimensions, order='F')
|
cur_img = np.reshape(vals[0,dim*cur_img_id+np.array(range(dim))], self.dimensions, order=self.order)
|
||||||
firstRow = iR*self.dimensions[0]
|
first_row = iR*self.dimensions[0]
|
||||||
lastRow = (iR+1)*self.dimensions[0]
|
last_row = (iR+1)*self.dimensions[0]
|
||||||
firstCol = iC*self.dimensions[1]
|
first_col = iC*self.dimensions[1]
|
||||||
lastCol = (iC+1)*self.dimensions[1]
|
last_col = (iC+1)*self.dimensions[1]
|
||||||
self.vals[firstRow:lastRow, firstCol:lastCol] = currImg
|
self.vals[first_row:last_row, first_col:last_col] = cur_img
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.vals = np.reshape(vals[0,dim*self.selectImage+np.array(range(dim))], self.dimensions, order='F')
|
self.vals = np.reshape(vals[0,dim*self.select_image+np.array(range(dim))], self.dimensions, order=self.order)
|
||||||
if self.transpose:
|
if self.transpose:
|
||||||
self.vals = self.vals.T
|
self.vals = self.vals.T
|
||||||
# if not self.scale:
|
# if not self.scale:
|
||||||
|
|
@ -296,8 +315,8 @@ class image_show(matplotlib_show):
|
||||||
self.vals = -self.vals
|
self.vals = -self.vals
|
||||||
|
|
||||||
# un-normalizing, for visualisation purposes:
|
# un-normalizing, for visualisation purposes:
|
||||||
if self.presetSTD >= 0: # The Mean is assumed to be in the range (0,255)
|
if self.preset_std >= 0: # The Mean is assumed to be in the range (0,255)
|
||||||
self.vals = self.vals*self.presetSTD + self.presetMean
|
self.vals = self.vals*self.preset_std + self.preset_mean
|
||||||
# Clipping the values:
|
# Clipping the values:
|
||||||
self.vals[self.vals < 0] = 0
|
self.vals[self.vals < 0] = 0
|
||||||
self.vals[self.vals > 255] = 255
|
self.vals[self.vals > 255] = 255
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,14 @@ GPy.examples.dimensionality_reduction module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
GPy.examples.laplace_approximations module
|
||||||
|
------------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: GPy.examples.laplace_approximations
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
GPy.examples.regression module
|
GPy.examples.regression module
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,14 @@ GPy.kern.parts.Matern52 module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
GPy.kern.parts.ODE_1 module
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
.. automodule:: GPy.kern.parts.ODE_1
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
GPy.kern.parts.bias module
|
GPy.kern.parts.bias module
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
||||||
|
|
@ -44,6 +52,14 @@ GPy.kern.parts.coregionalize module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
GPy.kern.parts.eq_ode1 module
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
.. automodule:: GPy.kern.parts.eq_ode1
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
GPy.kern.parts.exponential module
|
GPy.kern.parts.exponential module
|
||||||
---------------------------------
|
---------------------------------
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,10 +4,10 @@ GPy.likelihoods.noise_models package
|
||||||
Submodules
|
Submodules
|
||||||
----------
|
----------
|
||||||
|
|
||||||
GPy.likelihoods.noise_models.binomial_noise module
|
GPy.likelihoods.noise_models.bernoulli_noise module
|
||||||
--------------------------------------------------
|
---------------------------------------------------
|
||||||
|
|
||||||
.. automodule:: GPy.likelihoods.noise_models.binomial_noise
|
.. automodule:: GPy.likelihoods.noise_models.bernoulli_noise
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
@ -60,6 +60,14 @@ GPy.likelihoods.noise_models.poisson_noise module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
GPy.likelihoods.noise_models.student_t_noise module
|
||||||
|
---------------------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: GPy.likelihoods.noise_models.student_t_noise
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
Module contents
|
Module contents
|
||||||
---------------
|
---------------
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,14 @@ GPy.likelihoods.gaussian_mixed_noise module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
GPy.likelihoods.laplace module
|
||||||
|
------------------------------
|
||||||
|
|
||||||
|
.. automodule:: GPy.likelihoods.laplace
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
GPy.likelihoods.likelihood module
|
GPy.likelihoods.likelihood module
|
||||||
---------------------------------
|
---------------------------------
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,14 @@ GPy.testing package
|
||||||
Submodules
|
Submodules
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
GPy.testing.bcgplvm_tests module
|
||||||
|
--------------------------------
|
||||||
|
|
||||||
|
.. automodule:: GPy.testing.bcgplvm_tests
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
GPy.testing.bgplvm_tests module
|
GPy.testing.bgplvm_tests module
|
||||||
-------------------------------
|
-------------------------------
|
||||||
|
|
||||||
|
|
@ -28,6 +36,14 @@ GPy.testing.examples_tests module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
GPy.testing.gp_transformation_tests module
|
||||||
|
------------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: GPy.testing.gp_transformation_tests
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
GPy.testing.gplvm_tests module
|
GPy.testing.gplvm_tests module
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
||||||
|
|
@ -44,6 +60,14 @@ GPy.testing.kernel_tests module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
GPy.testing.likelihoods_tests module
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: GPy.testing.likelihoods_tests
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
GPy.testing.mapping_tests module
|
GPy.testing.mapping_tests module
|
||||||
--------------------------------
|
--------------------------------
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,14 @@ GPy.util.classification module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
GPy.util.config module
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
.. automodule:: GPy.util.config
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
GPy.util.datasets module
|
GPy.util.datasets module
|
||||||
------------------------
|
------------------------
|
||||||
|
|
||||||
|
|
@ -43,6 +51,14 @@ GPy.util.decorators module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
GPy.util.erfcx module
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
.. automodule:: GPy.util.erfcx
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
GPy.util.linalg module
|
GPy.util.linalg module
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
@ -51,6 +67,14 @@ GPy.util.linalg module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
GPy.util.ln_diff_erfs module
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
.. automodule:: GPy.util.ln_diff_erfs
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
GPy.util.misc module
|
GPy.util.misc module
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
|
|
@ -75,6 +99,14 @@ GPy.util.multioutput module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
GPy.util.netpbmfile module
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
.. automodule:: GPy.util.netpbmfile
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
GPy.util.plot module
|
GPy.util.plot module
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
|
|
@ -99,6 +131,14 @@ GPy.util.squashers module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
GPy.util.symbolic module
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
.. automodule:: GPy.util.symbolic
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
GPy.util.univariate_Gaussian module
|
GPy.util.univariate_Gaussian module
|
||||||
-----------------------------------
|
-----------------------------------
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue