Merge branch 'devel' of github.com:SheffieldML/GPy into devel

Ricardo 2013-11-07 16:32:01 +00:00
commit df43f59dbf
78 changed files with 4930 additions and 1652 deletions

.gitignore
@@ -45,4 +45,4 @@ iterate.dat
# git merge files #
###################
*.orig
*.orig


@@ -7,7 +7,7 @@ virtualenv:
system_site_packages: true
# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
before_install:
before_install:
- sudo apt-get install -qq python-scipy python-pip
- sudo apt-get install -qq python-matplotlib
# Workaround for a permissions issue with Travis virtual machine images
@@ -17,10 +17,10 @@ before_install:
- sudo ln -s /run/shm /dev/shm
install:
- pip install --upgrade numpy==1.7.1
- pip install sphinx
- pip install --upgrade numpy==1.7.1
- pip install sphinx
- pip install nose
- pip install . --use-mirrors
# command to run tests, e.g. python setup.py test
script:
script:
- nosetests GPy/testing


@@ -126,7 +126,7 @@ class FITC(SparseGP):
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
# the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
if self.likelihood.num_params == 0:
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:


@@ -6,7 +6,7 @@ import numpy as np
import pylab as pb
from .. import kern
from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs
from ..likelihoods import EP
from ..likelihoods import EP, Laplace
from gp_base import GPBase
class GP(GPBase):
@@ -25,20 +25,23 @@ class GP(GPBase):
"""
def __init__(self, X, likelihood, kernel, normalize_X=False):
GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params())
self.update_likelihood_approximation()
def getstate(self):
return GPBase.getstate(self)
def setstate(self, state):
GPBase.setstate(self, state)
self._set_params(self._get_params())
def _set_params(self, p):
self.kern._set_params_transformed(p[:self.kern.num_params_transformed()])
self.likelihood._set_params(p[self.kern.num_params_transformed():])
new_kern_params = p[:self.kern.num_params_transformed()]
new_likelihood_params = p[self.kern.num_params_transformed():]
old_likelihood_params = self.likelihood._get_params()
self.kern._set_params_transformed(new_kern_params)
self.likelihood._set_params_transformed(new_likelihood_params)
self.K = self.kern.K(self.X)
#Re fit likelihood approximation (if it is an approx), as parameters have changed
if isinstance(self.likelihood, Laplace):
self.likelihood.fit_full(self.K)
self.K += self.likelihood.covariance_matrix
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
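Background for the Laplace refit above (standard GP material, not part of this commit): the approximation is a Gaussian centred on the posterior mode of the latent values, and both the mode and the curvature depend on K, hence the refit whenever kernel parameters change:

    q(f \mid X, y) = \mathcal{N}\big(f \mid \hat{f},\, (K^{-1} + W)^{-1}\big),
    \quad \hat{f} = \arg\max_f \, p(y \mid f)\, p(f \mid X),
    \quad W = -\nabla\nabla \log p(y \mid \hat{f})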
@@ -55,6 +58,10 @@ class GP(GPBase):
tmp, _ = dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
self.dL_dK = 0.5 * (tmp - self.output_dim * self.Ki)
#Adding dZ_dK (0 for a non-approximate likelihood, compensates for
#additional gradients of K when log-likelihood has non-zero Z term)
self.dL_dK += self.likelihood.dZ_dK
def _get_params(self):
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
@@ -94,19 +101,13 @@ class GP(GPBase):
return (-0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) -
0.5 * self.output_dim * self.K_logdet + self._model_fit_term() + self.likelihood.Z)
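Written out, the returned quantity is the Gaussian marginal log-likelihood plus the likelihood's Z correction (zero for an exact Gaussian likelihood). Assuming _model_fit_term() computes the trace term below, the formula the code implements is:

    \log p(Y \mid X, \theta) = -\frac{ND}{2}\log 2\pi - \frac{D}{2}\log\lvert K\rvert - \frac{1}{2}\,\mathrm{tr}\big(K^{-1} Y Y^{\top}\big) + Z

with N = self.num_data and D = self.output_dim.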
def _log_likelihood_gradients(self):
"""
The gradient of all parameters.
Note that we use the chain rule: dL_dtheta = dL_dK * dK_dtheta
"""
#return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
if not isinstance(self.likelihood,EP):
tmp = np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
else:
tmp = np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
return tmp
return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
"""
@@ -193,3 +194,11 @@ class GP(GPBase):
"""
Xnew = self._add_output_index(Xnew, output)
return self.predict(Xnew, which_parts=which_parts, full_cov=full_cov, likelihood_args=likelihood_args)
def getstate(self):
return GPBase.getstate(self)
def setstate(self, state):
GPBase.setstate(self, state)
self._set_params(self._get_params())

View file

@@ -9,7 +9,9 @@ from ..likelihoods import Gaussian, Gaussian_Mixed_Noise
class GPBase(Model):
"""
Gaussian process base model for holding shared behaviour between
sparse_GP and GP models.
sparse_GP and GP models, and potentially other models in the future.
Here we define some functions that are used by all of these models.
"""
def __init__(self, X, likelihood, kernel, normalize_X=False):
self.X = X
@@ -34,29 +36,6 @@ class GPBase(Model):
# All leaf nodes should call self._set_params(self._get_params()) at
# the end
def getstate(self):
"""
Get the current state of the class, here we return everything that is needed to recompute the model.
"""
return Model.getstate(self) + [self.X,
self.num_data,
self.input_dim,
self.kern,
self.likelihood,
self.output_dim,
self._Xoffset,
self._Xscale]
def setstate(self, state):
self._Xscale = state.pop()
self._Xoffset = state.pop()
self.output_dim = state.pop()
self.likelihood = state.pop()
self.kern = state.pop()
self.input_dim = state.pop()
self.num_data = state.pop()
self.X = state.pop()
Model.setstate(self, state)
def posterior_samples_f(self,X,size=10,which_parts='all',full_cov=True):
"""
@@ -110,90 +89,43 @@ class GPBase(Model):
return Ysim
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
def plot_f(self, *args, **kwargs):
"""
Plot the GP's view of the world, where the data is normalized and the likelihood is Gaussian.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted function
- Not implemented in higher dimensions
Plot the GP's view of the world, where the data is normalized and before applying a likelihood.
:param samples: the number of a posteriori samples to plot
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:param which_data: which of the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param full_cov:
:type full_cov: bool
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
This is a convenience function: we simply call self.plot with the
argument plot_raw set True. All args and kwargs are passed on to
plot.
:param output: which output to plot (for multiple output models only)
:type output: integer (first output is 0)
see also: gp_base.plot
"""
if which_data == 'all':
which_data = slice(None)
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if self.X.shape[1] == 1:
resolution = resolution or 200
Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
m, v = self._raw_predict(Xnew, which_parts=which_parts)
if samples:
Ysim = self.posterior_samples_f(Xnew, samples, which_parts=which_parts, full_cov=True)
for yi in Ysim.T:
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
ax.set_xlim(xmin, xmax)
ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 2:
resolution = resolution or 50
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
m, v = self._raw_predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T
ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])
if samples:
warnings.warn("Samples only implemented for 1 dimensional inputs.")
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
"""
Plot the GP with noise where the likelihood is Gaussian.
kwargs['plot_raw'] = True
self.plot(*args, **kwargs)
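A minimal usage sketch of the forwarding above (any fitted GPy model m is assumed):

    # the two calls below are now equivalent: plot_f forwards to plot with plot_raw=True
    m.plot_f()
    m.plot(plot_raw=True)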
def plot(self, plot_limits=None, which_data_rows='all',
which_data_ycols='all', which_parts='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
"""
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted function
- Not implemented in higher dimensions
- In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.
Can plot only part of the data and part of the posterior functions
using which_data and which_functions
using which_data_rows, which_data_ycols and which_parts
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:type plot_limits: np.array
:param which_data: which of the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice self.X, self.Y
:param which_data_ycols: when the data has several columns (independent outputs), only plot these
:type which_data_ycols: 'all' or a list of integers
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
@@ -205,216 +137,138 @@ class GPBase(Model):
:param ax: axes to plot on.
:type ax: axes handle
:type output: integer (first output is 0)
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param linecol: color of line to plot.
:type linecol:
:param fillcol: color of fill
:param levels: for 2D plotting, the number of contour levels to use. If ax is None, a new figure is created.
"""
if which_data == 'all':
which_data = slice(None)
#deal with optional arguments
if which_data_rows == 'all':
which_data_rows = slice(None)
if which_data_ycols == 'all':
which_data_ycols = np.arange(self.output_dim)
if len(which_data_ycols)==0:
raise ValueError('No data selected for plotting')
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
plotdims = self.input_dim - len(fixed_inputs)
if plotdims == 1:
#work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs])
free_dims = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
#one dimensional plotting
if len(free_dims) == 1:
#define the frame on which to plot
resolution = resolution or 200
Xu = self.X * self._Xscale + self._Xoffset #NOTE self.X are the normalized values now
fixed_dims = np.array([i for i,v in fixed_inputs])
freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
Xnew, xmin, xmax = x_frame1D(Xu[:,freedim], plot_limits=plot_limits)
Xnew, xmin, xmax = x_frame1D(Xu[:,free_dims], plot_limits=plot_limits)
Xgrid = np.empty((Xnew.shape[0],self.input_dim))
Xgrid[:,freedim] = Xnew
Xgrid[:,free_dims] = Xnew
for i,v in fixed_inputs:
Xgrid[:,i] = v
m, v, lower, upper = self.predict(Xgrid, which_parts=which_parts)
#make a prediction on the frame and plot it
if plot_raw:
m, v = self._raw_predict(Xgrid, which_parts=which_parts)
lower = m - 2*np.sqrt(v)
upper = m + 2*np.sqrt(v)
Y = self.likelihood.Y
else:
m, v, lower, upper = self.predict(Xgrid, which_parts=which_parts)
Y = self.likelihood.data
for d in which_data_ycols:
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(Xu[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5)
#optionally plot some samples
if samples: #NOTE not tested with fixed_inputs
Ysim = self.posterior_samples(Xgrid, samples, which_parts=which_parts, full_cov=True)
for yi in Ysim.T:
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
for d in range(m.shape[1]):
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(Xu[which_data,freedim], self.likelihood.data[which_data, d], 'kx', mew=1.5)
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
#set the limits of the plot to some sensible values
ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 2:
#2D plotting
elif len(free_dims) == 2:
#define the frame for plotting on
resolution = resolution or 50
Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
Xu = self.X * self._Xscale + self._Xoffset #NOTE self.X are the normalized values now
Xnew, _, _, xmin, xmax = x_frame2D(Xu[:,free_dims], plot_limits, resolution)
Xgrid = np.empty((Xnew.shape[0],self.input_dim))
Xgrid[:,free_dims] = Xnew
for i,v in fixed_inputs:
Xgrid[:,i] = v
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T
ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
Yf = self.likelihood.Y.flatten()
ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
#predict on the frame and plot
if plot_raw:
m, _ = self._raw_predict(Xgrid, which_parts=which_parts)
Y = self.likelihood.Y
else:
m, _, _, _ = self.predict(Xgrid, which_parts=which_parts)
Y = self.likelihood.data
for d in which_data_ycols:
m_d = m[:,d].reshape(resolution, resolution).T
ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
ax.scatter(self.X[which_data_rows, free_dims[0]], self.X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
#set the limits of the plot to some sensible values
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])
if samples:
warnings.warn("Samples only implemented for 1 dimensional inputs.")
warnings.warn("Samples are rather difficult to plot for 2D inputs...")
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
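A usage sketch for the new fixed_inputs mechanism (mirroring the coregionalization example later in this commit): for a model m with two input dimensions, hold input 1 fixed and plot the posterior along input 0:

    m.plot(fixed_inputs=[(1, 0.0)])  # slice the GP at input 1 = 0.0
    m.plot(fixed_inputs=[(1, 1.0)])  # slice the GP at input 1 = 1.0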
def plot_single_output_f(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
def getstate(self):
"""
For a specific output, in a multioutput model, this function works just as plot_f on single output models.
:param output: which output to plot (for multiple output models only)
:type output: integer (first output is 0)
:param samples: the number of a posteriori samples to plot
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:param which_data: which of the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param full_cov:
:type full_cov: bool
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
Get the current state of the class. This is only used to efficiently
pickle the model. See also self.setstate
"""
assert output is not None, "An output must be specified."
assert len(self.likelihood.noise_model_list) > output, "The model has only %s outputs." %(self.output_dim + 1)
return Model.getstate(self) + [self.X,
self.num_data,
self.input_dim,
self.kern,
self.likelihood,
self.output_dim,
self._Xoffset,
self._Xscale]
if which_data == 'all':
which_data = slice(None)
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if self.X.shape[1] == 2:
Xu = self.X[self.X[:,-1]==output ,0:1]
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
Xnew_indexed = self._add_output_index(Xnew,output)
m, v = self._raw_predict(Xnew_indexed, which_parts=which_parts)
if samples:
Ysim = self.posterior_samples_f(Xnew_indexed, samples, which_parts=which_parts, full_cov=True)
for yi in Ysim.T:
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5)
ax.set_xlim(xmin, xmax)
ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 3:
raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
#if samples:
# warnings.warn("Samples only implemented for 1 dimensional inputs.")
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot_single_output(self, output=None, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
def setstate(self, state):
"""
For a specific output, in a multioutput model, this function works just as plot on single output models.
:param output: which output to plot (for multiple output models only)
:type output: integer (first output is 0)
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:type plot_limits: np.array
:param which_data: which of the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:type levels: int
:param samples: the number of a posteriori samples to plot
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:type output: integer (first output is 0)
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param linecol: color of line to plot.
:type linecol:
:param fillcol: color of fill
:param levels: for 2D plotting, the number of contour levels to use. If ax is None, a new figure is created.
Set the state of the model. Used for efficient pickling
"""
assert output is not None, "An output must be specified."
assert len(self.likelihood.noise_model_list) > output, "The model has only %s outputs." %(self.output_dim + 1)
if which_data == 'all':
which_data = slice(None)
self._Xscale = state.pop()
self._Xoffset = state.pop()
self.output_dim = state.pop()
self.likelihood = state.pop()
self.kern = state.pop()
self.input_dim = state.pop()
self.num_data = state.pop()
self.X = state.pop()
Model.setstate(self, state)
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if self.X.shape[1] == 2:
resolution = resolution or 200
Xu = self.X[self.X[:,-1]==output,:] #keep the output of interest
Xu = self.X * self._Xscale + self._Xoffset
Xu = self.X[self.X[:,-1]==output ,0:1] #get rid of the index column
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
Xnew_indexed = self._add_output_index(Xnew,output)
m, v, lower, upper = self.predict(Xnew_indexed, which_parts=which_parts,noise_model=output)
if samples: #NOTE not tested with fixed_inputs
Ysim = self.posterior_samples(Xnew_indexed, samples, which_parts=which_parts, full_cov=True,noise_model=output)
for yi in Ysim.T:
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
for d in range(m.shape[1]):
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5)
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 3:
raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
#if samples:
# warnings.warn("Samples only implemented for 1 dimensional inputs.")
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def _add_output_index(self,X,output):
def log_predictive_density(self, x_test, y_test):
"""
In a multioutput model, appends an index column to X to specify the output it is related to.
Calculation of the log predictive density
:param X: Input data
:type X: np.ndarray, N x self.input_dim
:param output: output X is related to
:type output: integer in {0,..., output_dim-1}
.. math::
p(y_{*}|D) = \int p(y_{*}|f_{*})\,p(f_{*}|\mu_{*},\sigma^{2}_{*})\,df_{*}
.. Note:: For multiple non-independent outputs models only.
:param x_test: test observations (x_{*})
:type x_test: (Nx1) array
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
"""
assert hasattr(self,'multioutput'), 'This function is for multiple output models only.'
index = np.ones((X.shape[0],1))*output
return np.hstack((X,index))
mu_star, var_star = self._raw_predict(x_test)
return self.likelihood.log_predictive_density(y_test, mu_star, var_star)
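A usage sketch matching the Boston housing example later in this commit: score held-out data by the mean log predictive density (X_test, y_test and a fitted model m are assumed):

    import numpy as np
    mean_lpd = np.mean(m.log_predictive_density(X_test, y_test))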

View file

@@ -251,7 +251,7 @@ class Model(Parameterized):
self._set_params_transformed(initial_parameters)
def ensure_default_constraints(self):
"""
"""
Ensure that any variables which should clearly be positive
have been constrained somehow. The method performs a regular
expression search on parameter names looking for the terms
@@ -259,7 +259,7 @@ class Model(Parameterized):
these terms are present in the name the parameter is
constrained positive.
"""
positive_strings = ['variance', 'lengthscale', 'precision', 'kappa']
positive_strings = ['variance', 'lengthscale', 'precision', 'decay', 'kappa']
# param_names = self._get_param_names()
currently_constrained = self.all_constrained_indices()
to_make_positive = []
@@ -274,7 +274,7 @@ class Model(Parameterized):
"""
The objective function passed to the optimizer. It combines
the likelihood and the priors.
Failures are handled robustly. The algorithm will try several times to
return the objective, and will raise the original exception if
the objective cannot be computed.
@@ -462,7 +462,7 @@ class Model(Parameterized):
numerical_gradient = (f1 - f2) / (2 * dx)
global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient==0, 1e-32, gradient)))
return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance)
else:
# check the gradient of each parameter individually, and do some pretty printing
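The global ratio test above is easy to reproduce outside the model class; a self-contained sketch of the same central-difference check (not GPy code):

    import numpy as np

    def check_grad(f, grad, x, step=1e-6, tolerance=1e-3):
        # perturb all parameters at once and compare the analytic slope against
        # central differences, exactly the global test used above
        dx = step * np.sign(np.random.uniform(-1, 1, x.size))
        f1, f2 = f(x + dx), f(x - dx)
        g = grad(x)
        global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(g == 0, 1e-32, g)))
        return np.abs(1. - global_ratio) < tolerance

    print check_grad(lambda x: np.sum(x ** 2), lambda x: 2 * x, np.ones(3))  # True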
@@ -547,9 +547,9 @@ class Model(Parameterized):
:param stop_crit: convergence criterion
:type stop_crit: float
.. Note: kwargs are passed to update_likelihood and optimize functions.
.. Note: kwargs are passed to update_likelihood and optimize functions.
"""
assert isinstance(self.likelihood, likelihoods.EP) or isinstance(self.likelihood, likelihoods.EP_Mixed_Noise), "pseudo_EM is only available for EP likelihoods"
assert isinstance(self.likelihood, (likelihoods.EP, likelihoods.EP_Mixed_Noise, likelihoods.Laplace)), "pseudo_EM is only available for approximate likelihoods"
ll_change = stop_crit + 1.
iteration = 0
last_ll = -np.inf
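For orientation (per the note above, kwargs go to update_likelihood and optimize): the loop initialised here alternates EM-style between refitting the likelihood approximation and optimising the hyperparameters, until the log-likelihood change drops below stop_crit. Typical use is simply:

    m.pseudo_EM()  # as in the classification example later in this commit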

View file

@@ -52,23 +52,6 @@ class SparseGP(GPBase):
self._const_jitter = None
def getstate(self):
"""
Get the current state of the class;
here just the indices, the rest can be recomputed
"""
return GPBase.getstate(self) + [self.Z,
self.num_inducing,
self.has_uncertain_inputs,
self.X_variance]
def setstate(self, state):
self.X_variance = state.pop()
self.has_uncertain_inputs = state.pop()
self.num_inducing = state.pop()
self.Z = state.pop()
GPBase.setstate(self, state)
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
@@ -87,7 +70,6 @@
# factor Kmm
self._Lm = jitchol(self.Kmm + self._const_jitter)
# TODO: no white kernel needed anymore, all noise in likelihood --------
# The rather complex computations of self._A
if self.has_uncertain_inputs:
@@ -156,7 +138,7 @@
# the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
if self.likelihood.num_params == 0:
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
@@ -341,7 +323,10 @@
return mean, var, _025pm, _975pm
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
def plot_f(self, samples=0, plot_limits=None, which_data_rows='all',
which_data_ycols='all', which_parts='all', resolution=None,
full_cov=False, fignum=None, ax=None):
"""
Plot the GP's view of the world, where the data is normalized and the likelihood is Gaussian.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@@ -350,8 +335,8 @@ class SparseGP(GPBase):
:param samples: the number of a posteriori samples to plot
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:param which_data: which of the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice self.X, self.Y
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
@@ -371,10 +356,10 @@ class SparseGP(GPBase):
ax = fig.add_subplot(111)
if fignum is None and ax is None:
fignum = fig.num
if which_data == 'all':
which_data = slice(None)
if which_data_rows == 'all':
which_data_rows = slice(None)
GPBase.plot_f(self, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, full_cov=full_cov, fignum=fignum, ax=ax)
GPBase.plot_f(self, samples=samples, plot_limits=plot_limits, which_data_rows=which_data_rows, which_data_ycols=which_data_ycols, which_parts=which_parts, resolution=resolution, fignum=fignum, ax=ax)
if self.X.shape[1] == 1:
if self.has_uncertain_inputs:
@@ -389,177 +374,98 @@ class SparseGP(GPBase):
Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None):
def plot(self, plot_limits=None, which_data_rows='all',
which_data_ycols='all', which_parts='all', fixed_inputs=[],
plot_raw=False,
levels=20, samples=0, fignum=None, ax=None, resolution=None):
"""
Plot the posterior of the sparse GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted function
- In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.
Can plot only part of the data and part of the posterior functions
using which_data_rows, which_data_ycols and which_parts
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:type plot_limits: np.array
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice self.X, self.Y
:param which_data_ycols: when the data has several columns (independent outputs), only plot these
:type which_data_ycols: 'all' or a list of integers
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:type levels: int
:param samples: the number of a posteriori samples to plot
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:type output: integer (first output is 0)
:param linecol: color of line to plot.
:type linecol:
:param fillcol: color of fill
:param levels: for 2D plotting, the number of contour levels to use. If ax is None, a new figure is created.
"""
#work out which ax to plot on
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if fignum is None and ax is None:
fignum = fig.num
if which_data == 'all':
which_data = slice(None)
GPBase.plot(self, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, levels=20, fignum=fignum, ax=ax)
#work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs])
free_dims = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
if self.X.shape[1] == 1:
#call the base plotting
GPBase.plot(self, samples=samples, plot_limits=plot_limits,
which_data_rows=which_data_rows,
which_data_ycols=which_data_ycols, fixed_inputs=fixed_inputs,
which_parts=which_parts, resolution=resolution, levels=20,
fignum=fignum, ax=ax)
if len(free_dims) == 1:
#plot errorbars for the uncertain inputs
if self.has_uncertain_inputs:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
ax.errorbar(Xu[which_data_rows, 0], self.likelihood.data[which_data_rows, 0],
xerr=2 * np.sqrt(self.X_variance[which_data_rows, 0]),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
#plot the inducing inputs
Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
elif self.X.shape[1] == 2:
elif len(free_dims) == 2:
Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
def getstate(self):
"""
For a specific output, predict the function at the new point(s) Xnew.
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param output: output to predict
:type output: integer in {0,..., num_outputs-1}
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.input_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
.. Note:: For multiple output models only
Get the current state of the class;
here just the indices, the rest can be recomputed
"""
return GPBase.getstate(self) + [self.Z,
self.num_inducing,
self.has_uncertain_inputs,
self.X_variance]
assert hasattr(self,'multioutput')
index = np.ones_like(Xnew)*output
Xnew = np.hstack((Xnew,index))
# normalize X values
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output)
return mean, var, _025pm, _975pm
def _raw_predict_single_output(self, _Xnew, output=0, X_variance_new=None, which_parts='all', full_cov=False,stop=False):
"""
Internal helper function for making predictions for a specific output,
does not account for normalization or likelihood
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param output: output to predict
:type output: integer in {0,..., num_outputs-1}
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
.. Note:: For multiple output models only
"""
Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work!
symmetrify(Bi)
Kmmi_LmiBLmi = backsub_both_sides(self._Lm, np.eye(self.num_inducing) - Bi)
if self.Cpsi1V is None:
psi1V = np.dot(self.psi1.T,self.likelihood.V)
tmp, _ = dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
tmp, _ = dpotrs(self.LB, tmp, lower=1)
self.Cpsi1V, _ = dtrtrs(self._Lm, tmp, lower=1, trans=1)
assert hasattr(self,'multioutput')
index = np.ones_like(_Xnew)*output
_Xnew = np.hstack((_Xnew,index))
if X_variance_new is None:
Kx = self.kern.K(self.Z, _Xnew, which_parts=which_parts)
mu = np.dot(Kx.T, self.Cpsi1V)
if full_cov:
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
else:
Kx = self.kern.psi1(self.Z, _Xnew, X_variance_new)
mu = np.dot(Kx, self.Cpsi1V)
if full_cov:
raise NotImplementedError, "TODO"
else:
Kxx = self.kern.psi0(self.Z, _Xnew, X_variance_new)
psi2 = self.kern.psi2(self.Z, _Xnew, X_variance_new)
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
return mu, var[:, None]
def setstate(self, state):
self.X_variance = state.pop()
self.has_uncertain_inputs = state.pop()
self.num_inducing = state.pop()
self.Z = state.pop()
GPBase.setstate(self, state)
def plot_single_output_f(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if fignum is None and ax is None:
fignum = fig.num
if which_data == 'all':
which_data = slice(None)
GPBase.plot_single_output_f(self, output=output, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, full_cov=full_cov, fignum=fignum, ax=ax)
if self.X.shape[1] == 2:
if self.has_uncertain_inputs:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
Zu = self.Z * self._Xscale + self._Xoffset
Zu = Zu[Zu[:,1]==output,0:1]
ax.plot(Zu[:,0], np.zeros_like(Zu[:,0]) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
elif self.X.shape[1] == 2:
Zu = self.Z * self._Xscale + self._Xoffset
Zu = Zu[Zu[:,1]==output,0:2]
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot_single_output(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None):
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if fignum is None and ax is None:
fignum = fig.num
if which_data == 'all':
which_data = slice(None)
GPBase.plot_single_output(self, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, levels=20, fignum=fignum, ax=ax, output=output)
if self.X.shape[1] == 2:
if self.has_uncertain_inputs:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
Zu = self.Z * self._Xscale + self._Xoffset
Zu = Zu[Zu[:,1]==output,0:1]
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
elif self.X.shape[1] == 3:
Zu = self.Z * self._Xscale + self._Xoffset
Zu = Zu[Zu[:,1]==output,0:1]
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

View file

@@ -18,30 +18,16 @@ class SVIGP(GPBase):
Stochastic Variational inference in a Gaussian Process
:param X: inputs
:type X: np.ndarray (N x Q)
:type X: np.ndarray (num_data x num_inputs)
:param Y: observed data
:type Y: np.ndarray of observations (N x D)
:param batchsize: the size of a h
Additional kwargs are used as for a sparse GP. They include:
:type Y: np.ndarray of observations (num_data x output_dim)
:param batchsize: the size of a minibatch
:param q_u: canonical parameters of the distribution squashed into a 1D array
:type q_u: np.ndarray
:param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param kernel: the kernel/covariance function. See link kernels
:type kernel: a GPy kernel
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
:type X_uncertainty: np.ndarray (N x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param beta: noise precision. TODO: ignore beta if doing EP
:type beta: float
:param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
:param Z: inducing inputs
:type Z: np.ndarray (num_inducing x num_inputs)
"""
@@ -350,8 +336,8 @@ class SVIGP(GPBase):
#callback
if i and not i%callback_interval:
callback()
time.sleep(0.1)
callback(self) # Change this to callback()
time.sleep(0.01)
if self.epochs > 10:
self._adapt_steplength()
@@ -367,13 +353,13 @@ class SVIGP(GPBase):
assert self.vb_steplength > 0
if self.adapt_param_steplength:
# self._adaptive_param_steplength()
self._adaptive_param_steplength()
# self._adaptive_param_steplength_log()
self._adaptive_param_steplength_from_vb()
# self._adaptive_param_steplength_from_vb()
self._param_steplength_trace.append(self.param_steplength)
def _adaptive_param_steplength(self):
decr_factor = 0.1
decr_factor = 0.02
g_tp = self._transform_gradients(self._log_likelihood_gradients())
self.gbar_tp = (1-1/self.tau_tp)*self.gbar_tp + 1/self.tau_tp * g_tp
self.hbar_tp = (1-1/self.tau_tp)*self.hbar_tp + 1/self.tau_tp * np.dot(g_tp.T, g_tp)
@@ -407,7 +393,7 @@ class SVIGP(GPBase):
self.tau_t = self.tau_t*(1-self.vb_steplength) + 1
def _adaptive_vb_steplength_KL(self):
decr_factor = 1 #0.1
decr_factor = 0.1
natgrad = self.vb_grad_natgrad()
g_t1 = natgrad[0]
g_t2 = natgrad[1]

GPy/core/variational.py (new file)
@@ -0,0 +1,19 @@
'''
Created on 6 Nov 2013
@author: maxz
'''
from parameterized import Parameterized
from parameter import Param
class Normal(Parameterized):
'''
Normal distribution for variational approximations.
Holds the means and variances of a factorizing multivariate normal distribution.
'''
def __init__(self, name, means, variances):
Parameterized.__init__(self, name=name)
self.means = Param("mean", means)
self.variances = Param('variance', variances)
self.add_parameters(self.means, self.variances)
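A hedged instantiation sketch for the new class (the array shapes are an assumption; the class simply wraps the two arrays as Params):

    import numpy as np
    # one mean and one variance per latent coordinate of each point
    q = Normal('q(X)', means=np.zeros((10, 2)), variances=np.ones((10, 2)))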


@@ -43,7 +43,7 @@ def oil(num_inducing=50, max_iters=100, kernel=None):
def toy_linear_1d_classification(seed=default_seed):
"""
Simple 1D classification example
Simple 1D classification example using EP approximation
:param seed: seed value for data generation (default is 4).
:type seed: int
@@ -61,6 +61,7 @@ def toy_linear_1d_classification(seed=default_seed):
#m.update_likelihood_approximation()
# Parameters optimization:
#m.optimize()
#m.update_likelihood_approximation()
m.pseudo_EM()
# Plot
@@ -71,6 +72,41 @@ def toy_linear_1d_classification(seed=default_seed):
return m
def toy_linear_1d_classification_laplace(seed=default_seed):
"""
Simple 1D classification example using Laplace approximation
:param seed: seed value for data generation (default is 4).
:type seed: int
"""
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
bern_noise_model = GPy.likelihoods.bernoulli()
laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), bern_noise_model)
# Model definition
m = GPy.models.GPClassification(data['X'], Y, likelihood=laplace_likelihood)
print m
# Optimize
#m.update_likelihood_approximation()
# Parameters optimization:
m.optimize('bfgs', messages=1)
#m.pseudo_EM()
# Plot
fig, axes = pb.subplots(2,1)
m.plot_f(ax=axes[0])
m.plot(ax=axes[1])
print(m)
return m
def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
"""
Sparse 1D classification example
@@ -116,7 +152,7 @@ def toy_heaviside(seed=default_seed):
Y[Y.flatten() == -1] = 0
# Model definition
noise_model = GPy.likelihoods.binomial(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
noise_model = GPy.likelihoods.bernoulli(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
likelihood = GPy.likelihoods.EP(Y,noise_model)
m = GPy.models.GPClassification(data['X'], likelihood=likelihood)


@@ -12,10 +12,10 @@ from GPy.likelihoods.gaussian import Gaussian
default_seed = np.random.seed(123344)
def BGPLVM(seed=default_seed):
N = 5
num_inducing = 4
Q = 3
D = 2
N = 13
num_inducing = 5
Q = 6
D = 25
# generate GPLVM-like data
X = np.random.rand(N, Q)
lengthscales = np.random.rand(Q)
@@ -25,9 +25,12 @@ def BGPLVM(seed=default_seed):
Y = np.random.multivariate_normal(np.zeros(N), K, D).T
lik = Gaussian(Y, normalize=True)
k = GPy.kern.rbf_inv(Q, .5, np.ones(Q) * 2., ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
# k = GPy.kern.rbf_inv(Q, .5, np.ones(Q) * 2., ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
# k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
# k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
# k = GPy.kern.rbf(Q, .5, np.ones(Q) * 2., ARD=True) + GPy.kern.rbf(Q, .3, np.ones(Q) * .2, ARD=True)
k = GPy.kern.rbf(Q, .5, np.ones(Q) * 2., ARD=True) + GPy.kern.linear(Q, np.ones(Q) * .2, ARD=True)
# k = GPy.kern.rbf(Q, .5, 2., ARD=0) + GPy.kern.rbf(Q, .3, .2, ARD=0)
m = GPy.models.BayesianGPLVM(lik, Q, kernel=k, num_inducing=num_inducing)
m.lengthscales = lengthscales
@@ -331,27 +334,46 @@ def brendan_faces():
from GPy import kern
data = GPy.util.datasets.brendan_faces()
Q = 2
Y = data['Y'][0:-1:10, :]
# Y = data['Y']
Y = data['Y']
Yn = Y - Y.mean()
Yn /= Yn.std()
m = GPy.models.GPLVM(Yn, Q)
# m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=100)
# optimize
m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
m.optimize('scg', messages=1, max_f_eval=10000)
m.optimize('scg', messages=1, max_iters=1000)
ax = m.plot_latent(which_indices=(0, 1))
y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False)
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')
return m
def olivetti_faces():
from GPy import kern
data = GPy.util.datasets.olivetti_faces()
Q = 2
Y = data['Y']
Yn = Y - Y.mean()
Yn /= Yn.std()
m = GPy.models.GPLVM(Yn, Q)
m.optimize('scg', messages=1, max_iters=1000)
ax = m.plot_latent(which_indices=(0, 1))
y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False)
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')
return m
def stick_play(range=None, frame_rate=15):
data = GPy.util.datasets.osu_run1()
# optimize
if range == None:


@@ -0,0 +1,296 @@
import GPy
import numpy as np
import matplotlib.pyplot as plt
from GPy.util import datasets
np.random.seed(1)
def student_t_approx():
"""
Example of regressing with a student t likelihood
"""
real_std = 0.1
#Start a function, any function
X = np.linspace(0.0, np.pi*2, 100)[:, None]
Y = np.sin(X) + np.random.randn(*X.shape)*real_std
Yc = Y.copy()
X_full = np.linspace(0.0, np.pi*2, 500)[:, None]
Y_full = np.sin(X_full)
Y = Y/Y.max()
#Slightly noisy data
Yc[75:80] += 1
#Very noisy data
#Yc[10] += 100
#Yc[25] += 10
#Yc[23] += 10
#Yc[26] += 1000
#Yc[24] += 10
#Yc = Yc/Yc.max()
#Add student t random noise to datapoints
deg_free = 5
print "Real noise: ", real_std
initial_var_guess = 0.5
#t_rv = t(deg_free, loc=0, scale=real_var)
#noise = t_rv.rvs(size=Y.shape)
#Y += noise
plt.figure(1)
plt.suptitle('Gaussian likelihood')
# Kernel object
kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
kernel2 = kernel1.copy()
kernel3 = kernel1.copy()
kernel4 = kernel1.copy()
kernel5 = kernel1.copy()
kernel6 = kernel1.copy()
print "Clean Gaussian"
#A GP should completely break down due to the points as they get a lot of weight
# create simple GP model
m = GPy.models.GPRegression(X, Y, kernel=kernel1)
# optimize
m.ensure_default_constraints()
m.constrain_fixed('white', 1e-4)
m.randomize()
m.optimize()
# plot
ax = plt.subplot(211)
m.plot(ax=ax)
plt.plot(X_full, Y_full)
plt.ylim(-1.5, 1.5)
plt.title('Gaussian clean')
print m
#Corrupt
print "Corrupt Gaussian"
m = GPy.models.GPRegression(X, Yc, kernel=kernel2)
m.ensure_default_constraints()
m.constrain_fixed('white', 1e-4)
m.randomize()
m.optimize()
ax = plt.subplot(212)
m.plot(ax=ax)
plt.plot(X_full, Y_full)
plt.ylim(-1.5, 1.5)
plt.title('Gaussian corrupt')
print m
plt.figure(2)
plt.suptitle('Student-t likelihood')
edited_real_sd = initial_var_guess
print "Clean student t, rasm"
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution)
m = GPy.models.GPRegression(X, Y.copy(), kernel6, likelihood=stu_t_likelihood)
m.ensure_default_constraints()
m.constrain_positive('t_noise')
m.constrain_fixed('white', 1e-4)
m.randomize()
#m.update_likelihood_approximation()
m.optimize()
print(m)
ax = plt.subplot(211)
m.plot(ax=ax)
plt.plot(X_full, Y_full)
plt.ylim(-1.5, 1.5)
plt.title('Student-t rasm clean')
print "Corrupt student t, rasm"
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution)
m = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood)
m.ensure_default_constraints()
m.constrain_positive('t_noise')
m.constrain_fixed('white', 1e-4)
m.randomize()
for a in range(1):
m.randomize()
m_start = m.copy()
print m
m.optimize('scg', messages=1)
print(m)
ax = plt.subplot(212)
m.plot(ax=ax)
plt.plot(X_full, Y_full)
plt.ylim(-1.5, 1.5)
plt.title('Student-t rasm corrupt')
return m
def boston_example():
import sklearn
from sklearn.cross_validation import KFold
optimizer='bfgs'
messages=0
data = datasets.boston_housing()
degrees_freedoms = [3, 5, 8, 10]
X = data['X'].copy()
Y = data['Y'].copy()
X = X-X.mean(axis=0)
X = X/X.std(axis=0)
Y = Y-Y.mean()
Y = Y/Y.std()
num_folds = 10
kf = KFold(len(Y), n_folds=num_folds, indices=True)
num_models = len(degrees_freedoms) + 3 #3 for baseline, gaussian, gaussian laplace approx
score_folds = np.zeros((num_models, num_folds))
pred_density = score_folds.copy()
def rmse(Y, Ystar):
return np.sqrt(np.mean((Y-Ystar)**2))
for n, (train, test) in enumerate(kf):
X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
print "Fold {}".format(n)
noise = 1e-1 #np.exp(-2)
rbf_len = 0.5
data_axis_plot = 4
plot = False
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
#Baseline
score_folds[0, n] = rmse(Y_test, np.mean(Y_train))
#Gaussian GP
print "Gauss GP"
mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
mgp.ensure_default_constraints()
mgp.constrain_fixed('white', 1e-5)
mgp['rbf_len'] = rbf_len
mgp['noise'] = noise
print mgp
mgp.optimize(optimizer=optimizer, messages=messages)
Y_test_pred = mgp.predict(X_test)
score_folds[1, n] = rmse(Y_test, Y_test_pred[0])
pred_density[1, n] = np.mean(mgp.log_predictive_density(X_test, Y_test))
print mgp
print pred_density
if plot:
plt.figure()
plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0])
plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
plt.title('GP gauss')
print "Gaussian Laplace GP"
N, D = Y_train.shape
g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D)
g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution)
mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood)
mg.ensure_default_constraints()
mg.constrain_positive('noise_variance')
mg.constrain_fixed('white', 1e-5)
mg['rbf_len'] = rbf_len
mg['noise'] = noise
print mg
try:
mg.optimize(optimizer=optimizer, messages=messages)
except Exception:
print "Blew up"
Y_test_pred = mg.predict(X_test)
score_folds[2, n] = rmse(Y_test, Y_test_pred[0])
pred_density[2, n] = np.mean(mg.log_predictive_density(X_test, Y_test))
print pred_density
print mg
if plot:
plt.figure()
plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0])
plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
plt.title('Lap gauss')
for stu_num, df in enumerate(degrees_freedoms):
#Student T
print "Student-T GP {}df".format(df)
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise)
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
mstu_t.ensure_default_constraints()
mstu_t.constrain_fixed('white', 1e-5)
mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
mstu_t['rbf_len'] = rbf_len
mstu_t['t_noise'] = noise
print mstu_t
try:
mstu_t.optimize(optimizer=optimizer, messages=messages)
except Exception:
print "Blew up"
Y_test_pred = mstu_t.predict(X_test)
score_folds[3+stu_num, n] = rmse(Y_test, Y_test_pred[0])
pred_density[3+stu_num, n] = np.mean(mstu_t.log_predictive_density(X_test, Y_test))
print pred_density
print mstu_t
if plot:
plt.figure()
plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0])
plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
plt.title('Stu t {}df'.format(df))
print "Average scores: {}".format(np.mean(score_folds, 1))
print "Average pred density: {}".format(np.mean(pred_density, 1))
#Plotting
stu_t_legends = ['Student T, df={}'.format(df) for df in degrees_freedoms]
legends = ['Baseline', 'Gaussian', 'Laplace Approx Gaussian'] + stu_t_legends
#Plot boxplots for RMSE density
fig = plt.figure()
ax=fig.add_subplot(111)
plt.title('RMSE')
bp = ax.boxplot(score_folds.T, notch=0, sym='+', vert=1, whis=1.5)
plt.setp(bp['boxes'], color='black')
plt.setp(bp['whiskers'], color='black')
plt.setp(bp['fliers'], color='red', marker='+')
xtickNames = plt.setp(ax, xticklabels=legends)
plt.setp(xtickNames, rotation=45, fontsize=8)
ax.set_ylabel('RMSE')
ax.set_xlabel('Distribution')
#Make grid and put it below boxes
ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
alpha=0.5)
ax.set_axisbelow(True)
#Plot boxplots for predictive density
fig = plt.figure()
ax=fig.add_subplot(111)
plt.title('Predictive density')
bp = ax.boxplot(pred_density[1:,:].T, notch=0, sym='+', vert=1, whis=1.5)
plt.setp(bp['boxes'], color='black')
plt.setp(bp['whiskers'], color='black')
plt.setp(bp['fliers'], color='red', marker='+')
xtickNames = plt.setp(ax, xticklabels=legends[1:])
plt.setp(xtickNames, rotation=45, fontsize=8)
ax.set_ylabel('Mean Log probability P(Y*|Y)')
ax.set_xlabel('Distribution')
#Make grid and put it below boxes
ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
alpha=0.5)
ax.set_axisbelow(True)
return mstu_t
def precipitation_example():
import sklearn
from sklearn.cross_validation import KFold
data = datasets.boston_housing()
X = data['X'].copy()
Y = data['Y'].copy()
X = X-X.mean(axis=0)
X = X/X.std(axis=0)
Y = Y-Y.mean()
Y = Y/Y.std()
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
num_folds = 10
kf = KFold(len(Y), n_folds=num_folds, indices=True)
score_folds = np.zeros((4, num_folds))
def rmse(Y, Ystar):
return np.sqrt(np.mean((Y-Ystar)**2))
#for train, test in kf:
for n, (train, test) in enumerate(kf):
X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
print "Fold {}".format(n)


@@ -57,8 +57,8 @@ def coregionalization_toy(max_iters=100):
m.optimize(max_iters=max_iters)
fig, axes = pb.subplots(2,1)
m.plot_single_output(output=0,ax=axes[0])
m.plot_single_output(output=1,ax=axes[1])
m.plot(fixed_inputs=[(1,0)],ax=axes[0])
m.plot(fixed_inputs=[(1,1)],ax=axes[1])
axes[0].set_title('Output 0')
axes[1].set_title('Output 1')
return m
@@ -270,6 +270,50 @@ def toy_rbf_1d_50(max_iters=100):
print(m)
return m
def toy_poisson_rbf_1d(optimizer='bfgs', max_nb_eval_optim=100):
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
x_len = 400
X = np.linspace(0, 10, x_len)[:, None]
f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X))
Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None]
noise_model = GPy.likelihoods.poisson()
likelihood = GPy.likelihoods.EP(Y,noise_model)
# create simple GP Model
m = GPy.models.GPRegression(X, Y, likelihood=likelihood)
# optimize
m.optimize(optimizer, max_f_eval=max_nb_eval_optim)
# plot
m.plot()
print(m)
return m
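Both Poisson demos sample from the same generative model (log link, standard Poisson-GP construction):

    f \sim \mathcal{GP}\big(0,\, k_{\mathrm{rbf}}\big), \qquad y_i \mid f(x_i) \sim \mathrm{Poisson}\big(\exp f(x_i)\big)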
def toy_poisson_rbf_1d_laplace(optimizer='bfgs', max_nb_eval_optim=100):
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
x_len = 30
X = np.linspace(0, 10, x_len)[:, None]
f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X))
Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None]
noise_model = GPy.likelihoods.poisson()
likelihood = GPy.likelihoods.Laplace(Y,noise_model)
# create simple GP Model
m = GPy.models.GPRegression(X, Y, likelihood=likelihood)
# optimize
m.optimize(optimizer, max_f_eval=max_nb_eval_optim)
# plot
m.plot()
# plot the real underlying rate function
pb.plot(X, np.exp(f_true), '--k', linewidth=2)
print(m)
return m
def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4):
    # Create an artificial dataset where the values in the targets (Y)
    # only depend on dimensions 1 and 3 of the inputs (X). Run ARD to

7
GPy/gpy_config.cfg Normal file
View file

@ -0,0 +1,7 @@
# This is the configuration file for GPy
[parallel]
# Enable openmp support. This speeds up some computations, depending on the number
# of cores available. Setting up a compiler with openmp support can be difficult on
# some platforms, hence this option.
openmp=False
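
The kernel code below consumes this file via config.getboolean('parallel', 'openmp'); as a minimal sketch of how such a file might be loaded through the standard library (the search paths here are assumptions, not GPy's exact loading logic in util/config.py):

# Hypothetical sketch of loading gpy_config.cfg; GPy's real util/config.py
# may differ in how it locates the file.
import os
import ConfigParser  # Python 2 stdlib; 'configparser' in Python 3

config = ConfigParser.ConfigParser()
config.read([os.path.join(os.path.dirname(__file__), 'gpy_config.cfg'),
             os.path.expanduser('~/.gpy_user.cfg')])  # user-override path is hypothetical
if config.getboolean('parallel', 'openmp'):
    print("weave code will be compiled with OpenMP support")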

View file

@ -62,7 +62,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient.
if any(np.isnan(gradnew)):
raise UnexpectedInfOrNan
raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
current_grad = np.dot(gradnew, gradnew)
gradold = gradnew.copy()
d = -gradnew # Initial search direction.

View file

@ -298,43 +298,67 @@ if sympy_available:
"""
Radial Basis Function covariance.
"""
X = [sp.var('x%i' % i) for i in range(input_dim)]
Z = [sp.var('z%i' % i) for i in range(input_dim)]
X = sp.symbols('x_:' + str(input_dim))
Z = sp.symbols('z_:' + str(input_dim))
variance = sp.var('variance',positive=True)
if ARD:
lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)]
dist_string = ' + '.join(['(x%i-z%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
lengthscales = sp.symbols('lengthscale_:' + str(input_dim))
        dist_string = ' + '.join(['(x_%i-z_%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
dist = parse_expr(dist_string)
f = variance*sp.exp(-dist/2.)
else:
lengthscale = sp.var('lengthscale',positive=True)
dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)])
dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(input_dim)])
dist = parse_expr(dist_string)
f = variance*sp.exp(-dist/(2*lengthscale**2))
return kern(input_dim, [spkern(input_dim, f, name='rbf_sympy')])
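
For illustration, a sketch of using this constructor (assumes it is exposed as GPy.kern.rbf_sympy and that sympy is installed):

# Hypothetical usage sketch for the sympy-backed RBF constructor above.
import numpy as np
import GPy

k = GPy.kern.rbf_sympy(2, ARD=True)  # one lengthscale_i per input dimension
X = np.random.randn(5, 2)
K = k.K(X)                  # 5x5 covariance matrix
assert np.allclose(K, K.T)  # a covariance matrix must be symmetric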
def eq_sympy(input_dim, output_dim, ARD=False, variance=1., lengthscale=1.):
"""
Exponentiated quadratic with multiple outputs.
"""
real_input_dim = input_dim
if output_dim>1:
real_input_dim -= 1
X = sp.symbols('x_:' + str(real_input_dim))
Z = sp.symbols('z_:' + str(real_input_dim))
scale = sp.var('scale_i scale_j',positive=True)
if ARD:
        lengthscales = [sp.var('lengthscale%i_i lengthscale%i_j' % (i, i), positive=True) for i in range(real_input_dim)]
shared_lengthscales = [sp.var('shared_lengthscale%i' % i, positive=True) for i in range(real_input_dim)]
dist_string = ' + '.join(['(x_%i-z_%i)**2/(shared_lengthscale%i**2 + lengthscale%i_i*lengthscale%i_j)' % (i, i, i) for i in range(real_input_dim)])
dist = parse_expr(dist_string)
f = variance*sp.exp(-dist/2.)
else:
lengthscales = sp.var('lengthscale_i lengthscale_j',positive=True)
shared_lengthscale = sp.var('shared_lengthscale',positive=True)
dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(real_input_dim)])
dist = parse_expr(dist_string)
f = scale_i*scale_j*sp.exp(-dist/(2*(lengthscale_i**2 + lengthscale_j**2 + shared_lengthscale**2)))
return kern(input_dim, [spkern(input_dim, f, output_dim=output_dim, name='eq_sympy')])
def sinc(input_dim, ARD=False, variance=1., lengthscale=1.):
"""
    TODO: Not clear why this isn't working; it suggests the argument of sinc is not a number.
    sinc covariance function
"""
X = [sp.var('x%i' % i) for i in range(input_dim)]
Z = [sp.var('z%i' % i) for i in range(input_dim)]
X = sp.symbols('x_:' + str(input_dim))
Z = sp.symbols('z_:' + str(input_dim))
variance = sp.var('variance',positive=True)
if ARD:
lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)]
dist_string = ' + '.join(['(x%i-z%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
dist_string = ' + '.join(['(x_%i-z_%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
dist = parse_expr(dist_string)
f = variance*sinc(sp.pi*sp.sqrt(dist))
else:
lengthscale = sp.var('lengthscale',positive=True)
dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)])
dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(input_dim)])
dist = parse_expr(dist_string)
f = variance*sinc(sp.pi*sp.sqrt(dist)/lengthscale)
return kern(input_dim, [spkern(input_dim, f, name='sinc')])
def sympykern(input_dim, k,name=None):
def sympykern(input_dim, k=None, output_dim=1, name=None, param=None):
"""
    A base kernel object, where all the hard work is done by sympy.
@ -349,7 +373,7 @@ if sympy_available:
- to handle multiple inputs, call them x1, z1, etc
    - to handle multiple correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
"""
return kern(input_dim, [spkern(input_dim, k,name)])
return kern(input_dim, [spkern(input_dim, k=k, output_dim=output_dim, name=name, param=param)])
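
A sketch of defining a covariance directly through this wrapper, following the x_0/z_0 naming convention described above (assumes the wrapper is exposed as GPy.kern.sympykern):

# Hypothetical sketch: a custom one-dimensional covariance via sympykern.
import sympy as sp
import GPy

x_0, z_0 = sp.symbols('x_0 z_0')
variance, lengthscale = sp.symbols('variance lengthscale', positive=True)
k_expr = variance * sp.exp(-(x_0 - z_0)**2 / (2 * lengthscale**2))
k = GPy.kern.sympykern(1, k_expr)  # behaves like any other kern object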
del sympy_available
def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):

View file

@ -18,37 +18,37 @@ class kern(Parameterized):
like which parameters live where.
The technical code for kernels is divided into _parts_ (see
e.g. rbf.py). This object contains a list of parts, which are
computed additively. For multiplication, special _prod_ parts
e.g. rbf.py). This object contains a list of _parameters_, which are
computed additively. For multiplication, special _prod_ _parameters_
are used.
:param input_dim: The dimensionality of the kernel's input space
:type input_dim: int
:param parts: the 'parts' (PD functions) of the kernel
:type parts: list of Kernpart objects
:param _parameters_: the '_parameters_' (PD functions) of the kernel
:type _parameters_: list of Kernpart objects
:param input_slices: the slices on the inputs which apply to each kernel
:type input_slices: list of slice objects, or list of bools
"""
self.parts = parts
self.Nparts = len(parts)
self.num_params = sum([p.num_params for p in self.parts])
self._parameters_ = parts
self.num_parts = len(parts)
self.num_params = sum([p.num_params for p in self._parameters_])
self.input_dim = input_dim
part_names = [k.name for k in self.parts]
part_names = [k.name for k in self._parameters_]
self.name=''
for name in part_names:
self.name += name + '+'
self.name = self.name[:-1]
# deal with input_slices
if input_slices is None:
self.input_slices = [slice(None) for p in self.parts]
self.input_slices = [slice(None) for p in self._parameters_]
else:
assert len(input_slices) == len(self.parts)
assert len(input_slices) == len(self._parameters_)
self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices]
for p in self.parts:
for p in self._parameters_:
assert isinstance(p, Kernpart), "bad kernel part"
self.compute_param_slices()
@ -60,8 +60,8 @@ class kern(Parameterized):
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return Parameterized.getstate(self) + [self.parts,
self.Nparts,
return Parameterized.getstate(self) + [self._parameters_,
self.num_parts,
self.num_params,
self.input_dim,
self.input_slices,
@ -73,21 +73,20 @@ class kern(Parameterized):
self.input_slices = state.pop()
self.input_dim = state.pop()
self.num_params = state.pop()
self.Nparts = state.pop()
self.parts = state.pop()
self.num_parts = state.pop()
self._parameters_ = state.pop()
Parameterized.setstate(self, state)
def plot_ARD(self, fignum=None, ax=None, title='', legend=False):
"""If an ARD kernel is present, it bar-plots the ARD parameters.
"""If an ARD kernel is present, plot a bar representation using matplotlib
:param fignum: figure number of the plot
:param ax: matplotlib axis to plot on
:param title:
title of the plot,
:param title:
title of the plot,
pass '' to not print a title
pass None for a generic title
"""
if ax is None:
fig = pb.figure(fignum)
@ -100,7 +99,7 @@ class kern(Parameterized):
xticklabels = []
bars = []
x0 = 0
for p in self.parts:
for p in self._parameters_:
c = Tango.nextMedium()
if hasattr(p, 'ARD') and p.ARD:
if title is None:
@ -152,6 +151,13 @@ class kern(Parameterized):
return ax
def _transform_gradients(self, g):
"""
Apply the transformations of the kernel so that the returned vector
represents the gradient in the transformed space (i.e. that given by
get_params_transformed())
:param g: the gradient vector for the current model, usually created by dK_dtheta
"""
x = self._get_params()
        [np.put(g, i, g[i] * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)]
[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
@ -162,22 +168,29 @@ class kern(Parameterized):
return g
def compute_param_slices(self):
"""create a set of slices that can index the parameters of each part."""
"""
Create a set of slices that can index the parameters of each part.
"""
self.param_slices = []
count = 0
for p in self.parts:
for p in self._parameters_:
self.param_slices.append(slice(count, count + p.num_params))
count += p.num_params
def __add__(self, other):
"""
Shortcut for `add`.
"""
""" Overloading of the '+' operator. for more control, see self.add """
return self.add(other)
def add(self, other, tensor=False):
"""
Add another kernel to this one. Both kernels are defined on the same _space_
Add another kernel to this one.
        If tensor is False, both kernels are defined on the same _space_: the
        created kernel will have the same number of inputs as self and
        other (which must be the same).
        If tensor is True, the dimensions are stacked 'horizontally', so
        that the resulting kernel has self.input_dim + other.input_dim inputs.
:param other: the other kernel to be added
:type other: GPy.kern
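
For example (a sketch; the kernel constructors are the ones used elsewhere in this changeset):

import GPy

k_same = GPy.kern.rbf(1) + GPy.kern.white(1)                     # input_dim stays 1
k_tensor = GPy.kern.rbf(1).add(GPy.kern.linear(1), tensor=True)  # acts on 2-d inputs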
@ -189,7 +202,7 @@ class kern(Parameterized):
other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices]
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
newkern = kern(D, self._parameters_ + other._parameters_, self_input_slices + other_input_slices)
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [x + self.num_params for x in other.constrained_indices]
@ -200,7 +213,7 @@ class kern(Parameterized):
newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
else:
assert self.input_dim == other.input_dim
newkern = kern(self.input_dim, self.parts + other.parts, self.input_slices + other.input_slices)
newkern = kern(self.input_dim, self._parameters_ + other._parameters_, self.input_slices + other.input_slices)
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [i + self.num_params for i in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints
@ -210,9 +223,7 @@ class kern(Parameterized):
return newkern
def __mul__(self, other):
"""
Shortcut for `prod`.
"""
""" Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other)
def __pow__(self, other, tensor=False):
@ -228,7 +239,7 @@ class kern(Parameterized):
:param other: the other kernel to be added
:type other: GPy.kern
:param tensor: whether or not to use the tensor space (default is false).
:type tensor: bool
:type tensor: bool
"""
K1 = self.copy()
@ -240,7 +251,7 @@ class kern(Parameterized):
s1[sl1], s2[sl2] = [True], [True]
slices += [s1 + s2]
newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)]
newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1._parameters_, K2._parameters_)]
if tensor:
newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices)
@ -255,12 +266,12 @@ class kern(Parameterized):
# Build the array that allows to go from the initial indices of the param to the new ones
K1_param = []
n = 0
for k1 in K1.parts:
for k1 in K1._parameters_:
K1_param += [range(n, n + k1.num_params)]
n += k1.num_params
n = 0
K2_param = []
for k2 in K2.parts:
for k2 in K2._parameters_:
K2_param += [range(K1.num_params + n, K1.num_params + n + k2.num_params)]
n += k2.num_params
index_param = []
@ -292,36 +303,47 @@ class kern(Parameterized):
self.constrain(np.where(index_param == i)[0], t)
def _get_params(self):
return np.hstack([p._get_params() for p in self.parts])
return np.hstack([p._get_params() for p in self._parameters_])
def _set_params(self, x):
[p._set_params(x[s]) for p, s in zip(self.parts, self.param_slices)]
[p._set_params(x[s]) for p, s in zip(self._parameters_, self.param_slices)]
def _get_param_names(self):
# this is a bit nasty: we want to distinguish between parts with the same name by appending a count
part_names = np.array([k.name for k in self.parts], dtype=np.str)
# this is a bit nasty: we want to distinguish between _parameters_ with the same name by appending a count
part_names = np.array([k.name for k in self._parameters_], dtype=np.str)
counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)]
cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)]
names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)]
return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self.parts)], [])
return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self._parameters_)], [])
def K(self, X, X2=None, which_parts='all'):
"""
Compute the kernel function.
:param X: the first set of inputs to the kernel
:param X2: (optional) the second set of arguments to the kernel. If X2
        is None, this is passed through to the 'part' object, which
handles this as X2 == X.
:param which_parts: a list of booleans detailing whether to include
each of the part functions. By default, 'all'
indicates [True]*self.num_parts
"""
if which_parts == 'all':
which_parts = [True] * self.Nparts
which_parts = [True] * self.num_parts
assert X.shape[1] == self.input_dim
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0]))
[p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
[p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
else:
target = np.zeros((X.shape[0], X2.shape[0]))
[p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
[p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
return target
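
A sketch of evaluating individual parts of a composite kernel via which_parts (parts combine additively, so the contributions sum to the full covariance):

import numpy as np
import GPy

k = GPy.kern.rbf(1) + GPy.kern.white(1)
X = np.random.randn(10, 1)
K_full = k.K(X)                              # default: all parts
K_rbf = k.K(X, which_parts=[True, False])    # rbf contribution only
K_white = k.K(X, which_parts=[False, True])  # white-noise contribution only
assert np.allclose(K_full, K_rbf + K_white)  # additive combination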
def dK_dtheta(self, dL_dK, X, X2=None):
"""
Compute the gradient of the covariance function with respect to the parameters.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
        :type dL_dK: np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
@ -329,18 +351,19 @@ class kern(Parameterized):
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)
returns: dL_dtheta
"""
assert X.shape[1] == self.input_dim
target = np.zeros(self.num_params)
if X2 is None:
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self.param_slices)]
else:
[p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
[p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self.param_slices)]
return self._transform_gradients(target)
def dK_dX(self, dL_dK, X, X2=None):
"""Compute the gradient of the covariance function with respect to X.
"""Compute the gradient of the objective function with respect to X.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: np.ndarray (num_samples x num_inducing)
@ -351,18 +374,18 @@ class kern(Parameterized):
target = np.zeros_like(X)
if X2 is None:
[p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
[p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
else:
[p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
[p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def Kdiag(self, X, which_parts='all'):
"""Compute the diagonal of the covariance function for inputs X."""
if which_parts == 'all':
which_parts = [True] * self.Nparts
which_parts = [True] * self.num_parts
assert X.shape[1] == self.input_dim
target = np.zeros(X.shape[0])
[p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on]
[p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on]
return target
def dKdiag_dtheta(self, dL_dKdiag, X):
@ -370,134 +393,203 @@ class kern(Parameterized):
assert X.shape[1] == self.input_dim
assert dL_dKdiag.size == X.shape[0]
target = np.zeros(self.num_params)
[p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
[p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self.param_slices)]
return self._transform_gradients(target)
def dKdiag_dX(self, dL_dKdiag, X):
assert X.shape[1] == self.input_dim
target = np.zeros_like(X)
[p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
[p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def psi0(self, Z, mu, S):
target = np.zeros(mu.shape[0])
[p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
[p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
target = np.zeros(self.num_params)
[p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
[p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self.param_slices, self.input_slices)]
return self._transform_gradients(target)
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
[p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
[p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target_mu, target_S
def psi1(self, Z, mu, S):
target = np.zeros((mu.shape[0], Z.shape[0]))
[p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
[p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
target = np.zeros((self.num_params))
[p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
[p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self.param_slices, self.input_slices)]
return self._transform_gradients(target)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
target = np.zeros_like(Z)
[p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
[p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
"""return shapes are num_samples,num_inducing,input_dim"""
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
[p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
[p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target_mu, target_S
def psi2(self, Z, mu, S):
"""
        Compute the psi2 statistics for the covariance function.
:param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
:param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
:returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)
:param Z: np.ndarray of inducing inputs (M x Q)
:param mu, S: np.ndarrays of means and variances (each N x Q)
:returns psi2: np.ndarray (N,M,M)
"""
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
# compute the "cross" terms
# TODO: input_slices needed
crossterms = 0
from parts.white import White
from parts.rbf import RBF
from parts.rbf_inv import RBFInv
from parts.bias import Bias
from parts.linear import Linear
for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self.parts, self.input_slices), 2):
if i_s1 == i_s2:
# TODO psi1 this must be faster/better/precached/more nice
tmp1 = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1)
tmp2 = np.zeros((mu.shape[0], Z.shape[0]))
p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2)
prod = np.multiply(tmp1, tmp2)
crossterms += prod[:, :, None] + prod[:, None, :]
# target += crossterms
return target + crossterms
        for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self._parameters_, self.param_slices), 2):
            # white doesn't combine with anything
if isinstance(p1, White) or isinstance(p2, White):
pass
# rbf X bias
elif isinstance(p1, Bias) and isinstance(p2, (RBF, RBFInv)):
target += p1.variance * (p2._psi1[:, :, None] + p2._psi1[:, None, :])
elif isinstance(p2, Bias) and isinstance(p1, (RBF, RBFInv)):
target += p2.variance * (p1._psi1[:, :, None] + p1._psi1[:, None, :])
# linear X bias
elif isinstance(p1, Bias) and isinstance(p2, Linear):
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p2.psi1(Z, mu, S, tmp)
target += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
elif isinstance(p2, Bias) and isinstance(p1, Linear):
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
target += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
# rbf X linear
elif isinstance(p1, Linear) and isinstance(p2, (RBF, RBFInv)):
pass
elif isinstance(p2, Linear) and isinstance(p1, (RBF, RBFInv)):
raise NotImplementedError # TODO
elif isinstance(p1, (RBF, RBFInv)) and isinstance(p2, (RBF, RBFInv)):
raise NotImplementedError # TODO
elif isinstance(p2, (RBF, RBFInv)) and isinstance(p1, (RBF, RBFInv)):
raise NotImplementedError # TODO
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return target
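
A shape-level sketch of the three psi statistics for an rbf kernel (N data points, M inducing inputs, Q input dimensions), matching the shapes documented above:

import numpy as np
import GPy

N, M, Q = 8, 3, 2
k = GPy.kern.rbf(Q)
Z = np.random.randn(M, Q)      # inducing inputs
mu = np.random.randn(N, Q)     # variational means
S = np.random.rand(N, Q)       # variational variances (positive)
print(k.psi0(Z, mu, S).shape)  # (N,)
print(k.psi1(Z, mu, S).shape)  # (N, M)
print(k.psi2(Z, mu, S).shape)  # (N, M, M)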
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
"""Gradient of the psi2 statistics with respect to the parameters."""
target = np.zeros(self.num_params)
[p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
        target = np.zeros(self.num_params)
[p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self.param_slices)]
# compute the "cross" terms
# TODO: better looping, input_slices
for i1, i2 in itertools.permutations(range(len(self.parts)), 2):
p1, p2 = self.parts[i1], self.parts[i2]
for i1, i2 in itertools.combinations(range(len(self._parameters_)), 2):
p1, p2 = self._parameters_[i1], self._parameters_[i2]
# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
ps1, ps2 = self.param_slices[i1], self.param_slices[i2]
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2])
            # white doesn't combine with anything
if p1.name == 'white' or p2.name == 'white':
pass
# rbf X bias
elif p1.name == 'bias' and p2.name == 'rbf':
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2])
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2._psi1 * 2., Z, mu, S, target[ps1])
elif p2.name == 'bias' and p1.name == 'rbf':
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1])
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1._psi1 * 2., Z, mu, S, target[ps2])
# linear X bias
elif p1.name == 'bias' and p2.name == 'linear':
                p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2])
psi1 = np.zeros((mu.shape[0], Z.shape[0]))
p2.psi1(Z, mu, S, psi1)
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps1])
elif p2.name == 'bias' and p1.name == 'linear':
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1])
psi1 = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, psi1)
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps2])
# rbf X linear
elif p1.name == 'linear' and p2.name == 'rbf':
raise NotImplementedError # TODO
elif p2.name == 'linear' and p1.name == 'rbf':
raise NotImplementedError # TODO
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return self._transform_gradients(target)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
target = np.zeros_like(Z)
[p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
# target *= 2
[p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
# compute the "cross" terms
# TODO: we need input_slices here.
for p1, p2 in itertools.permutations(self.parts, 2):
if p1.name == 'linear' and p2.name == 'linear':
raise NotImplementedError("We don't handle linear/linear cross-terms")
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target)
for p1, p2 in itertools.combinations(self._parameters_, 2):
            # white doesn't combine with anything
if p1.name == 'white' or p2.name == 'white':
pass
# rbf X bias
elif p1.name == 'bias' and p2.name == 'rbf':
                p2.dpsi1_dZ(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target)
elif p2.name == 'bias' and p1.name == 'rbf':
p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target)
# linear X bias
elif p1.name == 'bias' and p2.name == 'linear':
p2.dpsi1_dZ(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target)
elif p2.name == 'bias' and p1.name == 'linear':
p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target)
# rbf X linear
elif p1.name == 'linear' and p2.name == 'rbf':
raise NotImplementedError # TODO
elif p2.name == 'linear' and p1.name == 'rbf':
raise NotImplementedError # TODO
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return target * 2
return target * 2.
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
[p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
[p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
# compute the "cross" terms
# TODO: we need input_slices here.
for p1, p2 in itertools.permutations(self.parts, 2):
if p1.name == 'linear' and p2.name == 'linear':
raise NotImplementedError("We don't handle linear/linear cross-terms")
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S)
for p1, p2 in itertools.combinations(self._parameters_, 2):
            # white doesn't combine with anything
if p1.name == 'white' or p2.name == 'white':
pass
# rbf X bias
elif p1.name == 'bias' and p2.name == 'rbf':
p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S)
elif p2.name == 'bias' and p1.name == 'rbf':
p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S)
# linear X bias
elif p1.name == 'bias' and p2.name == 'linear':
p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S)
elif p2.name == 'bias' and p1.name == 'linear':
p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S)
# rbf X linear
elif p1.name == 'linear' and p2.name == 'rbf':
raise NotImplementedError # TODO
elif p2.name == 'linear' and p1.name == 'rbf':
raise NotImplementedError # TODO
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return target_mu, target_S
def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs):
if which_parts == 'all':
which_parts = [True] * self.Nparts
which_parts = [True] * self.num_parts
if self.input_dim == 1:
if x is None:
x = np.zeros((1, 1))
@ -658,7 +750,7 @@ class Kern_check_dKdiag_dX(Kern_check_model):
def _set_params(self, x):
self.X=x.reshape(self.X.shape)
def kern_test(kern, X=None, X2=None, verbose=False):
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
"""This function runs on kernels to check the correctness of their implementation. It checks that the covariance function is positive definite for a randomly generated data set.
:param kern: the kernel to be tested.
@ -672,8 +764,13 @@ def kern_test(kern, X=None, X2=None, verbose=False):
pass_checks = True
if X==None:
X = np.random.randn(10, kern.input_dim)
if output_ind is not None:
        X[:, output_ind] = np.random.randint(kern.output_dim, size=X.shape[0])
if X2==None:
X2 = np.random.randn(20, kern.input_dim)
if output_ind is not None:
        X2[:, output_ind] = np.random.randint(kern.output_dim, size=X2.shape[0])
if verbose:
print("Checking covariance function is positive definite.")
result = Kern_check_model(kern, X=X).is_positive_definite()
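
A sketch of exercising this checker on a standard kernel (the import location is an assumption; adjust to wherever kern_test lives):

import GPy
from GPy.kern.kern import kern_test  # module path assumed

assert kern_test(GPy.kern.rbf(2), verbose=True)  # returns pass_checks (True when all checks pass)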

View file

@ -1,7 +1,6 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from IPython.core.debugger import Tracer; debug_here=Tracer()
from kernpart import Kernpart
import numpy as np
from ...util.linalg import tdot

View file

@ -7,7 +7,7 @@ from independent_outputs import index_to_slices
class Hierarchical(Kernpart):
"""
A kernel part which can reopresent a hierarchy of indepencnce: a gerenalisation of independent_outputs
    A kernel part which can represent a hierarchy of independence: a generalisation of independent_outputs
"""
def __init__(self,parts):

View file

@ -5,15 +5,18 @@
class Kernpart(object):
def __init__(self,input_dim):
"""
The base class for a kernpart: a positive definite function which forms part of a kernel
The base class for a kernpart: a positive definite function which forms part of a covariance function (kernel).
:param input_dim: the number of input dimensions to the function
:type input_dim: int
Do not instantiate.
"""
# the input dimensionality for the covariance
self.input_dim = input_dim
# the number of optimisable parameters
self.num_params = 1
# the name of the covariance function.
self.name = 'unnamed'
def _get_params(self):
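
To make the contract concrete, here is a hypothetical minimal part written against this base class (the accumulate-into-target convention and method signatures follow how kern calls its parts above; the module path and the Constant part itself are assumptions, not code shipped with GPy):

import numpy as np
from GPy.kern.parts.kernpart import Kernpart  # module path assumed

class Constant(Kernpart):
    """k(x, z) = c everywhere: the simplest possible part."""
    def __init__(self, input_dim, c=1.):
        Kernpart.__init__(self, input_dim)
        self.name = 'constant'
        self.num_params = 1
        self._set_params(np.array([c]))
    def _get_params(self):
        return np.array([self.c])
    def _set_params(self, x):
        self.c = float(x)
    def _get_param_names(self):
        return ['c']
    def K(self, X, X2, target):
        target += self.c       # parts accumulate into target
    def Kdiag(self, X, target):
        target += self.c
    def dK_dtheta(self, dL_dK, X, X2, target):
        target += dL_dK.sum()  # dK/dc = 1 everywhere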

View file

@ -7,6 +7,7 @@ import numpy as np
from ...util.linalg import tdot
from ...util.misc import fast_array_equal
from scipy import weave
from ...util.config import *
class Linear(Kernpart):
"""
@ -51,6 +52,26 @@ class Linear(Kernpart):
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._params = np.empty(shape=(3, 1))
# a set of optional args to pass to weave
weave_options_openmp = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'],
'extra_link_args' : ['-lgomp'],
'libraries': ['gomp']}
weave_options_noopenmp = {'extra_compile_args': ['-O3']}
if config.getboolean('parallel', 'openmp'):
self.weave_options = weave_options_openmp
self.weave_support_code = """
#include <omp.h>
#include <math.h>
"""
else:
self.weave_options = weave_options_noopenmp
self.weave_support_code = """
#include <math.h>
"""
def _get_params(self):
return self.variances
@ -190,11 +211,17 @@ class Linear(Kernpart):
#target_mu_dummy += (dL_dpsi2[:, :, :, None] * muAZZA).sum(1).sum(1)
#target_S_dummy += (dL_dpsi2[:, :, :, None] * self.ZA[None, :, None, :] * self.ZA[None, None, :, :]).sum(1).sum(1)
if config.getboolean('parallel', 'openmp'):
pragma_string = "#pragma omp parallel for private(m,mm,q,qq,factor,tmp)"
else:
pragma_string = ''
        #Using weave, we can exploit the symmetry of this problem:
code = """
int n, m, mm,q,qq;
double factor,tmp;
#pragma omp parallel for private(m,mm,q,qq,factor,tmp)
%s
for(n=0;n<N;n++){
for(m=0;m<num_inducing;m++){
for(mm=0;mm<=m;mm++){
@ -218,19 +245,13 @@ class Linear(Kernpart):
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
""" % pragma_string
N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1]
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
N,num_inducing,input_dim = int(mu.shape[0]),int(Z.shape[0]),int(mu.shape[1])
weave.inline(code, support_code=self.weave_support_code,
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
type_converters=weave.converters.blitz,**self.weave_options)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
@ -240,9 +261,15 @@ class Linear(Kernpart):
#dummy_target += psi2_dZ.sum(0).sum(0)
AZA = self.variances*self.ZAinner
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for private(n,mm,q)'
else:
pragma_string = ''
code="""
int n,m,mm,q;
#pragma omp parallel for private(n,mm,q)
%s
for(m=0;m<num_inducing;m++){
for(q=0;q<input_dim;q++){
for(mm=0;mm<num_inducing;mm++){
@ -252,22 +279,13 @@ class Linear(Kernpart):
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
""" % pragma_string
N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1]
weave.inline(code, support_code=support_code, libraries=['gomp'],
N,num_inducing,input_dim = int(mu.shape[0]),int(Z.shape[0]),int(mu.shape[1])
weave.inline(code, support_code=self.weave_support_code,
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
type_converters=weave.converters.blitz,**self.weave_options)
#---------------------------------------#

View file

@ -113,7 +113,7 @@ class PeriodicMatern32(Kernpart):
@silence_errors
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)

View file

@ -115,7 +115,7 @@ class PeriodicMatern52(Kernpart):
@silence_errors
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)

View file

@ -111,7 +111,7 @@ class PeriodicExponential(Kernpart):
@silence_errors
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
"""derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)

View file

@ -7,6 +7,7 @@ import numpy as np
from scipy import weave
from ...util.linalg import tdot
from ...util.misc import fast_array_equal
from ...util.config import *
class RBF(Kernpart):
"""
@ -57,12 +58,27 @@ class RBF(Kernpart):
self._X, self._X2, self._params = np.empty(shape=(3, 1))
# a set of optional args to pass to weave
self.weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
'extra_link_args' : ['-lgomp']}
weave_options_openmp = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'],
'extra_link_args' : ['-lgomp'],
'libraries': ['gomp']}
weave_options_noopenmp = {'extra_compile_args': ['-O3']}
if config.getboolean('parallel', 'openmp'):
self.weave_options = weave_options_openmp
self.weave_support_code = """
#include <omp.h>
#include <math.h>
"""
else:
self.weave_options = weave_options_noopenmp
self.weave_support_code = """
#include <math.h>
"""
def _get_params(self):
return np.hstack((self.variance, self.lengthscale))
@ -110,7 +126,7 @@ class RBF(Kernpart):
target(q+1) += var_len3(q)*tmp;
}
"""
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
num_data, num_inducing, input_dim = int(X.shape[0]), int(X.shape[0]), int(self.input_dim)
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
else:
code = """
@ -126,7 +142,7 @@ class RBF(Kernpart):
target(q+1) += var_len3(q)*tmp;
}
"""
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
num_data, num_inducing, input_dim = int(X.shape[0]), int(X2.shape[0]), int(self.input_dim)
# [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
else:
@ -287,10 +303,16 @@ class RBF(Kernpart):
lengthscale2 = self.lengthscale2
else:
lengthscale2 = np.ones(input_dim) * self.lengthscale2
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for private(tmp)'
else:
pragma_string = ''
code = """
double tmp;
#pragma omp parallel for private(tmp)
%s
for (int n=0; n<N; n++){
for (int m=0; m<num_inducing; m++){
for (int mm=0; mm<(m+1); mm++){
@ -320,13 +342,20 @@ class RBF(Kernpart):
}
}
"""
""" % pragma_string
if config.getboolean('parallel', 'openmp'):
pragma_string = '#include <omp.h>'
else:
pragma_string = ''
support_code = """
#include <omp.h>
%s
#include <math.h>
"""
weave.inline(code, support_code=support_code, libraries=['gomp'],
""" % pragma_string
N, num_inducing, input_dim = int(N), int(num_inducing), int(input_dim)
weave.inline(code, support_code=support_code,
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options)

View file

@ -7,6 +7,8 @@ import numpy as np
import hashlib
from scipy import weave
from ...util.linalg import tdot
from ...util.config import *
class RBFInv(RBF):
"""
@ -58,11 +60,23 @@ class RBFInv(RBF):
self._X, self._X2, self._params = np.empty(shape=(3, 1))
# a set of optional args to pass to weave
self.weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
'extra_link_args' : ['-lgomp']}
weave_options_openmp = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'],
'extra_link_args' : ['-lgomp'],
'libraries': ['gomp']}
weave_options_noopenmp = {'extra_compile_args': ['-O3']}
if config.getboolean('parallel', 'openmp'):
self.weave_options = weave_options_openmp
self.weave_support_code = """
#include <omp.h>
#include <math.h>
"""
else:
self.weave_options = weave_options_noopenmp
self.weave_support_code = """
#include <math.h>
"""
def _get_params(self):
return np.hstack((self.variance, self.inv_lengthscale))
@ -109,7 +123,7 @@ class RBFInv(RBF):
target(q+1) += var_len3(q)*tmp*(-len2(q));
}
"""
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
num_data, num_inducing, input_dim = int(X.shape[0]), int(X.shape[0]), int(self.input_dim)
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
else:
code = """
@ -125,7 +139,7 @@ class RBFInv(RBF):
target(q+1) += var_len3(q)*tmp*(-len2(q));
}
"""
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
num_data, num_inducing, input_dim = int(X.shape[0]), int(X2.shape[0]), int(self.input_dim)
# [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
else:
@ -133,7 +147,7 @@ class RBFInv(RBF):
def dK_dX(self, dL_dK, X, X2, target):
self._K_computations(X, X2)
if X2 is None:
if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
@ -263,8 +277,8 @@ class RBFInv(RBF):
self._Z, self._mu, self._S = Z, mu, S
def weave_psi2(self, mu, Zhat):
N, input_dim = mu.shape
num_inducing = Zhat.shape[0]
N, input_dim = int(mu.shape[0]), int(mu.shape[1])
num_inducing = int(Zhat.shape[0])
mudist = np.empty((N, num_inducing, num_inducing, input_dim))
mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
@ -279,10 +293,16 @@ class RBFInv(RBF):
inv_lengthscale2 = self.inv_lengthscale2
else:
inv_lengthscale2 = np.ones(input_dim) * self.inv_lengthscale2
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for private(tmp)'
else:
pragma_string = ''
code = """
double tmp;
#pragma omp parallel for private(tmp)
%s
for (int n=0; n<N; n++){
for (int m=0; m<num_inducing; m++){
for (int mm=0; mm<(m+1); mm++){
@ -312,13 +332,9 @@ class RBFInv(RBF):
}
}
"""
""" % pragma_string
support_code = """
#include <omp.h>
#include <math.h>
"""
weave.inline(code, support_code=support_code, libraries=['gomp'],
weave.inline(code, support_code=self.weave_support_code,
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'inv_lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options)

View file

@ -1,4 +1,7 @@
#include <math.h>
#include <float.h>
#include <stdlib.h>
double DiracDelta(double x){
// TODO: this doesn't seem to be a dirac delta ... should return infinity. Neil
if((x<0.000001) & (x>-0.000001))//go on, laugh at my c++ skills
@ -23,3 +26,36 @@ double sinc_grad(double x){
else
return (x*cos(x) - sin(x))/(x*x);
}
double erfcx(double x){
double xneg=-sqrt(log(DBL_MAX/2));
double xmax = 1/(sqrt(M_PI)*DBL_MIN);
xmax = DBL_MAX<xmax ? DBL_MAX : xmax;
// Find values where erfcx can be evaluated
  double t = 3.97886080735226 / (fabs(x) + 3.97886080735226);
double u = t-0.5;
double y = (((((((((u * 0.00127109764952614092 + 1.19314022838340944e-4) * u
- 0.003963850973605135) * u - 8.70779635317295828e-4) * u
+ 0.00773672528313526668) * u + 0.00383335126264887303) * u
- 0.0127223813782122755) * u - 0.0133823644533460069) * u
+ 0.0161315329733252248) * u + 0.0390976845588484035) * u + 0.00249367200053503304;
if (x<xneg)
return -INFINITY;
else if (x<0)
return 2*exp(x*x)-y;
else if (x>xmax)
return 0.0;
else
return y;
}
double ln_diff_erf(double x0, double x1){
if (x0==x1)
    return -INFINITY;
else if(x0<0 && x1>0 || x0>0 && x1<0)
return log(erf(x0)-erf(x1));
else if(x1>0)
    return log(erfcx(x1)-erfcx(x0)*exp(x1*x1 - x0*x0))-x1*x1;
else
return log(erfcx(-x0)-erfcx(-x1)*exp(x0*x0 - x1*x1))-x0*x0;
}
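
For reference, both helpers have scipy counterparts that can cross-check the C implementations above (a sketch; requires a scipy version providing scipy.special.erfcx):

# Reference values for the C helpers above, via scipy (cross-checking only).
import numpy as np
from scipy.special import erf, erfcx

def ln_diff_erf_ref(x0, x1):
    # Naive log(erf(x0) - erf(x1)); the C version rewrites this through
    # erfcx so it stays finite and accurate for large |x0|, |x1|.
    return np.log(erf(x0) - erf(x1))

print(erfcx(1.5))                 # compare with the C erfcx(1.5)
print(ln_diff_erf_ref(2.0, 1.0))  # compare with ln_diff_erf(2.0, 1.0)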

View file

@ -4,3 +4,6 @@ double DiracDelta(double x, int foo);
double sinc(double x);
double sinc_grad(double x);
double erfcx(double x);
double ln_diff_erf(double x0, double x1);

View file

@ -9,6 +9,7 @@ import sys
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
import tempfile
import pdb
import ast
from kernpart import Kernpart
class spkern(Kernpart):
@ -16,64 +17,388 @@ class spkern(Kernpart):
    A kernel object, where all the hard work is done by sympy.
:param k: the covariance function
:type k: a positive definite sympy function of x1, z1, x2, z2...
:type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
To construct a new sympy kernel, you'll need to define:
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
- that's it! we'll extract the variables from the function k.
Note:
- to handle multiple inputs, call them x1, z1, etc
- to handle multpile correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
    - to handle multiple inputs, call them x_0, z_0, x_1, z_1, etc
    - to handle multiple correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
"""
def __init__(self,input_dim,k,name=None,param=None):
def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None):
if name is None:
self.name='sympykern'
else:
self.name = name
if k is None:
raise ValueError, "You must provide an argument for the covariance function."
self._sp_k = k
sp_vars = [e for e in k.atoms() if e.is_Symbol]
self._sp_x= sorted([e for e in sp_vars if e.name[0]=='x'],key=lambda x:int(x.name[1:]))
self._sp_z= sorted([e for e in sp_vars if e.name[0]=='z'],key=lambda z:int(z.name[1:]))
assert all([x.name=='x%i'%i for i,x in enumerate(self._sp_x)])
assert all([z.name=='z%i'%i for i,z in enumerate(self._sp_z)])
self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
# Check that variable names make sense.
assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)])
assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)])
assert len(self._sp_x)==len(self._sp_z)
self.input_dim = len(self._sp_x)
self._real_input_dim = self.input_dim
if output_dim > 1:
self.input_dim += 1
assert self.input_dim == input_dim
self._sp_theta = sorted([e for e in sp_vars if not (e.name[0]=='x' or e.name[0]=='z')],key=lambda e:e.name)
self.num_params = len(self._sp_theta)
self.output_dim = output_dim
# extract parameter names
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
#deal with param
if param is None:
param = np.ones(self.num_params)
assert param.size==self.num_params
self._set_params(param)
# Look for parameters with index.
if self.output_dim>1:
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
# Make sure parameter appears with both indices!
assert len(self._sp_theta_i)==len(self._sp_theta_j)
assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)])
# Extract names of shared parameters
self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j]
self.num_split_params = len(self._sp_theta_i)
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i]
for theta in self._split_theta_names:
setattr(self, theta, np.ones(self.output_dim))
self.num_shared_params = len(self._sp_theta)
self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
else:
self.num_split_params = 0
self._split_theta_names = []
self._sp_theta = thetas
self.num_shared_params = len(self._sp_theta)
self.num_params = self.num_shared_params
for theta in self._sp_theta:
val = 1.0
if param is not None:
if param.has_key(theta):
val = param[theta]
setattr(self, theta.name, val)
#deal with param
self._set_params(self._get_params())
#Differentiate!
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
if self.output_dim > 1:
self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i]
self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
#self._sp_dk_dz = [sp.diff(k,zi) for zi in self._sp_z]
#self.compute_psi_stats()
if False:
self.compute_psi_stats()
self._gen_code()
self.weave_kwargs = {\
'support_code':self._function_code,\
'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],\
'headers':['"sympy_helpers.h"'],\
'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],\
#'extra_compile_args':['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'],\
'extra_compile_args':[],\
'extra_link_args':['-lgomp'],\
if False:
extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5']
else:
extra_compile_args = []
self.weave_kwargs = {
'support_code':self._function_code,
'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],
'headers':['"sympy_helpers.h"'],
'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],
'extra_compile_args':extra_compile_args,
'extra_link_args':['-lgomp'],
'verbose':True}
def __add__(self,other):
        return spkern(self.input_dim, self._sp_k+other._sp_k)
def _gen_code(self):
"""Generates the C functions necessary for computing the covariance function using the sympy objects as input."""
#TODO: maybe generate one C function only to save compile time? Also easier to take that as a basis and hand craft other covariances??
#generate c functions from sympy objects
argument_sequence = self._sp_x+self._sp_z+self._sp_theta
code_list = [('k',self._sp_k)]
# gradients with respect to covariance input
code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]
# gradient with respect to parameters
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]
# gradient with respect to multiple output parameters
if self.output_dim > 1:
argument_sequence += self._sp_theta_i + self._sp_theta_j
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)]
(foo_c,self._function_code), (foo_h,self._function_header) = \
codegen(code_list, "C",'foobar',argument_sequence=argument_sequence)
#put the header file where we can find it
f = file(os.path.join(tempfile.gettempdir(),'foobar.h'),'w')
f.write(self._function_header)
f.close()
# Substitute any known derivatives which sympy doesn't compute
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
############################################################
# This is the basic argument construction for the C code. #
############################################################
arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
+ ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
# for multiple outputs need to also provide these arguments reversed.
if self.output_dim>1:
reverse_arg_list = list(arg_list)
reverse_arg_list.reverse()
# Add in any 'shared' parameters to the list.
param_arg_list = [shared_params.name for shared_params in self._sp_theta]
arg_list += param_arg_list
precompute_list=[]
if self.output_dim > 1:
reverse_arg_list+=list(param_arg_list)
split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
arg_list += split_param_arg_list
reverse_arg_list += split_param_reverse_arg_list
# Extract the right output indices from the inputs.
c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
precompute_list += c_define_output_indices
reverse_arg_string = ", ".join(reverse_arg_list)
arg_string = ", ".join(arg_list)
precompute_string = "\n".join(precompute_list)
        # Code to compute arguments string needed when only X is provided.
X_arg_string = re.sub('Z','X',arg_string)
# Code to compute argument string when only diagonal is required.
diag_arg_string = re.sub('int jj','//int jj',X_arg_string)
diag_arg_string = re.sub('j','i',diag_arg_string)
        diag_precompute_string = precompute_list[0] if precompute_list else ''
# Here's the code to do the looping for K
self._K_code =\
"""
// _K_code
// Code for computing the covariance function.
int i;
int j;
int N = target_array->dimensions[0];
int num_inducing = target_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N;i++){
for (j=0;j<num_inducing;j++){
%s
//target[i*num_inducing+j] =
TARGET2(i, j) += k(%s);
}
}
%s
"""%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
self._K_code_X = """
// _K_code_X
// Code for computing the covariance function.
int i;
int j;
int N = target_array->dimensions[0];
int num_inducing = target_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N;i++){
%s // int ii=(int)X2(i, 1);
TARGET2(i, i) += k(%s);
for (j=0;j<i;j++){
%s //int jj=(int)X2(j, 1);
double kval = k(%s); //double kval = k(X2(i, 0), X2(j, 0), shared_lengthscale, LENGTHSCALE1(ii), SCALE1(ii), LENGTHSCALE1(jj), SCALE1(jj));
TARGET2(i, j) += kval;
TARGET2(j, i) += kval;
}
}
/*%s*/
"""%(diag_precompute_string, diag_arg_string, re.sub('Z2', 'X2', precompute_list[1]), X_arg_string,str(self._sp_k)) #adding a string representation forces recompile when needed
# Code to do the looping for Kdiag
self._Kdiag_code =\
"""
// _Kdiag_code
// Code for computing diagonal of covariance function.
int i;
int N = target_array->dimensions[0];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for
for (i=0;i<N;i++){
%s
//target[i] =
TARGET1(i)=k(%s);
}
%s
"""%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Code to compute gradients
grad_func_list = []
if self.output_dim>1:
grad_func_list += c_define_output_indices
grad_func_list += [' '*16 + 'TARGET1(%i+ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)]
grad_func_list += [' '*16 + 'TARGET1(%i+jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)]
grad_func_list += ([' '*16 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)])
grad_func_string = '\n'.join(grad_func_list)
self._dK_dtheta_code =\
"""
// _dK_dtheta_code
// Code for computing gradient of covariance with respect to parameters.
int i;
int j;
int N = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N;i++){
for (j=0;j<num_inducing;j++){
%s
}
}
%s
"""%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
# Code to compute gradients for Kdiag TODO: needs clean up
diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL1(i)',diag_grad_func_string)
self._dKdiag_dtheta_code =\
"""
// _dKdiag_dtheta_code
// Code for computing gradient of diagonal with respect to parameters.
int i;
int N = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (i=0;i<N;i++){
%s
}
%s
"""%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output.
gradX_func_list = []
if self.output_dim>1:
gradX_func_list += c_define_output_indices
gradX_func_list += ["TARGET2(i, %i) += PARTIAL2(i, j)*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
gradX_func_string = "\n".join(gradX_func_list)
self._dK_dX_code = \
"""
// _dK_dX_code
// Code for computing gradient of covariance with respect to inputs.
int i;
int j;
int N = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N; i++){
for (j=0; j<num_inducing; j++){
%s
}
}
%s
"""%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0)
diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string)
diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string)
diag_gradX_func_string = re.sub('PARTIAL2\(i, i\)','2*PARTIAL1(i)',diag_gradX_func_string)
# Code for gradients of Kdiag wrt X
self._dKdiag_dX_code= \
"""
// _dKdiag_dX_code
// Code for computing gradient of diagonal with respect to inputs.
int N = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (int i=0;i<N; i++){
%s
}
%s
"""%(diag_gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a
# string representation forces recompile when needed Get rid
# of Zs in argument for diagonal. TODO: Why wasn't
# diag_func_string called here? Need to check that.
#self._dKdiag_dX_code = self._dKdiag_dX_code.replace('Z[j', 'X[i')
# Code to use when only X is provided.
self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[')
self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z2(', 'X2(')
self._dK_dX_code_X = self._dK_dX_code.replace('Z2(', 'X2(')
#TODO: insert multiple functions here via string manipulation
#TODO: similar functions for psi_stats
def _get_arg_names(self, Z=None, partial=None):
arg_names = ['target','X']
for shared_params in self._sp_theta:
arg_names += [shared_params.name]
if Z is not None:
arg_names += ['Z']
if partial is not None:
arg_names += ['partial']
if self.output_dim>1:
arg_names += self._split_theta_names
arg_names += ['output_dim']
return arg_names
def _weave_inline(self, code, X, target, Z=None, partial=None):
output_dim = self.output_dim
for shared_params in self._sp_theta:
locals()[shared_params.name] = getattr(self, shared_params.name)
# Need to extract parameters first
for split_params in self._split_theta_names:
locals()[split_params] = getattr(self, split_params)
arg_names = self._get_arg_names(Z, partial)
weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs)
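# weave.inline looks each name in arg_names up in the calling frame, which is
# why the loops above push the kernel parameters into locals() first. For an
# array argument named e.g. 'a', weave exposes a_array (the PyArrayObject seen
# as target_array in the C above) plus A1/A2 indexing macros, mirroring the
# TARGET2/PARTIAL2 macros in the generated code. A small sketch, assuming the
# scipy.weave era (hypothetical array name 'a'):
#
#   import numpy as np
#   from scipy import weave
#   a = np.zeros(3)
#   weave.inline("for (int i=0; i<Na[0]; i++) A1(i) = i;", ['a'])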
def K(self,X,Z,target):
if Z is None:
self._weave_inline(self._K_code_X, X, target)
else:
self._weave_inline(self._K_code, X, target, Z)
def Kdiag(self,X,target):
self._weave_inline(self._Kdiag_code, X, target)
def dK_dtheta(self,partial,X,Z,target):
if Z is None:
self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial)
else:
self._weave_inline(self._dK_dtheta_code, X, target, Z, partial)
def dKdiag_dtheta(self,partial,X,target):
self._weave_inline(self._dKdiag_dtheta_code, X, target, Z=None, partial=partial)
def dK_dX(self,partial,X,Z,target):
if Z is None:
self._weave_inline(self._dK_dX_code_X, X, target, Z, partial)
else:
self._weave_inline(self._dK_dX_code, X, target, Z, partial)
def dKdiag_dX(self,partial,X,target):
self._weave_inline(self._dKdiag_dX_code, X, target, Z=None, partial=partial)
def compute_psi_stats(self):
#define some normal distributions
mus = [sp.var('mu%i'%i,real=True) for i in range(self.input_dim)]
Ss = [sp.var('S%i'%i,positive=True) for i in range(self.input_dim)]
mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)]
Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)]
normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
#do some integration!
@ -99,188 +424,29 @@ class spkern(Kernpart):
self._sp_psi2 = self._sp_psi2.simplify()
def _gen_code(self):
#generate c functions from sympy objects
(foo_c,self._function_code),(foo_h,self._function_header) = \
codegen([('k',self._sp_k)] \
+ [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]\
#+ [('dk_d%s'%z.name,dz) for z,dz in zip(self._sp_z,self._sp_dk_dz)]\
+ [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]\
,"C",'foobar',argument_sequence=self._sp_x+self._sp_z+self._sp_theta)
#put the header file where we can find it
f = file(os.path.join(tempfile.gettempdir(),'foobar.h'),'w')
f.write(self._function_header)
f.close()
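# For reference, the codegen call above follows sympy's standard pattern; a
# self-contained sketch with a hypothetical one-dimensional kernel:
#
#   import sympy as sp
#   from sympy.utilities.codegen import codegen
#   x, z, theta = sp.symbols('x z theta')
#   k = sp.exp(-theta*(x - z)**2)
#   (c_name, c_code), (h_name, c_header) = codegen(
#       [('k', k), ('dk_dtheta', sp.diff(k, theta))],
#       "C", 'foobar', argument_sequence=(x, z, theta))
#   # c_code now contains C functions k(x, z, theta) and dk_dtheta(x, z, theta)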
def _set_params(self,param):
assert param.size == (self.num_params)
for i, shared_params in enumerate(self._sp_theta):
setattr(self, shared_params.name, param[i])
if self.output_dim>1:
for i, split_params in enumerate(self._split_theta_names):
start = self.num_shared_params + i*self.output_dim
end = self.num_shared_params + (i+1)*self.output_dim
setattr(self, split_params, param[start:end])
# Substitute any known derivatives which sympy doesn't compute
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
# Here's the code to do the looping for K
arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]
+ ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]
+ ["param[%i]"%i for i in range(self.num_params)])
self._K_code =\
"""
int i;
int j;
int N = target_array->dimensions[0];
int num_inducing = target_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N;i++){
for (j=0;j<num_inducing;j++){
target[i*num_inducing+j] = k(%s);
}
}
%s
"""%(arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Similar code when only X is provided.
self._K_code_X = self._K_code.replace('Z[', 'X[')
# Code to compute diagonal of covariance.
diag_arglist = re.sub('Z','X',arglist)
diag_arglist = re.sub('j','i',diag_arglist)
# Code to do the looping for Kdiag
self._Kdiag_code =\
"""
int i;
int N = target_array->dimensions[0];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for
for (i=0;i<N;i++){
target[i] = k(%s);
}
%s
"""%(diag_arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Code to compute gradients
funclist = '\n'.join([' '*16 + 'target[%i] += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arglist) for i,theta in enumerate(self._sp_theta)])
self._dK_dtheta_code =\
"""
int i;
int j;
int N = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N;i++){
for (j=0;j<num_inducing;j++){
%s
}
}
%s
"""%(funclist,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
# Similar code when only X is provided, change argument lists.
self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
# Code to compute gradients for Kdiag TODO: needs clean up
diag_funclist = re.sub('Z','X',funclist,count=0)
diag_funclist = re.sub('j','i',diag_funclist)
diag_funclist = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_funclist)
self._dKdiag_dtheta_code =\
"""
int i;
int N = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (i=0;i<N;i++){
%s
}
%s
"""%(diag_funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Code for gradients wrt X
gradient_funcs = "\n".join(["target[i*input_dim+%i] += partial[i*num_inducing+j]*dk_dx%i(%s);"%(q,q,arglist) for q in range(self.input_dim)])
if False:
gradient_funcs += """if(isnan(target[i*input_dim+2])){printf("%%f\\n",dk_dx2(X[i*input_dim+0], X[i*input_dim+1], X[i*input_dim+2], Z[j*input_dim+0], Z[j*input_dim+1], Z[j*input_dim+2], param[0], param[1], param[2], param[3], param[4], param[5]));}
if(isnan(target[i*input_dim+2])){printf("%%f,%%f,%%i,%%i\\n", X[i*input_dim+2], Z[j*input_dim+2],i,j);}"""
self._dK_dX_code = \
"""
int i;
int j;
int N = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N; i++){
for (j=0; j<num_inducing; j++){
%s
}
}
%s
"""%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Create code for call when just X is passed as argument.
self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[')
diag_gradient_funcs = re.sub('Z','X',gradient_funcs,count=0)
diag_gradient_funcs = re.sub('j','i',diag_gradient_funcs)
diag_gradient_funcs = re.sub('partial\[i\*num_inducing\+i\]','2*partial[i]',diag_gradient_funcs)
# Code for gradients of Kdiag wrt X
self._dKdiag_dX_code= \
"""
int N = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (int i=0;i<N; i++){
%s
}
%s
"""%(diag_gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a
# string representation forces recompile when needed Get rid
# of Zs in argument for diagonal. TODO: Why wasn't
# diag_funclist called here? Need to check that.
#self._dKdiag_dX_code = self._dKdiag_dX_code.replace('Z[j', 'X[i')
#TODO: insert multiple functions here via string manipulation
#TODO: similar functions for psi_stats
def K(self,X,Z,target):
param = self._param
if Z is None:
weave.inline(self._K_code_X,arg_names=['target','X','param'],**self.weave_kwargs)
else:
weave.inline(self._K_code,arg_names=['target','X','Z','param'],**self.weave_kwargs)
def Kdiag(self,X,target):
param = self._param
weave.inline(self._Kdiag_code,arg_names=['target','X','param'],**self.weave_kwargs)
def dK_dtheta(self,partial,X,Z,target):
param = self._param
if Z is None:
weave.inline(self._dK_dtheta_code_X, arg_names=['target','X','param','partial'],**self.weave_kwargs)
else:
weave.inline(self._dK_dtheta_code, arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
def dKdiag_dtheta(self,partial,X,target):
param = self._param
weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','param','partial'],**self.weave_kwargs)
def dK_dX(self,partial,X,Z,target):
param = self._param
if Z is None:
weave.inline(self._dK_dX_code_X,arg_names=['target','X','param','partial'],**self.weave_kwargs)
else:
weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
def dKdiag_dX(self,partial,X,target):
param = self._param
weave.inline(self._dKdiag_dX_code,arg_names=['target','X','param','partial'],**self.weave_kwargs)
def _set_params(self,param):
#print param.flags['C_CONTIGUOUS']
self._param = param.copy()
def _get_params(self):
return self._param
params = np.zeros(0)
for shared_params in self._sp_theta:
params = np.hstack((params, getattr(self, shared_params.name)))
if self.output_dim>1:
for split_params in self._split_theta_names:
params = np.hstack((params, getattr(self, split_params).flatten()))
return params
def _get_param_names(self):
return [x.name for x in self._sp_theta]
if self.output_dim>1:
return [x.name for x in self._sp_theta] + [x.name[:-2] + str(i) for x in self._sp_theta_i for i in range(self.output_dim)]
else:
return [x.name for x in self._sp_theta]

View file

@ -1,7 +1,7 @@
from ep import EP
from laplace import Laplace
from ep_mixed_noise import EP_Mixed_Noise
from gaussian import Gaussian
from gaussian_mixed_noise import Gaussian_Mixed_Noise
import noise_models
from noise_model_constructors import *
# TODO: from Laplace import Laplace

View file

@ -18,8 +18,7 @@ class EP(likelihood):
self.data = data
self.num_data, self.output_dim = self.data.shape
self.is_heteroscedastic = True
self.Nparams = 0
self._transf_data = self.noise_model._preprocess_values(data)
self.num_params = 0
#Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde)
@ -55,6 +54,22 @@ class EP(likelihood):
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
return self.noise_model.predictive_values(mu,var)
def log_predictive_density(self, y_test, mu_star, var_star):
"""
Calculation of the log predictive density
.. math::
p(y_{*}|D) = \\int p(y_{*}|f_{*})p(f_{*}|\\mu_{*},\\sigma^{2}_{*})df_{*}
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
:type mu_star: (Nx1) array
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
:type var_star: (Nx1) array
"""
return self.noise_model.log_predictive_density(y_test, mu_star, var_star)
def _get_params(self):
#return np.zeros(0)
return self.noise_model._get_params()
@ -134,7 +149,7 @@ class EP(likelihood):
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
@ -233,7 +248,7 @@ class EP(likelihood):
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
@ -336,7 +351,7 @@ class EP(likelihood):
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])

View file

@ -31,7 +31,7 @@ class EP_Mixed_Noise(likelihood):
self.data = np.vstack(data_list)
self.N, self.output_dim = self.data.shape
self.is_heteroscedastic = True
self.Nparams = 0#FIXME
self.num_params = 0#FIXME
self._transf_data = np.vstack([noise_model._preprocess_values(data) for noise_model,data in zip(noise_model_list,data_list)])
#TODO non-gaussian index

View file

@ -15,7 +15,7 @@ class Gaussian(likelihood):
"""
def __init__(self, data, variance=1., normalize=False):
self.is_heteroscedastic = False
self.Nparams = 1
self.num_params = 1
self.Z = 0. # a correction factor which accounts for the approximation made
N, self.output_dim = data.shape
@ -90,11 +90,25 @@ class Gaussian(likelihood):
_95pc = mean + 2.*np.sqrt(true_var)
return mean, true_var, _5pc, _95pc
def fit_full(self):
def log_predictive_density(self, y_test, mu_star, var_star):
"""
No approximations needed
Calculation of the log predictive density
.. math::
p(y_{*}|D) = \\int p(y_{*}|f_{*})p(f_{*}|\\mu_{*},\\sigma^{2}_{*})df_{*}
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
:type mu_star: (Nx1) array
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
:type var_star: (Nx1) array
.. Note::
Works as if each test point was provided individually, i.e. not full_cov
"""
pass
y_rescaled = (y_test - self._offset)/self._scale
return -0.5*np.log(2*np.pi) -0.5*np.log(var_star + self._variance) -0.5*(np.square(y_rescaled - mu_star))/(var_star + self._variance)
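# The closed form above is just the standard Gaussian convolution identity:
#
# .. math::
#     \int \mathcal{N}(y_{*}|f_{*}, \sigma^{2})\mathcal{N}(f_{*}|\mu_{*}, \sigma^{2}_{*})df_{*} = \mathcal{N}(y_{*}|\mu_{*}, \sigma^{2}_{*} + \sigma^{2})
#
# so the log predictive density is a single gaussian evaluation with the noise
# variance added onto the latent predictive variance.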
def _gradients(self, partial):
return np.sum(partial)

View file

@ -23,14 +23,14 @@ class Gaussian_Mixed_Noise(likelihood):
:type normalize: False|True
"""
def __init__(self, data_list, noise_params=None, normalize=True):
self.Nparams = len(data_list)
self.num_params = len(data_list)
self.n_list = [data.size for data in data_list]
self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.Nparams),self.n_list)])
self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.num_params),self.n_list)])
if noise_params is None:
noise_params = [1.] * self.Nparams
noise_params = [1.] * self.num_params
else:
assert self.Nparams == len(noise_params), 'Number of noise parameters does not match the number of noise models.'
assert self.num_params == len(noise_params), 'Number of noise parameters does not match the number of noise models.'
self.noise_model_list = [Gaussian(Y,variance=v,normalize = normalize) for Y,v in zip(data_list,noise_params)]
self.n_params = [noise_model._get_params().size for noise_model in self.noise_model_list]

390
GPy/likelihoods/laplace.py Normal file
View file

@ -0,0 +1,390 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#
#Parts of this file were influenced by the Matlab GPML framework written by
#Carl Edward Rasmussen & Hannes Nickisch, however all bugs are our own.
#
#The GPML code is released under the FreeBSD License.
#Copyright (c) 2005-2013 Carl Edward Rasmussen & Hannes Nickisch. All rights reserved.
#
#The code and associated documentation is available from
#http://gaussianprocess.org/gpml/code.
import numpy as np
import scipy as sp
from likelihood import likelihood
from ..util.linalg import mdot, jitchol, pddet, dpotrs
from functools import partial as partial_func
class Laplace(likelihood):
"""Laplace approximation to a posterior"""
def __init__(self, data, noise_model, extra_data=None):
"""
Laplace Approximation
Find the mode \hat{f} and the hessian of the unnormalised
posterior at that point (using Newton-Raphson), then compute
the GP variables (i.e. generate a Y^{squiggle} and
Z^{squiggle} which make a gaussian likelihood give the same,
normalised, posterior as the laplace approximation)
Arguments
---------
:param data: array of data the likelihood function is approximating
:type data: NxD
:param noise_model: likelihood function - subclass of noise_model
:type noise_model: noise_model
:param extra_data: additional data used by some likelihood functions,
"""
self.data = data
self.noise_model = noise_model
self.extra_data = extra_data
#Inital values
self.N, self.D = self.data.shape
self.is_heteroscedastic = True
self.num_params = 0
self.NORMAL_CONST = ((0.5 * self.N) * np.log(2 * np.pi))
self.restart()
likelihood.__init__(self)
def restart(self):
"""
Reset likelihood variables to their defaults
"""
#Initial values for the GP variables
self.Y = np.zeros((self.N, 1))
self.covariance_matrix = np.eye(self.N)
self.precision = np.ones(self.N)[:, None]
self.Z = 0
self.YYT = None
self.old_Ki_f = None
def predictive_values(self, mu, var, full_cov):
if full_cov:
raise NotImplementedError("Cannot make correlated predictions\
with a Laplace likelihood")
return self.noise_model.predictive_values(mu, var)
def log_predictive_density(self, y_test, mu_star, var_star):
"""
Calculation of the log predictive density
.. math::
p(y_{*}|D) = \\int p(y_{*}|f_{*})p(f_{*}|\\mu_{*},\\sigma^{2}_{*})df_{*}
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
:type mu_star: (Nx1) array
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
:type var_star: (Nx1) array
"""
return self.noise_model.log_predictive_density(y_test, mu_star, var_star)
def _get_params(self):
return np.asarray(self.noise_model._get_params())
def _get_param_names(self):
return self.noise_model._get_param_names()
def _set_params(self, p):
return self.noise_model._set_params(p)
def _shared_gradients_components(self):
d3lik_d3fhat = self.noise_model.d3logpdf_df3(self.f_hat, self.data, extra_data=self.extra_data)
dL_dfhat = 0.5*(np.diag(self.Ki_W_i)[:, None]*d3lik_d3fhat).T #why isn't this -0.5?
I_KW_i = np.eye(self.N) - np.dot(self.K, self.Wi_K_i)
return dL_dfhat, I_KW_i
def _Kgradients(self):
"""
Gradients with respect to prior kernel parameters dL_dK to be chained
with dK_dthetaK to give dL_dthetaK
:returns: dL_dK matrix
:rtype: Matrix (1 x num_kernel_params)
"""
dL_dfhat, I_KW_i = self._shared_gradients_components()
dlp = self.noise_model.dlogpdf_df(self.f_hat, self.data, extra_data=self.extra_data)
#Explicit
#expl_a = np.dot(self.Ki_f, self.Ki_f.T)
#expl_b = self.Wi_K_i
#expl = 0.5*expl_a - 0.5*expl_b
#dL_dthetaK_exp = dK_dthetaK(expl, X)
#Implicit
impl = mdot(dlp, dL_dfhat, I_KW_i)
#No longer required as we are computing these in the gp already
#otherwise we would take them away and add them back
#dL_dthetaK_imp = dK_dthetaK(impl, X)
#dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp
#dL_dK = expl + impl
#No need to compute explicit as we are computing dZ_dK to account
#for the difference between the K gradients of a normal GP,
#and the K gradients including the implicit part
dL_dK = impl
return dL_dK
def _gradients(self, partial):
"""
Gradients with respect to likelihood parameters (dL_dthetaL)
:param partial: Not needed by this likelihood
:type partial: lambda function
:rtype: array of derivatives (1 x num_likelihood_params)
"""
dL_dfhat, I_KW_i = self._shared_gradients_components()
dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = self.noise_model._laplace_gradients(self.f_hat, self.data, extra_data=self.extra_data)
#len(dlik_dthetaL)
num_params = len(self._get_param_names())
# make space for one derivative for each likelihood parameter
dL_dthetaL = np.zeros(num_params)
for thetaL_i in range(num_params):
#Explicit
dL_dthetaL_exp = ( np.sum(dlik_dthetaL[:, thetaL_i])
#- 0.5*np.trace(mdot(self.Ki_W_i, (self.K, np.diagflat(dlik_hess_dthetaL[thetaL_i]))))
+ np.dot(0.5*np.diag(self.Ki_W_i)[:,None].T, dlik_hess_dthetaL[:, thetaL_i])
)
#Implicit
dfhat_dthetaL = mdot(I_KW_i, self.K, dlik_grad_dthetaL[:, thetaL_i])
dL_dthetaL_imp = np.dot(dL_dfhat, dfhat_dthetaL)
dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
return dL_dthetaL
def _compute_GP_variables(self):
"""
Generate data Y which would give the normal distribution identical
to the laplace approximation to the posterior, but normalised
GPy expects a likelihood to be gaussian, so we need to calculate
the data Y^{\tilde} that makes the posterior match that found
by a laplace approximation to a non-gaussian likelihood but with
a gaussian likelihood
Firstly,
The hessian of the unnormalised posterior distribution is (K^{-1} + W)^{-1},
i.e. z*N(f|f^{\hat}, (K^{-1} + W)^{-1}), but this assumes a non-gaussian likelihood;
we wish to find the covariance \Sigma^{\tilde}
that has the same curvature but uses our new simulated data Y^{\tilde},
i.e. we do N(Y^{\tilde}|f^{\hat}, \Sigma^{\tilde})N(f|0, K) = z*N(f|f^{\hat}, (K^{-1} + W)^{-1})
and we wish to find what Y^{\tilde} and \Sigma^{\tilde} are.
We find that Y^{\tilde} = W^{-1}(K^{-1} + W)f^{\hat} and \Sigma^{\tilde} = W^{-1}
Secondly,
GPy optimizes the log marginal log p(y) = -0.5*ln|K+\Sigma^{\tilde}| - 0.5*Y^{\tilde T}(K + \Sigma^{\tilde})^{-1}Y^{\tilde} + lik.Z
So we can absorb any differences between that and our log marginal likelihood approximation
log p^{\squiggle}(y) = -0.5*f^{\hat T}K^{-1}f^{\hat} + log p(y|f^{\hat}) - 0.5*log |K||K^{-1} + W|
which we want to optimize instead; by equating them and rearranging, the difference is added onto
the log p(y) that GPy optimizes by default
Thirdly,
Since we have gradients that depend on how we move f^{\hat}, we have implicit components
as well as the explicit dL_dK; we hold these differences in dZ_dK and add them to dL_dK in the
gp.py code
"""
Wi = 1.0/self.W
self.Sigma_tilde = np.diagflat(Wi)
Y_tilde = Wi*self.Ki_f + self.f_hat
self.Wi_K_i = self.W12BiW12
self.ln_det_Wi_K = pddet(self.Sigma_tilde + self.K)
self.lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data)
self.y_Wi_Ki_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
Z_tilde = (+ self.lik
- 0.5*self.ln_B_det
+ 0.5*self.ln_det_Wi_K
- 0.5*self.f_Ki_f
+ 0.5*self.y_Wi_Ki_i_y
)
#Convert to float as it's (1, 1) and Z must be a scalar
self.Z = np.float64(Z_tilde)
self.Y = Y_tilde
self.YYT = np.dot(self.Y, self.Y.T)
self.covariance_matrix = self.Sigma_tilde
self.precision = 1.0 / np.diag(self.covariance_matrix)[:, None]
#Compute dZ_dK which is how the approximated distributions gradients differ from the dL_dK computed for other likelihoods
self.dZ_dK = self._Kgradients()
#+ 0.5*self.Wi_K_i - 0.5*np.dot(self.Ki_f, self.Ki_f.T) #since we are not adding the K gradients explicit part theres no need to compute this again
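# The matching above in equation form (rearranging the docstring identities):
#
# .. math::
#     \tilde{\Sigma} = W^{-1}, \qquad
#     \tilde{Y} = W^{-1}(K^{-1} + W)\hat{f} = W^{-1}K^{-1}\hat{f} + \hat{f}
#
# which is exactly the Y_tilde = Wi*self.Ki_f + self.f_hat line above.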
def fit_full(self, K):
"""
The laplace approximation algorithm: find the mode f^{\hat} and expand the hessian around it
For nomenclature see Rasmussen & Williams 2006 - modified for numerical stability
:param K: Prior covariance matrix evaluated at locations X
:type K: NxN matrix
"""
self.K = K.copy()
#Find mode
self.f_hat = self.rasm_mode(self.K)
#Compute hessian and other variables at mode
self._compute_likelihood_variables()
#Compute fake variables replicating laplace approximation to posterior
self._compute_GP_variables()
def _compute_likelihood_variables(self):
"""
Compute the variables required to compute gaussian Y variables
"""
#At this point get the hessian matrix (or vector as W is diagonal)
self.W = -self.noise_model.d2logpdf_df2(self.f_hat, self.data, extra_data=self.extra_data)
#TODO: Could save on computation when using rasm by returning these, means it isn't just a "mode finder" though
self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N))
#self.Ki_f has already been set by rasm_mode
self.f_Ki_f = np.dot(self.f_hat.T, self.Ki_f)
self.Ki_W_i = self.K - mdot(self.K, self.W12BiW12, self.K)
def _compute_B_statistics(self, K, W, a):
"""
Rasmussen suggests the use of a numerically stable positive definite matrix B
which has positive diagonal elements and can be easily inverted
:param K: Prior Covariance matrix evaluated at locations X
:type K: NxN matrix
:param W: Negative hessian at a point (diagonal matrix)
:type W: Vector of diagonal values of hessian (1xN)
:param a: Matrix to calculate W12BiW12a
:type a: Matrix NxN
:returns: (W12BiW12, ln_B_det)
"""
if not self.noise_model.log_concave:
#print "Under 1e-10: {}".format(np.sum(W < 1e-10))
W[W < 1e-6] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
# If the likelihood is non-log-concave. We wan't to say that there is a negative variance
# To cause the posterior to become less certain than the prior and likelihood,
# This is a property only held by non-log-concave likelihoods
#W is diagonal so its sqrt is just the sqrt of the diagonal elements
W_12 = np.sqrt(W)
B = np.eye(self.N) + W_12*K*W_12.T
L = jitchol(B)
W12BiW12 = W_12*dpotrs(L, np.asfortranarray(W_12*a), lower=1)[0]
ln_B_det = 2*np.sum(np.log(np.diag(L)))
return W12BiW12, ln_B_det
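# The identities being used here (Rasmussen & Williams 2006, ch. 3):
#
# .. math::
#     B = I + W^{\frac{1}{2}}KW^{\frac{1}{2}}, \qquad
#     (K + W^{-1})^{-1} = W^{\frac{1}{2}}B^{-1}W^{\frac{1}{2}}, \qquad
#     \ln|B| = \ln|K^{-1} + W| + \ln|K|
#
# B's eigenvalues are bounded below by 1, so its Cholesky factor is stable
# even when K is ill-conditioned.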
def rasm_mode(self, K, MAX_ITER=30):
"""
Rasmussen's numerically stable mode finding
For nomenclature see Rasmussen & Williams 2006
Influenced by GPML (BSD) code, all errors are our own
:param K: Covariance matrix evaluated at locations X
:type K: NxN matrix
:param MAX_ITER: Maximum number of iterations of newton-raphson before forcing finish of optimisation
:type MAX_ITER: scalar
:returns: f_hat, mode on which to make laplace approximation
:rtype: Nx1 matrix
"""
#old_Ki_f = np.zeros((self.N, 1))
#Start f's at zero originally
if self.old_Ki_f is None:
old_Ki_f = np.zeros((self.N, 1))
f = np.dot(K, old_Ki_f)
else:
#Start at the old best point
old_Ki_f = self.old_Ki_f.copy()
f = self.f_hat.copy()
new_obj = -np.inf
old_obj = np.inf
def obj(Ki_f, f):
return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)
difference = np.inf
epsilon = 1e-5
#step_size = 1
#rs = 0
i = 0
while difference > epsilon and i < MAX_ITER:
W = -self.noise_model.d2logpdf_df2(f, self.data, extra_data=self.extra_data)
W_f = W*f
grad = self.noise_model.dlogpdf_df(f, self.data, extra_data=self.extra_data)
b = W_f + grad
W12BiW12Kb, _ = self._compute_B_statistics(K, W.copy(), np.dot(K, b))
#Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
full_step_Ki_f = b - W12BiW12Kb
dKi_f = full_step_Ki_f - old_Ki_f
f_old = f.copy()
def inner_obj(step_size, old_Ki_f, dKi_f, K):
Ki_f = old_Ki_f + step_size*dKi_f
f = np.dot(K, Ki_f)
# This is nasty, need to set something within an optimization though
self.tmp_Ki_f = Ki_f.copy()
self.tmp_f = f.copy()
return -obj(Ki_f, f)
i_o = partial_func(inner_obj, old_Ki_f=old_Ki_f, dKi_f=dKi_f, K=K)
#Find the stepsize that minimizes the objective function using a brent line search
#The tolerance and maxiter matter for speed! Seems to be best to keep them low and make more full
#steps than get this exact then make a step, if B was bigger it might be the other way around though
new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':5}).fun
f = self.tmp_f.copy()
Ki_f = self.tmp_Ki_f.copy()
#Optimize without linesearch
#f_old = f.copy()
#update_passed = False
#while not update_passed:
#Ki_f = old_Ki_f + step_size*dKi_f
#f = np.dot(K, Ki_f)
#old_obj = new_obj
#new_obj = obj(Ki_f, f)
#difference = new_obj - old_obj
##print "difference: ",difference
#if difference < 0:
##print "Objective function rose", np.float(difference)
##If the objective function isn't rising, restart optimization
#step_size *= 0.8
##print "Reducing step-size to {ss:.3} and restarting optimization".format(ss=step_size)
##objective function isn't increasing, try reducing step size
#f = f_old.copy() #it's actually faster not to go back to old location and just zigzag across the mode
#old_obj = new_obj
#rs += 1
#else:
#update_passed = True
#old_Ki_f = self.Ki_f.copy()
#difference = abs(new_obj - old_obj)
#old_obj = new_obj.copy()
#difference = np.abs(np.sum(f - f_old))
difference = np.abs(np.sum(Ki_f - old_Ki_f))
old_Ki_f = Ki_f.copy()
i += 1
self.old_Ki_f = old_Ki_f.copy()
if difference > epsilon:
print "Not perfect f_hat fit difference: {}".format(difference)
self.Ki_f = Ki_f
return f
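# For contrast, the textbook Newton update that rasm_mode reorganises for
# numerical stability is f <- (K^{-1} + W)^{-1}(Wf + \nabla log p(y|f)).
# A naive numpy sketch, assuming hypothetical dlogpdf_df/d2logpdf_df2
# callables returning an Nx1 gradient and an NxN diagonal hessian:
#
#   import numpy as np
#   def naive_mode(K, dlogpdf_df, d2logpdf_df2, iters=30):
#       f = np.zeros((K.shape[0], 1))
#       for _ in range(iters):
#           W = -d2logpdf_df2(f)                          # NxN diagonal
#           b = np.dot(W, f) + dlogpdf_df(f)
#           f = np.linalg.solve(np.linalg.inv(K) + W, b)  # unstable for ill-conditioned K
#       return f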

View file

@ -23,6 +23,7 @@ class likelihood(Parameterized):
"""
def __init__(self):
Parameterized.__init__(self)
self.dZ_dK = 0
def _get_params(self):
raise NotImplementedError
@ -33,11 +34,36 @@ class likelihood(Parameterized):
def _set_params(self, x):
raise NotImplementedError
def fit(self):
raise NotImplementedError
def fit_full(self, K):
"""
No approximations needed by default
"""
pass
def restart(self):
"""
No need to restart if not an approximation
"""
pass
def _gradients(self, partial):
raise NotImplementedError
def predictive_values(self, mu, var):
raise NotImplementedError
def log_predictive_density(self, y_test, mu_star, var_star):
"""
Calculation of the log predictive density
.. math::
p(y_{*}|D) = \\int p(y_{*}|f_{*})p(f_{*}|\\mu_{*},\\sigma^{2}_{*})df_{*}
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
:type mu_star: (Nx1) array
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
:type var_star: (Nx1) array
"""
raise NotImplementedError

View file

@ -4,9 +4,9 @@
import numpy as np
import noise_models
def binomial(gp_link=None):
def bernoulli(gp_link=None):
"""
Construct a binomial likelihood
Construct a bernoulli likelihood
:param gp_link: a GPy gp_link function
"""
@ -27,16 +27,17 @@ def binomial(gp_link=None):
analytical_mean = False
analytical_variance = False
return noise_models.binomial_noise.Binomial(gp_link,analytical_mean,analytical_variance)
return noise_models.bernoulli_noise.Bernoulli(gp_link,analytical_mean,analytical_variance)
def exponential(gp_link=None):
"""
Construct a binomial likelihood
Construct an exponential likelihood
:param gp_link: a GPy gp_link function
"""
if gp_link is None:
gp_link = noise_models.gp_transformations.Identity()
gp_link = noise_models.gp_transformations.Log_ex_1()
analytical_mean = False
analytical_variance = False
@ -85,4 +86,36 @@ def gamma(gp_link=None,beta=1.):
analytical_variance = False
return noise_models.gamma_noise.Gamma(gp_link,analytical_mean,analytical_variance,beta)
def gaussian(gp_link=None, variance=2, D=None, N=None):
"""
Construct a Gaussian likelihood
:param gp_link: a GPy gp_link function
:param variance: variance
:type variance: scalar
:returns: Gaussian noise model
"""
if gp_link is None:
gp_link = noise_models.gp_transformations.Identity()
analytical_mean = True
analytical_variance = True # ?
return noise_models.gaussian_noise.Gaussian(gp_link, analytical_mean,
analytical_variance, variance=variance, D=D, N=N)
def student_t(gp_link=None, deg_free=5, sigma2=2):
"""
Construct a Student t likelihood
:param gp_link: a GPy gp_link function
:param deg_free: degrees of freedom of student-t
:type deg_free: scalar
:param sigma2: variance
:type sigma2: scalar
:returns: Student-T noise model
"""
if gp_link is None:
gp_link = noise_models.gp_transformations.Identity()
analytical_mean = True
analytical_variance = True
return noise_models.student_t_noise.StudentT(gp_link, analytical_mean,
analytical_variance,deg_free, sigma2)
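Taken together with the new Laplace class above, the intended wiring looks
something like this sketch (hypothetical data Y; constructor signatures as
defined in this file, exported via the likelihoods __init__):

import numpy as np
import GPy
Y = np.random.randn(10, 1)
noise = GPy.likelihoods.student_t(deg_free=5, sigma2=2)
laplace_lik = GPy.likelihoods.Laplace(Y, noise)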

View file

@ -1,7 +1,8 @@
import noise_distributions
import binomial_noise
import bernoulli_noise
import exponential_noise
import gaussian_noise
import gamma_noise
import poisson_noise
import student_t_noise
import gp_transformations

View file

@ -0,0 +1,216 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
class Bernoulli(NoiseDistribution):
"""
Bernoulli likelihood
.. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}}
.. Note::
Y is expected to take values in {-1,1}
Probit likelihood usually used
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
super(Bernoulli, self).__init__(gp_link,analytical_mean,analytical_variance)
def _preprocess_values(self,Y):
"""
Check if the values of the observations correspond to the values
assumed by the likelihood function.
.. Note:: Binary classification algorithm works better with classes {-1,1}
"""
Y_prep = Y.copy()
Y1 = Y[Y.flatten()==1].size
Y2 = Y[Y.flatten()==0].size
assert Y1 + Y2 == Y.size, 'Bernoulli likelihood is meant to be used only with outputs in {0,1}.'
Y_prep[Y.flatten() == 0] = -1
return Y_prep
def _moments_match_analytical(self,data_i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param data_i: ith observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
if data_i == 1:
sign = 1.
elif data_i == 0:
sign = -1.
else:
raise ValueError("bad value for Bernoulli observation; must be in {0,1}")
if isinstance(self.gp_link,gp_transformations.Probit):
z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z)
phi = std_norm_pdf(z)
mu_hat = v_i/tau_i + sign*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
elif isinstance(self.gp_link,gp_transformations.Heaviside):
a = sign*v_i/np.sqrt(tau_i)
Z_hat = std_norm_cdf(a)
N = std_norm_pdf(a)
mu_hat = v_i/tau_i + sign*N/Z_hat/np.sqrt(tau_i)
sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i
if np.any(np.isnan([Z_hat, mu_hat, sigma2_hat])):
raise ValueError("Z_hat, mu_hat or sigma2_hat is nan")
else:
raise ValueError("Exact moment matching not available for link {}".format(self.gp_link.gp_transformations.__name__))
return Z_hat, mu_hat, sigma2_hat
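# For the probit link these are the standard EP moment-matching formulas
# (e.g. Rasmussen & Williams 2006, sec. 3.6); with cavity mean
# mu_i = v_i/tau_i and variance sigma_i^2 = 1/tau_i:
#
# .. math::
#     z = \frac{y_{i}\mu_{i}}{\sqrt{1 + \sigma_{i}^{2}}}, \qquad
#     \hat{Z} = \Phi(z), \qquad
#     \hat{\mu} = \mu_{i} + \frac{y_{i}\sigma_{i}^{2}\phi(z)}{\Phi(z)\sqrt{1 + \sigma_{i}^{2}}}
#
# with y_i = sign; the code's v_i/np.sqrt(tau_i**2 + tau_i) is the same z
# after multiplying numerator and denominator through by tau_i.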
def _predictive_mean_analytical(self,mu,sigma):
if isinstance(self.gp_link,gp_transformations.Probit):
return stats.norm.cdf(mu/np.sqrt(1+sigma**2))
elif isinstance(self.gp_link,gp_transformations.Heaviside):
return stats.norm.cdf(mu/sigma)
else:
raise NotImplementedError
def _predictive_variance_analytical(self,mu,sigma, pred_mean):
if isinstance(self.gp_link,gp_transformations.Heaviside):
return 0.
else:
raise NotImplementedError
def pdf_link(self, link_f, y, extra_data=None):
"""
Likelihood function given link(f)
.. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:returns: likelihood evaluated for this point
:rtype: float
.. Note:
Each y_i must be in {0,1}
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
objective = (link_f**y) * ((1.-link_f)**(1.-y))
return np.exp(np.sum(np.log(objective)))
def logpdf_link(self, link_f, y, extra_data=None):
"""
Log Likelihood function given link(f)
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-\\lambda(f_{i}))
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:returns: log likelihood evaluated at points link(f)
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
#objective = y*np.log(link_f) + (1.-y)*np.log(link_f)
objective = np.where(y==1, np.log(link_f), np.log(1-link_f))
return np.sum(objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the pdf at y, given link(f) w.r.t link(f)
.. math::
\\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:returns: gradient of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
grad = (y/link_f) - (1.-y)/(1-link_f)
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link(f), w.r.t link(f); the hessian will be 0 unless i == j,
i.e. second derivative of logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
.. math::
\\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since everywhere else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i), not on link(f_(j!=i)))
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
return d2logpdf_dlink2
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i})}{(1-\\lambda(f))^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:returns: third derivative of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
return d3logpdf_dlink3
def _mean(self,gp):
"""
Expected value of y under the mass (or density) function
"""
return self.gp_link.transf(gp)
def _variance(self,gp):
"""
Variance of y under the mass (or density) function
"""
p = self.gp_link.transf(gp)
return p*(1.-p)
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
ns = np.ones_like(gp, dtype=int)
Ysim = np.random.binomial(ns, self.gp_link.transf(gp))
return Ysim.reshape(orig_shape)
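# A quick sanity check of dlogpdf_dlink against a central finite difference,
# using plain numpy replicas of the formulas above (no GPy calls; link_f
# strictly inside (0, 1)):
#
#   import numpy as np
#   y = np.array([[1.], [0.]])
#   link_f = np.array([[0.7], [0.3]])
#   eps = 1e-6
#   logpdf = lambda lf: np.where(y == 1, np.log(lf), np.log(1. - lf))
#   fd_grad = (logpdf(link_f + eps) - logpdf(link_f - eps))/(2.*eps)
#   grad = y/link_f - (1. - y)/(1. - link_f)
#   assert np.allclose(fd_grad, grad, atol=1e-5)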

View file

@ -1,132 +0,0 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
class Binomial(NoiseDistribution):
"""
Probit likelihood
Y is expected to take values in {-1,1}
-----
$$
L(x) = \\Phi (Y_i*f_i)
$$
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
super(Binomial, self).__init__(gp_link,analytical_mean,analytical_variance)
def _preprocess_values(self,Y):
"""
Check if the values of the observations correspond to the values
assumed by the likelihood function.
..Note:: Binary classification algorithm works better with classes {-1,1}
"""
Y_prep = Y.copy()
Y1 = Y[Y.flatten()==1].size
Y2 = Y[Y.flatten()==0].size
assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
Y_prep[Y.flatten() == 0] = -1
return Y_prep
def _moments_match_analytical(self,data_i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
if isinstance(self.gp_link,gp_transformations.Probit):
z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z)
phi = std_norm_pdf(z)
mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
elif isinstance(self.gp_link,gp_transformations.Heaviside):
a = data_i*v_i/np.sqrt(tau_i)
Z_hat = std_norm_cdf(a)
N = std_norm_pdf(a)
mu_hat = v_i/tau_i + data_i*N/Z_hat/np.sqrt(tau_i)
sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i
if np.any(np.isnan([Z_hat, mu_hat, sigma2_hat])):
stop
return Z_hat, mu_hat, sigma2_hat
def _predictive_mean_analytical(self,mu,sigma):
if isinstance(self.gp_link,gp_transformations.Probit):
return stats.norm.cdf(mu/np.sqrt(1+sigma**2))
elif isinstance(self.gp_link,gp_transformations.Heaviside):
return stats.norm.cdf(mu/sigma)
else:
raise NotImplementedError
def _predictive_variance_analytical(self,mu,sigma, pred_mean):
if isinstance(self.gp_link,gp_transformations.Heaviside):
return 0.
else:
raise NotImplementedError
def _mass(self,gp,obs):
#NOTE obs must be in {0,1}
p = self.gp_link.transf(gp)
return p**obs * (1.-p)**(1.-obs)
def _nlog_mass(self,gp,obs):
p = self.gp_link.transf(gp)
return obs*np.log(p) + (1.-obs)*np.log(1-p)
def _dnlog_mass_dgp(self,gp,obs):
p = self.gp_link.transf(gp)
dp = self.gp_link.dtransf_df(gp)
return obs/p * dp - (1.-obs)/(1.-p) * dp
def _d2nlog_mass_dgp2(self,gp,obs):
p = self.gp_link.transf(gp)
return (obs/p + (1.-obs)/(1.-p))*self.gp_link.d2transf_df2(gp) + ((1.-obs)/(1.-p)**2-obs/p**2)*self.gp_link.dtransf_df(gp)
def _mean(self,gp):
"""
Mass (or density) function
"""
return self.gp_link.transf(gp)
def _dmean_dgp(self,gp):
return self.gp_link.dtransf_df(gp)
def _d2mean_dgp2(self,gp):
return self.gp_link.d2transf_df2(gp)
def _variance(self,gp):
"""
Mass (or density) function
"""
p = self.gp_link.transf(gp)
return p*(1.-p)
def _dvariance_dgp(self,gp):
return self.gp_link.dtransf_df(gp)*(1. - 2.*self.gp_link.transf(gp))
def _d2variance_dgp2(self,gp):
return self.gp_link.d2transf_df2(gp)*(1. - 2.*self.gp_link.transf(gp)) - 2*self.gp_link.dtransf_df(gp)**2
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param size: number of samples to compute
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
Ysim = np.array([np.random.binomial(1,self.gp_link.transf(gpj),size=1) for gpj in gp])
return Ysim.reshape(orig_shape)

View file

@ -24,24 +24,113 @@ class Exponential(NoiseDistribution):
def _preprocess_values(self,Y):
return Y
def _mass(self,gp,obs):
def pdf_link(self, link_f, y, extra_data=None):
"""
Mass (or density) function
"""
return np.exp(-obs/self.gp_link.transf(gp))/self.gp_link.transf(gp)
Likelihood function given link(f)
def _nlog_mass(self,gp,obs):
"""
Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
"""
return obs/self.gp_link.transf(gp) + np.log(self.gp_link.transf(gp))
.. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})\\exp (-y_{i}\\lambda(f_{i}))
def _dnlog_mass_dgp(self,gp,obs):
return ( 1./self.gp_link.transf(gp) - obs/self.gp_link.transf(gp)**2) * self.gp_link.dtransf_df(gp)
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
objective = link_f*np.exp(-y*link_f)
return np.exp(np.sum(np.log(objective)))
#return np.exp(np.sum(-y/link_f - np.log(link_f) ))
def _d2nlog_mass_dgp2(self,gp,obs):
fgp = self.gp_link.transf(gp)
return (2*obs/fgp**3 - 1./fgp**2) * self.gp_link.dtransf_df(gp)**2 + ( 1./fgp - obs/fgp**2) * self.gp_link.d2transf_df2(gp)
def logpdf_link(self, link_f, y, extra_data=None):
"""
Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\lambda(f_{i}) - y_{i}\\lambda(f_{i})
:param link_f: latent variables (link(f))
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
log_objective = np.log(link_f) - y*link_f
#logpdf_link = np.sum(-np.log(link_f) - y/link_f)
return np.sum(log_objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\lambda(f)} - y_{i}
:param link_f: latent variables (f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
grad = 1./link_f - y
#grad = y/(link_f**2) - 1./link_f
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\frac{1}{\\lambda(f_{i})^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since everywhere else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i), not on link(f_(j!=i)))
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
hess = -1./(link_f**2)
#hess = -2*y/(link_f**3) + 1/(link_f**2)
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2}{\\lambda(f_{i})^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3lik_dlink3 = 2./(link_f**3)
#d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
return d3lik_dlink3
def _mean(self,gp):
"""
@ -49,20 +138,19 @@ class Exponential(NoiseDistribution):
"""
return self.gp_link.transf(gp)
def _dmean_dgp(self,gp):
return self.gp_link.dtransf_df(gp)
def _d2mean_dgp2(self,gp):
return self.gp_link.d2transf_df2(gp)
def _variance(self,gp):
"""
Variance of y under the mass (or density) function
"""
return self.gp_link.transf(gp)**2
def _dvariance_dgp(self,gp):
return 2*self.gp_link.transf(gp)*self.gp_link.dtransf_df(gp)
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
def _d2variance_dgp2(self,gp):
return 2 * (self.gp_link.dtransf_df(gp)**2 + self.gp_link.transf(gp)*self.gp_link.d2transf_df2(gp))
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
Ysim = np.random.exponential(1.0/self.gp_link.transf(gp))
return Ysim.reshape(orig_shape)

View file

@ -12,11 +12,11 @@ from noise_distributions import NoiseDistribution
class Gamma(NoiseDistribution):
"""
Gamma likelihood
Y is expected to take values in {0,1,2,...}
-----
$$
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
$$
.. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\beta^{\\alpha_{i}}}{\\Gamma(\\alpha_{i})}y_{i}^{\\alpha_{i}-1}e^{-\\beta y_{i}}\\\\
\\alpha_{i} = \\beta \\lambda(f_{i})
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.):
self.beta = beta
@ -25,26 +25,122 @@ class Gamma(NoiseDistribution):
def _preprocess_values(self,Y):
return Y
def _mass(self,gp,obs):
def pdf_link(self, link_f, y, extra_data=None):
"""
Mass (or density) function
Likelihood function given link(f)
.. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\beta^{\\alpha_{i}}}{\\Gamma(\\alpha_{i})}y_{i}^{\\alpha_{i}-1}e^{-\\beta y_{i}}\\\\
\\alpha_{i} = \\beta \\lambda(f_{i})
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
#return stats.gamma.pdf(obs,a = self.gp_link.transf(gp)/self.variance,scale=self.variance)
alpha = self.gp_link.transf(gp)*self.beta
return obs**(alpha - 1.) * np.exp(-self.beta*obs) * self.beta**alpha / special.gamma(alpha)
alpha = link_f*self.beta
objective = (y**(alpha - 1.) * np.exp(-self.beta*y) * self.beta**alpha)/ special.gamma(alpha)
return np.exp(np.sum(np.log(objective)))
def _nlog_mass(self,gp,obs):
def logpdf_link(self, link_f, y, extra_data=None):
"""
Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\
\\alpha_{i} = \\beta \\lambda(f_{i})
:param link_f: latent variables (link(f))
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
alpha = self.gp_link.transf(gp)*self.beta
return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
#alpha = self.gp_link.transf(gp)*self.beta
#return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))
alpha = link_f*self.beta
log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y
return np.sum(log_objective)
def _dnlog_mass_dgp(self,gp,obs):
return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
def _d2nlog_mass_dgp2(self,gp,obs):
return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
.. math::
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\beta\\log (\\beta y_{i}) - \\Psi(\\alpha_{i})\\beta\\\\
\\alpha_{i} = \\beta \\lambda(f_{i})
:param link_f: latent variables (f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
grad = self.beta*np.log(self.beta*y) - special.psi(self.beta*link_f)*self.beta
#old
#return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
\\alpha_{i} = \\beta \\lambda(f_{i})
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since everywhere else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i), not on link(f_(j!=i)))
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
hess = -special.polygamma(1, self.beta*link_f)*(self.beta**2)
#old
#return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}^{2}}\\\\
\\alpha_{i} = \\beta \\lambda(f_{i})
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3)
return d3lik_dlink3
def _mean(self,gp):
"""
@ -52,20 +148,8 @@ class Gamma(NoiseDistribution):
"""
return self.gp_link.transf(gp)
def _dmean_dgp(self,gp):
return self.gp_link.dtransf_df(gp)
def _d2mean_dgp2(self,gp):
return self.gp_link.d2transf_df2(gp)
def _variance(self,gp):
"""
Variance of y under the mass (or density) function
"""
return self.gp_link.transf(gp)/self.beta
def _dvariance_dgp(self,gp):
return self.gp_link.dtransf_df(gp)/self.beta
def _d2variance_dgp2(self,gp):
return self.gp_link.d2transf_df2(gp)/self.beta

View file

@ -12,11 +12,17 @@ class Gaussian(NoiseDistribution):
"""
Gaussian likelihood
:param mean: mean value of the Gaussian distribution
:param variance: mean value of the Gaussian distribution
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
:param variance: variance value of the Gaussian distribution
:param N: Number of data points
:type N: int
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1.):
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1., D=None, N=None):
self.variance = variance
self.N = N
self._set_params(np.asarray(variance))
super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance)
def _get_params(self):
@ -25,8 +31,13 @@ class Gaussian(NoiseDistribution):
def _get_param_names(self):
return ['noise_model_variance']
def _set_params(self,p):
self.variance = p
def _set_params(self, p):
self.variance = float(p)
self.I = np.eye(self.N)
self.covariance_matrix = self.I * self.variance
self.Ki = self.I*(1.0 / self.variance)
#self.ln_det_K = np.sum(np.log(np.diag(self.covariance_matrix)))
self.ln_det_K = self.N*np.log(self.variance)
def _gradients(self,partial):
return np.zeros(1)
@ -57,42 +68,231 @@ class Gaussian(NoiseDistribution):
new_sigma2 = self.predictive_variance(mu,sigma)
return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance)
def _predictive_variance_analytical(self,mu,sigma):
def _predictive_variance_analytical(self,mu,sigma,predictive_mean=None):
return 1./(1./self.variance + 1./sigma**2)
def _mass(self,gp,obs):
#return std_norm_pdf( (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) )
return stats.norm.pdf(obs,self.gp_link.transf(gp),np.sqrt(self.variance))
def _mass(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
Please negate your function and use pdf in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def _nlog_mass(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def _nlog_mass(self,gp,obs):
return .5*((self.gp_link.transf(gp)-obs)**2/self.variance + np.log(2.*np.pi*self.variance))
def _dnlog_mass_dgp(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
Please negate your function and use dlogpdf_df in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def _dnlog_mass_dgp(self,gp,obs):
return (self.gp_link.transf(gp)-obs)/self.variance * self.gp_link.dtransf_df(gp)
def _d2nlog_mass_dgp2(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
Please negate your function and use d2logpdf_df2 in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def _d2nlog_mass_dgp2(self,gp,obs):
return ((self.gp_link.transf(gp)-obs)*self.gp_link.d2transf_df2(gp) + self.gp_link.dtransf_df(gp)**2)/self.variance
def pdf_link(self, link_f, y, extra_data=None):
"""
Likelihood function given link(f)
.. math::
p(y|\\lambda(f)) = \\frac{1}{(2\\pi\\sigma^{2})^{N/2}}\\exp\\left(-\\frac{(y - \\lambda(f))^{T}(y - \\lambda(f))}{2\\sigma^{2}}\\right)
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: likelihood evaluated for this point
:rtype: float
"""
#Assumes no covariance; uses exp(sum(log(...))) for numerical stability
return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance)))))
def logpdf_link(self, link_f, y, extra_data=None):
"""
Log likelihood function given link(f)
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: log likelihood evaluated for this point
:rtype: float
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
return -0.5*(np.sum((y-link_f)**2/self.variance) + self.ln_det_K + self.N*np.log(2.*np.pi))
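# --- Editor's sketch (assumed standalone, not the GPy API): the expression
# above is just the sum of N independent Gaussian log-densities, which scipy
# confirms directly; all values here are made up for illustration.
import numpy as np
from scipy import stats

N, variance = 4, 0.5
y = np.random.randn(N, 1)
link_f = np.random.randn(N, 1)
ln_det_K = N*np.log(variance)                 # as set in _set_params above
manual = -0.5*(np.sum((y - link_f)**2/variance) + ln_det_K + N*np.log(2.*np.pi))
reference = np.sum(stats.norm.logpdf(y, loc=link_f, scale=np.sqrt(variance)))
assert np.allclose(manual, reference)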
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the pdf at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\sigma^{2}}(y_{i} - \\lambda(f_{i}))
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: gradient of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
s2_i = (1.0/self.variance)
grad = s2_i*y - s2_i*link_f
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link_f, w.r.t link_f.
i.e. second derivative of logpdf at y given link(f_i) and link(f_j), w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\frac{1}{\\sigma^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since everywhere else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i), not on link(f_j) for j != i)
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
hess = -(1.0/self.variance)*np.ones((self.N, 1))
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = 0
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: third derivative of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
d3logpdf_dlink3 = np.zeros((self.N, 1))
return d3logpdf_dlink3
def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
"""
Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance)
.. math::
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = -\\frac{N}{2\\sigma^{2}} + \\frac{(y_{i} - \\lambda(f_{i}))^{2}}{2\\sigma^{4}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: float
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
e = y - link_f
s_4 = 1.0/(self.variance**2)
dlik_dsigma = -0.5*self.N/self.variance + 0.5*s_4*np.sum(np.square(e))
return dlik_dsigma # already a scalar: the data dimension was summed above
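# --- Editor's sketch: finite-difference check of the variance gradient
# above, with made-up data; mirrors the closed form -N/(2 s2) + sum(e^2)/(2 s2^2).
import numpy as np

N = 5
y = np.random.randn(N, 1)
link_f = np.random.randn(N, 1)

def logpdf(var):
    return -0.5*(np.sum((y - link_f)**2/var) + N*np.log(var) + N*np.log(2.*np.pi))

var, eps = 0.7, 1e-6
grad = -0.5*N/var + 0.5*np.sum(np.square(y - link_f))/var**2
assert np.allclose(grad, (logpdf(var + eps) - logpdf(var - eps))/(2.*eps))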
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
"""
Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)}) = \\frac{1}{\\sigma^{4}}(-y_{i} + \\lambda(f_{i}))
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
s_4 = 1.0/(self.variance**2)
dlik_grad_dsigma = -s_4*y + s_4*link_f
return dlik_grad_dsigma
def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
"""
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)}) = \\frac{1}{\\sigma^{4}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
s_4 = 1.0/(self.variance**2)
d2logpdf_dlink2_dvar = s_4*np.ones((self.N, 1))
return d2logpdf_dlink2_dvar
def dlogpdf_link_dtheta(self, f, y, extra_data=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
return np.asarray([[dlogpdf_dvar]])
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
return d2logpdf_dlink2_dvar
def _mean(self,gp):
"""
Mass (or density) function
Expected value of y under the Mass (or density) function p(y|f)
.. math::
E_{p(y|f)}[y]
"""
return self.gp_link.transf(gp)
def _dmean_dgp(self,gp):
return self.gp_link.dtransf_df(gp)
def _d2mean_dgp2(self,gp):
return self.gp_link.d2transf_df2(gp)
def _variance(self,gp):
"""
Mass (or density) function
Variance of y under the Mass (or density) function p(y|f)
.. math::
Var_{p(y|f)}[y]
"""
return self.variance
def _dvariance_dgp(self,gp):
return 0
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
def _d2variance_dgp2(self,gp):
return 0
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp])
return Ysim.reshape(orig_shape)
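# --- Editor's note: an equivalent, vectorized draw (same distribution as the
# per-point loop above), sketched standalone with an identity link assumed.
import numpy as np

gp = np.random.randn(4, 2)
variance = 0.3
Ysim = np.random.normal(gp, np.sqrt(variance))   # broadcasts over gp's shape
assert Ysim.shape == gp.shape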

View file

@ -24,19 +24,25 @@ class GPTransformation(object):
"""
Gaussian process transformation function, latent space -> output space
"""
pass
raise NotImplementedError
def dtransf_df(self,f):
"""
derivative of transf(f) w.r.t. f
"""
pass
raise NotImplementedError
def d2transf_df2(self,f):
"""
second derivative of transf(f) w.r.t. f
"""
pass
raise NotImplementedError
def d3transf_df3(self,f):
"""
third derivative of transf(f) w.r.t. f
"""
raise NotImplementedError
class Identity(GPTransformation):
"""
@ -49,10 +55,13 @@ class Identity(GPTransformation):
return f
def dtransf_df(self,f):
return 1.
return np.ones_like(f)
def d2transf_df2(self,f):
return 0
return np.zeros_like(f)
def d3transf_df3(self,f):
return np.zeros_like(f)
class Probit(GPTransformation):
@ -71,6 +80,10 @@ class Probit(GPTransformation):
def d2transf_df2(self,f):
return -f * std_norm_pdf(f)
def d3transf_df3(self,f):
f2 = f**2
return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2)
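# --- Editor's sketch: check the probit third derivative above by differencing
# the second derivative -f*phi(f); grid values are illustrative only.
import numpy as np
from scipy.stats import norm

f = np.linspace(-2., 2., 9)
eps = 1e-6
d2 = lambda x: -x*norm.pdf(x)
d3_analytic = -norm.pdf(f)*(1. - f**2)
d3_numeric = (d2(f + eps) - d2(f - eps))/(2.*eps)
assert np.allclose(d3_analytic, d3_numeric, rtol=1e-4, atol=1e-7)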
class Log(GPTransformation):
"""
.. math::
@ -87,6 +100,9 @@ class Log(GPTransformation):
def d2transf_df2(self,f):
return np.exp(f)
def d3transf_df3(self,f):
return np.exp(f)
class Log_ex_1(GPTransformation):
"""
.. math::
@ -104,15 +120,23 @@ class Log_ex_1(GPTransformation):
aux = np.exp(f)/(1.+np.exp(f))
return aux*(1.-aux)
def d3transf_df3(self,f):
aux = np.exp(f)/(1.+np.exp(f))
daux_df = aux*(1.-aux)
return daux_df - (2.*aux*daux_df)
class Reciprocal(GPTransformation):
def transf(sefl,f):
def transf(self,f):
return 1./f
def dtransf_df(self,f):
return -1./f**2
return -1./(f**2)
def d2transf_df2(self,f):
return 2./f**3
return 2./(f**3)
def d3transf_df3(self,f):
return -6./(f**4)
class Heaviside(GPTransformation):
"""

View file

@ -9,15 +9,12 @@ import pylab as pb
from GPy.util.plot import gpplot
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from GPy.util.misc import chain_1, chain_2, chain_3
from scipy.integrate import quad
class NoiseDistribution(object):
"""
Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.darray)
.. note:: Y values allowed depend on the LikelihoodFunction used
Likelihood class for doing approximations
"""
def __init__(self,gp_link,analytical_mean=False,analytical_variance=False):
assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."
@ -35,6 +32,8 @@ class NoiseDistribution(object):
else:
self.predictive_variance = self._predictive_variance_numerical
self.log_concave = True
def _get_params(self):
return np.zeros(0)
@ -57,369 +56,379 @@ class NoiseDistribution(object):
"""
return Y
def _product(self,gp,obs,mu,sigma):
"""
Product between the cavity distribution and a likelihood factor.
:param gp: latent variable
:param obs: observed output
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._mass(gp,obs)
def _nlog_product_scaled(self,gp,obs,mu,sigma):
"""
Negative log-product between the cavity distribution and a likelihood factor.
.. note:: The constant term in the Gaussian distribution is ignored.
:param gp: latent variable
:param obs: observed output
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return .5*((gp-mu)/sigma)**2 + self._nlog_mass(gp,obs)
def _dnlog_product_dgp(self,gp,obs,mu,sigma):
"""
Derivative wrt latent variable of the log-product between the cavity distribution and a likelihood factor.
:param gp: latent variable
:param obs: observed output
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return (gp - mu)/sigma**2 + self._dnlog_mass_dgp(gp,obs)
def _d2nlog_product_dgp2(self,gp,obs,mu,sigma):
"""
Second derivative wrt latent variable of the log-product between the cavity distribution and a likelihood factor.
:param gp: latent variable
:param obs: observed output
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return 1./sigma**2 + self._d2nlog_mass_dgp2(gp,obs)
def _product_mode(self,obs,mu,sigma):
"""
Newton's CG method to find the mode in _product (cavity x likelihood factor).
:param obs: observed output
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma),disp=False)
def _moments_match_analytical(self,obs,tau,v):
"""
If available, this function computes the moments analytically.
"""
pass
raise NotImplementedError
def log_predictive_density(self, y_test, mu_star, var_star):
"""
Calculation of the log predictive density
.. math::
p(y_{*}|D) = \\int p(y_{*}|f_{*})p(f_{*}|\\mu_{*},\\sigma^{2}_{*}) df_{*}
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
:type mu_star: (Nx1) array
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
:type var_star: (Nx1) array
"""
assert y_test.shape==mu_star.shape
assert y_test.shape==var_star.shape
assert y_test.shape[1] == 1
def integral_generator(y, m, v):
"""Generate a function which can be integrated to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
def f(f_star):
return self.pdf(f_star, y)*np.exp(-(1./(2*v))*np.square(m-f_star))
return f
scaled_p_ystar, accuracy = zip(*[quad(integral_generator(y, m, v), -np.inf, np.inf) for y, m, v in zip(y_test.flatten(), mu_star.flatten(), var_star.flatten())])
scaled_p_ystar = np.array(scaled_p_ystar).reshape(-1,1)
p_ystar = scaled_p_ystar/np.sqrt(2*np.pi*var_star)
return np.log(p_ystar)
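# --- Editor's sketch of the quadrature above for the one case with a closed
# form: a Gaussian likelihood, where int p(y*|f*) N(f*; mu*, var*) df*
# = N(y*; mu*, var* + lik_var). All values are assumed for illustration.
import numpy as np
from scipy import stats
from scipy.integrate import quad

y_star, mu_star, var_star, lik_var = 0.3, 0.1, 0.4, 0.25
integrand = lambda f: (stats.norm.pdf(y_star, f, np.sqrt(lik_var))
                       * stats.norm.pdf(f, mu_star, np.sqrt(var_star)))
p_num, _ = quad(integrand, -np.inf, np.inf)
p_exact = stats.norm.pdf(y_star, mu_star, np.sqrt(var_star + lik_var))
assert np.allclose(p_num, p_exact)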
def _moments_match_numerical(self,obs,tau,v):
"""
Laplace approximation to calculate the moments.
Calculation of moments using quadrature
:param obs: observed output
:param tau: cavity distribution 1st natural parameter (precision)
:param v: cavity distribution 2nd natural parameter (mu*precision)
"""
#Compute first integral for zeroth moment
mu = v/tau
mu_hat = self._product_mode(obs,mu,np.sqrt(1./tau))
sigma2_hat = 1./(tau + self._d2nlog_mass_dgp2(mu_hat,obs))
Z_hat = np.exp(-.5*tau*(mu_hat-mu)**2) * self._mass(mu_hat,obs)*np.sqrt(tau*sigma2_hat)
return Z_hat,mu_hat,sigma2_hat
def int_1(f):
return self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
z, accuracy = quad(int_1, -np.inf, np.inf)
z /= np.sqrt(2*np.pi/tau)
def _nlog_conditional_mean_scaled(self,gp,mu,sigma):
"""
Negative logarithm of the l.v.'s predictive distribution times the output's mean given the l.v.
#Compute second integral for first moment
def int_2(f):
return f*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
mean, accuracy = quad(int_2, -np.inf, np.inf)
mean /= np.sqrt(2*np.pi/tau)
mean /= z
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
#Compute integral for variance
def int_3(f):
return (f**2)*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
Ef2, accuracy = quad(int_3, -np.inf, np.inf)
Ef2 /= np.sqrt(2*np.pi/tau)
Ef2 /= z
variance = Ef2 - mean**2
.. note:: This function helps computing E(Y_star) = E(E(Y_star|f_star))
"""
return .5*((gp - mu)/sigma)**2 - np.log(self._mean(gp))
def _dnlog_conditional_mean_dgp(self,gp,mu,sigma):
"""
Derivative of _nlog_conditional_mean_scaled wrt. l.v.
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return (gp - mu)/sigma**2 - self._dmean_dgp(gp)/self._mean(gp)
def _d2nlog_conditional_mean_dgp2(self,gp,mu,sigma):
"""
Second derivative of _nlog_conditional_mean_scaled wrt. l.v.
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return 1./sigma**2 - self._d2mean_dgp2(gp)/self._mean(gp) + (self._dmean_dgp(gp)/self._mean(gp))**2
def _nlog_exp_conditional_variance_scaled(self,gp,mu,sigma):
"""
Negative logarithm of the l.v.'s predictive distribution times the output's variance given the l.v.
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
.. note:: This function helps computing E(V(Y_star|f_star))
"""
return .5*((gp - mu)/sigma)**2 - np.log(self._variance(gp))
def _dnlog_exp_conditional_variance_dgp(self,gp,mu,sigma):
"""
Derivative of _nlog_exp_conditional_variance_scaled wrt. l.v.
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return (gp - mu)/sigma**2 - self._dvariance_dgp(gp)/self._variance(gp)
def _d2nlog_exp_conditional_variance_dgp2(self,gp,mu,sigma):
"""
Second derivative of _nlog_exp_conditional_variance_scaled wrt. l.v.
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return 1./sigma**2 - self._d2variance_dgp2(gp)/self._variance(gp) + (self._dvariance_dgp(gp)/self._variance(gp))**2
def _nlog_exp_conditional_mean_sq_scaled(self,gp,mu,sigma):
"""
Negative logarithm of the l.v.'s predictive distribution times the output's mean squared given the l.v.
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
.. note:: This function helps computing E( E(Y_star|f_star)**2 )
"""
return .5*((gp - mu)/sigma)**2 - 2*np.log(self._mean(gp))
def _dnlog_exp_conditional_mean_sq_dgp(self,gp,mu,sigma):
"""
Derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v.
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return (gp - mu)/sigma**2 - 2*self._dmean_dgp(gp)/self._mean(gp)
def _d2nlog_exp_conditional_mean_sq_dgp2(self,gp,mu,sigma):
"""
Second derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v.
:param gp: latent variable
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
return 1./sigma**2 - 2*( self._d2mean_dgp2(gp)/self._mean(gp) - (self._dmean_dgp(gp)/self._mean(gp))**2 )
return z, mean, variance
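# --- Editor's sketch (assumed values): for a Gaussian likelihood the
# quadrature moments above have closed forms, which gives a quick check of
# the scheme; tau and v parameterize the cavity as in the code above.
import numpy as np
from scipy import stats
from scipy.integrate import quad

obs, tau, v, s2 = 0.7, 2.0, 1.0, 0.5
mu = v/tau
weight = lambda f: stats.norm.pdf(obs, f, np.sqrt(s2))*np.exp(-0.5*tau*(mu - f)**2)
norm_c = np.sqrt(2.*np.pi/tau)
z = quad(weight, -np.inf, np.inf)[0]/norm_c
mean = quad(lambda f: f*weight(f), -np.inf, np.inf)[0]/norm_c/z
# closed forms: Z = N(obs; mu, s2 + 1/tau); posterior mean of a Gaussian product
assert np.allclose(z, stats.norm.pdf(obs, mu, np.sqrt(s2 + 1./tau)))
assert np.allclose(mean, (tau*mu + obs/s2)/(tau + 1./s2))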
def _predictive_mean_analytical(self,mu,sigma):
"""
Predictive mean
.. math::
E(Y^{*}|Y) = E( E(Y^{*}|f^{*}, Y) )
If available, this function computes the predictive mean analytically.
"""
pass
raise NotImplementedError
def _predictive_variance_analytical(self,mu,sigma):
"""
Predictive variance
.. math::
V(Y^{*}| Y) = E( V(Y^{*}|f^{*}, Y) ) + V( E(Y^{*}|f^{*}, Y) )
If available, this function computes the predictive variance analytically.
"""
pass
raise NotImplementedError
def _predictive_mean_numerical(self,mu,sigma):
"""
Laplace approximation to the predictive mean: E(Y_star) = E( E(Y_star|f_star) )
Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) )
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
:param mu: mean of posterior
:param sigma: standard deviation of posterior
"""
maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma),disp=False)
mean = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma))*sigma)
"""
#FIXME: Quadrature does not work!
raise NotImplementedError
sigma2 = sigma**2
#Compute first moment
def int_mean(f):
return self._mean(f)*np.exp(-(0.5/sigma2)*np.square(f - mu))
scaled_mean, accuracy = quad(int_mean, -np.inf, np.inf)
mean = scaled_mean / np.sqrt(2*np.pi*(sigma2))
pb.figure()
x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)])
f = np.array([np.exp(-self._nlog_conditional_mean_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x])
pb.plot(x,f,'b-')
sigma2 = 1./self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma)
f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2)
k = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2)
pb.plot(x,f2*mean,'r-')
pb.vlines(maximum,0,f.max())
"""
return mean
def _predictive_mean_sq(self,mu,sigma):
"""
Laplace approximation to the predictive mean squared: E(Y_star**2) = E( E(Y_star|f_star)**2 )
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
"""
maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_mean_sq_scaled,x0=self._mean(mu),fprime=self._dnlog_exp_conditional_mean_sq_dgp,fhess=self._d2nlog_exp_conditional_mean_sq_dgp2,args=(mu,sigma),disp=False)
mean_squared = np.exp(-self._nlog_exp_conditional_mean_sq_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_mean_sq_dgp2(maximum,mu,sigma))*sigma)
return mean_squared
def _predictive_variance_numerical(self,mu,sigma,predictive_mean=None):
"""
Laplace approximation to the predictive variance: V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
:param mu: mean of posterior
:param sigma: standard deviation of posterior
:param predictive_mean: output's predictive mean; if None, the predictive_mean function will be called.
"""
sigma2 = sigma**2
normalizer = np.sqrt(2*np.pi*sigma2)
# E( V(Y_star|f_star) )
maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma),disp=False)
exp_var = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma))*sigma)
#Compute expected value of variance
def int_var(f):
return self._variance(f)*np.exp(-(0.5/sigma2)*np.square(f - mu))
scaled_exp_variance, accuracy = quad(int_var, -np.inf, np.inf)
exp_var = scaled_exp_variance / normalizer
"""
pb.figure()
x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)])
f = np.array([np.exp(-self._nlog_exp_conditional_variance_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x])
pb.plot(x,f,'b-')
sigma2 = 1./self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma)
f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2)
k = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2)
pb.plot(x,f2*exp_var,'r--')
pb.vlines(maximum,0,f.max())
"""
#V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star)**2 )
exp_exp2 = self._predictive_mean_sq(mu,sigma)
#V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
if predictive_mean is None:
predictive_mean = self.predictive_mean(mu,sigma)
predictive_mean_sq = predictive_mean**2
def int_pred_mean_sq(f):
return predictive_mean_sq*np.exp(-(0.5/(sigma2))*np.square(f - mu))
scaled_exp_exp2, accuracy = quad(int_pred_mean_sq, -np.inf, np.inf)
exp_exp2 = scaled_exp_exp2 / normalizer
var_exp = exp_exp2 - predictive_mean**2
# V(Y_star | f_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
return exp_var + var_exp
def _predictive_percentiles(self,p,mu,sigma):
def pdf_link(self, link_f, y, extra_data=None):
raise NotImplementedError
def logpdf_link(self, link_f, y, extra_data=None):
raise NotImplementedError
def dlogpdf_dlink(self, link_f, y, extra_data=None):
raise NotImplementedError
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
raise NotImplementedError
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
raise NotImplementedError
def dlogpdf_link_dtheta(self, link_f, y, extra_data=None):
raise NotImplementedError
def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None):
raise NotImplementedError
def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None):
raise NotImplementedError
def pdf(self, f, y, extra_data=None):
"""
Percentiles of the predictive distribution
Evaluates the link function link(f) then computes the likelihood (pdf) using it
:param p: lower tail probability
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
:param predictive_mean: output's predictive mean; if None, the predictive_mean function will be called.
.. math::
p(y|\\lambda(f))
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra data (not used here)
:returns: likelihood evaluated for this point
:rtype: float
"""
qf = stats.norm.ppf(p,mu,sigma)
return self.gp_link.transf(qf)
link_f = self.gp_link.transf(f)
return self.pdf_link(link_f, y, extra_data=extra_data)
def _nlog_joint_predictive_scaled(self,x,mu,sigma):
def logpdf(self, f, y, extra_data=None):
"""
Negative logarithm of the joint predictive distribution (latent variable and output).
Evaluates the link function link(f) then computes the log likelihood (log pdf) using it
:param x: tuple (latent variable,output)
:param mu: latent variable's predictive mean
:param sigma: latent variable's predictive standard deviation
.. math::
\\log p(y|\\lambda(f))
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra data (not used here)
:returns: log likelihood evaluated for this point
:rtype: float
"""
return self._nlog_product_scaled(x[0],x[1],mu,sigma)
link_f = self.gp_link.transf(f)
return self.logpdf_link(link_f, y, extra_data=extra_data)
def _gradient_nlog_joint_predictive(self,x,mu,sigma):
def dlogpdf_df(self, f, y, extra_data=None):
"""
Gradient of _nlog_joint_predictive_scaled.
Evaluates the link function link(f) then computes the derivative of log likelihood using it
Uses Faa di Bruno's formula for the chain rule
:param x: tuple (latent variable,output)
:param mu: latent variable's predictive mean
:param sigma: latent variable's predictive standard deviation
.. note: Only available when the output is continuous
.. math::
\\frac{d\\log p(y|\\lambda(f))}{df} = \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d\\lambda(f)}{df}
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra data (not used here)
:returns: derivative of log likelihood evaluated for this point
:rtype: 1xN array
"""
assert not self.discrete, "Gradient not available for discrete outputs."
return np.array((self._dnlog_product_dgp(gp=x[0],obs=x[1],mu=mu,sigma=sigma),self._dnlog_mass_dobs(obs=x[1],gp=x[0])))
link_f = self.gp_link.transf(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
dlink_df = self.gp_link.dtransf_df(f)
return chain_1(dlogpdf_dlink, dlink_df)
def _hessian_nlog_joint_predictive(self,x,mu,sigma):
def d2logpdf_df2(self, f, y, extra_data=None):
"""
Hessian of _nlog_joint_predictive_scaled.
Evaluates the link function link(f) then computes the second derivative of log likelihood using it
Uses Faa di Bruno's formula for the chain rule
:param x: tuple (latent variable,output)
:param mu: latent variable's predictive mean
:param sigma: latent variable's predictive standard deviation
.. note: Only available when the output is continuous
.. math::
\\frac{d^{2}\\log p(y|\\lambda(f))}{df^{2}} = \\frac{d^{2}\\log p(y|\\lambda(f))}{d^{2}\\lambda(f)}\\left(\\frac{d\\lambda(f)}{df}\\right)^{2} + \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d^{2}\\lambda(f)}{df^{2}}
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra data (not used here)
:returns: second derivative of log likelihood evaluated for this point (diagonal only)
:rtype: 1xN array
"""
assert not self.discrete, "Hessian not available for discrete outputs."
cross_derivative = self._d2nlog_mass_dcross(gp=x[0],obs=x[1])
return np.array((self._d2nlog_product_dgp2(gp=x[0],obs=x[1],mu=mu,sigma=sigma),cross_derivative,cross_derivative,self._d2nlog_mass_dobs2(obs=x[1],gp=x[0]))).reshape(2,2)
link_f = self.gp_link.transf(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
d2link_df2 = self.gp_link.d2transf_df2(f)
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
def _joint_predictive_mode(self,mu,sigma):
def d3logpdf_df3(self, f, y, extra_data=None):
"""
Negative logarithm of the joint predictive distribution (latent variable and output).
Evaluates the link function link(f) then computes the third derivative of log likelihood using it
Uses Faa di Bruno's formula for the chain rule
:param x: tuple (latent variable,output)
:param mu: latent variable's predictive mean
:param sigma: latent variable's predictive standard deviation
.. math::
\\frac{d^{3}\\log p(y|\\lambda(f))}{df^{3}} = \\frac{d^{3}\\log p(y|\\lambda(f))}{d\\lambda(f)^{3}}\\left(\\frac{d\\lambda(f)}{df}\\right)^{3} + 3\\frac{d^{2}\\log p(y|\\lambda(f))}{d\\lambda(f)^{2}}\\frac{d\\lambda(f)}{df}\\frac{d^{2}\\lambda(f)}{df^{2}} + \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d^{3}\\lambda(f)}{df^{3}}
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra data (not used here)
:returns: third derivative of log likelihood evaluated for this point
:rtype: float
"""
return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.gp_link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma),disp=False)
link_f = self.gp_link.transf(f)
d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, extra_data=extra_data)
dlink_df = self.gp_link.dtransf_df(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
d2link_df2 = self.gp_link.d2transf_df2(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
d3link_df3 = self.gp_link.d3transf_df3(f)
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
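# --- Editor's note: chain_1/chain_2/chain_3 (imported from GPy.util.misc
# above) are assumed here to be elementwise Faa di Bruno helpers matching the
# formulas in these docstrings; a minimal sketch of what such helpers look like:
def chain_1(df_dg, dg_dx):
    # d/dx f(g(x)) = f'(g) g'(x)
    return df_dg*dg_dx

def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
    # d2/dx2 f(g(x)) = f''(g) g'(x)^2 + f'(g) g''(x)
    return d2f_dg2*(dg_dx**2) + df_dg*d2g_dx2

def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
    # d3/dx3 f(g(x)) = f'''(g) g'(x)^3 + 3 f''(g) g'(x) g''(x) + f'(g) g'''(x)
    return d3f_dg3*(dg_dx**3) + 3.*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3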
def predictive_values(self,mu,var):
def dlogpdf_dtheta(self, f, y, extra_data=None):
"""
Derivative of the log likelihood w.r.t the noise model parameters theta, evaluated through the link function.
"""
if len(self._get_param_names()) > 0:
link_f = self.gp_link.transf(f)
return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data)
else:
#No parameters, so return an empty array for the derivatives
return np.empty([1, 0])
def dlogpdf_df_dtheta(self, f, y, extra_data=None):
"""
Derivative of dlogpdf_df w.r.t the noise model parameters theta, using the chain rule through the link function.
"""
if len(self._get_param_names()) > 0:
link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
return chain_1(dlogpdf_dlink_dtheta, dlink_df)
else:
#No parameters, so return an empty array for the derivatives
return np.empty([f.shape[0], 0])
def d2logpdf_df2_dtheta(self, f, y, extra_data=None):
"""
Derivative of d2logpdf_df2 w.r.t the noise model parameters theta, using the chain rule through the link function.
"""
if len(self._get_param_names()) > 0:
link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f)
d2link_df2 = self.gp_link.d2transf_df2(f)
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, extra_data=extra_data)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
else:
#No parameters, so return an empty array for the derivatives
return np.empty([f.shape[0], 0])
def _laplace_gradients(self, f, y, extra_data=None):
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data)
dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, extra_data=extra_data)
d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, extra_data=extra_data)
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
# ensure we have gradients for every parameter we want to optimize
assert dlogpdf_dtheta.shape[1] == len(self._get_param_names())
assert dlogpdf_df_dtheta.shape[1] == len(self._get_param_names())
assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names())
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
def predictive_values(self, mu, var, full_cov=False, num_samples=30000,
sampling=False):
"""
Compute mean, variance and confidence interval (percentiles 2.5 and 97.5) of the prediction.
:param mu: mean of the latent variable
:param var: variance of the latent variable
:param mu: mean of the latent variable, f, of posterior
:param var: variance of the latent variable, f, of posterior
:param full_cov: whether to use the full covariance or just the diagonal
:type full_cov: Boolean
:param num_samples: number of samples to use in computing quantiles and
possibly mean variance
:type num_samples: integer
:param sampling: Whether to use samples for mean and variances anyway
:type sampling: Boolean
"""
if isinstance(mu,float) or isinstance(mu,int):
mu = [mu]
var = [var]
pred_mean = []
pred_var = []
q1 = []
q3 = []
for m,s in zip(mu,np.sqrt(var)):
pred_mean.append(self.predictive_mean(m,s))
pred_var.append(self.predictive_variance(m,s,pred_mean[-1]))
q1.append(self._predictive_percentiles(.025,m,s))
q3.append(self._predictive_percentiles(.975,m,s))
#Get gp_samples f* using posterior mean and variance
if not full_cov:
gp_samples = np.random.multivariate_normal(mu.flatten(), np.diag(var.flatten()),
size=num_samples).T
else:
gp_samples = np.random.multivariate_normal(mu.flatten(), var,
size=num_samples).T
#Push gp samples (f*) through likelihood to give p(y*|f*)
samples = self.samples(gp_samples)
axis=-1
if self.analytical_mean and not sampling:
pred_mean = self.predictive_mean(mu, np.sqrt(var))
else:
pred_mean = np.mean(samples, axis=axis)
if self.analytical_variance and not sampling:
pred_var = self.predictive_variance(mu, np.sqrt(var), pred_mean)
else:
pred_var = np.var(samples, axis=axis)
#Calculate quantiles from samples
q1 = np.percentile(samples, 2.5, axis=axis)
q3 = np.percentile(samples, 97.5, axis=axis)
print "WARNING: Using sampling to calculate predictive quantiles"
pred_mean = np.vstack(pred_mean)
pred_var = np.vstack(pred_var)
q1 = np.vstack(q1)
q3 = np.vstack(q3)
return pred_mean, pred_var, q1, q3
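# --- Editor's sketch of the sampling scheme above with plain numpy and an
# assumed unit-variance Gaussian likelihood; shapes mirror the code.
import numpy as np

mu = np.zeros(3)
var = 0.5*np.ones(3)
num_samples = 20000
f_samples = np.random.multivariate_normal(mu, np.diag(var), size=num_samples).T
y_samples = f_samples + np.random.randn(*f_samples.shape)  # push f* through p(y*|f*)
q1 = np.percentile(y_samples, 2.5, axis=-1)
q3 = np.percentile(y_samples, 97.5, axis=-1)
assert q1.shape == (3,) and np.all(q1 < q3)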
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
pass
raise NotImplementedError

View file

@ -1,7 +1,7 @@
from __future__ import division
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
@ -14,9 +14,10 @@ class Poisson(NoiseDistribution):
Poisson likelihood
.. math::
L(x) = \\exp(\\lambda) * \\frac{\\lambda^Y_i}{Y_i!}
p(y_{i}|\\lambda(f_{i})) = \\frac{\\lambda(f_{i})^{y_{i}}}{y_{i}!}e^{-\\lambda(f_{i})}
..Note: Y is expected to take values in {0,1,2,...}
.. Note::
Y is expected to take values in {0,1,2,...}
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance)
@ -24,25 +25,108 @@ class Poisson(NoiseDistribution):
def _preprocess_values(self,Y): #TODO
return Y
def _mass(self,gp,obs):
def pdf_link(self, link_f, y, extra_data=None):
"""
Mass (or density) function
"""
return stats.poisson.pmf(obs,self.gp_link.transf(gp))
Likelihood function given link(f)
def _nlog_mass(self,gp,obs):
"""
Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
"""
return self.gp_link.transf(gp) - obs * np.log(self.gp_link.transf(gp)) + np.log(special.gamma(obs+1))
.. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\lambda(f_{i})^{y_{i}}}{y_{i}!}e^{-\\lambda(f_{i})}
def _dnlog_mass_dgp(self,gp,obs):
return self.gp_link.dtransf_df(gp) * (1. - obs/self.gp_link.transf(gp))
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
return np.prod(stats.poisson.pmf(y,link_f))
def _d2nlog_mass_dgp2(self,gp,obs):
d2_df = self.gp_link.d2transf_df2(gp)
transf = self.gp_link.transf(gp)
return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
def logpdf_link(self, link_f, y, extra_data=None):
"""
Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = -\\lambda(f_{i}) + y_{i}\\log \\lambda(f_{i}) - \\log y_{i}!
:param link_f: latent variables (link(f))
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1))
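# --- Editor's sketch: the expression above agrees with scipy's Poisson
# logpmf (values assumed; gammaln(y+1) = log y!).
import numpy as np
from scipy import stats, special

y = np.array([0., 2., 5.])[:, None]
link_f = np.array([0.5, 1.5, 4.0])[:, None]
manual = np.sum(-link_f + y*np.log(link_f) - special.gammaln(y + 1))
assert np.allclose(manual, np.sum(stats.poisson.logpmf(y, link_f)))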
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - 1
:param link_f: latent variables (f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
return y/link_f - 1
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{-y_{i}}{\\lambda(f_{i})^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since everywhere else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i), not on link(f_j) for j != i)
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
hess = -y/(link_f**2)
return hess
#d2_df = self.gp_link.d2transf_df2(gp)
#transf = self.gp_link.transf(gp)
#return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f_{i})^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3lik_dlink3 = 2*y/(link_f)**3
return d3lik_dlink3
def _mean(self,gp):
"""
@ -50,20 +134,19 @@ class Poisson(NoiseDistribution):
"""
return self.gp_link.transf(gp)
def _dmean_dgp(self,gp):
return self.gp_link.dtransf_df(gp)
def _d2mean_dgp2(self,gp):
return self.gp_link.d2transf_df2(gp)
def _variance(self,gp):
"""
Variance of y under the Mass (or density) function p(y|f)
"""
return self.gp_link.transf(gp)
def _dvariance_dgp(self,gp):
return self.gp_link.dtransf_df(gp)
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
def _d2variance_dgp2(self,gp):
return self.gp_link.d2transf_df2(gp)
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
Ysim = np.random.poisson(self.gp_link.transf(gp))
return Ysim.reshape(orig_shape)

View file

@ -0,0 +1,277 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats, special
import scipy as sp
import gp_transformations
from noise_distributions import NoiseDistribution
from scipy import stats, integrate
from scipy.special import gammaln, gamma
class StudentT(NoiseDistribution):
"""
Student T likelihood
For nomenclature see Bayesian Data Analysis (2003), p. 576
.. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right)^{-\\frac{v+1}{2}}
"""
def __init__(self,gp_link=None,analytical_mean=True,analytical_variance=True, deg_free=5, sigma2=2):
self.v = deg_free
self.sigma2 = sigma2
self._set_params(np.asarray(sigma2))
super(StudentT, self).__init__(gp_link,analytical_mean,analytical_variance)
self.log_concave = False
def _get_params(self):
return np.asarray(self.sigma2)
def _get_param_names(self):
return ["t_noise_std2"]
def _set_params(self, x):
self.sigma2 = float(x)
@property
def variance(self, extra_data=None):
return (self.v / float(self.v - 2)) * self.sigma2
def pdf_link(self, link_f, y, extra_data=None):
"""
Likelihood function given link(f)
.. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{-\\frac{v+1}{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
#Careful gamma(big_number) is infinity!
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
/ (np.sqrt(self.v * np.pi * self.sigma2)))
* ((1 + (1./float(self.v))*((e**2)/float(self.sigma2)))**(-0.5*(self.v + 1)))
)
return np.prod(objective)
def logpdf_link(self, link_f, y, extra_data=None):
"""
Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
:param link_f: latent variables (link(f))
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
objective = (+ gammaln((self.v + 1) * 0.5)
- gammaln(self.v * 0.5)
- 0.5*np.log(self.sigma2 * self.v * np.pi)
- 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
)
return np.sum(objective)
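# --- Editor's sketch: the objective above matches scipy's location-scale
# Student t log-density (illustrative values).
import numpy as np
from scipy import stats
from scipy.special import gammaln

v, s2 = 5.0, 2.0
y = np.array([0.1, -1.2, 3.0])
f = np.array([0.0, 0.5, 2.5])
e = y - f
manual = np.sum(gammaln((v + 1)*0.5) - gammaln(v*0.5)
                - 0.5*np.log(s2*v*np.pi)
                - 0.5*(v + 1)*np.log(1. + (e**2)/(v*s2)))
assert np.allclose(manual, np.sum(stats.t.logpdf(y, v, loc=f, scale=np.sqrt(s2))))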
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\\lambda(f_{i}))}{(y_{i}-\\lambda(f_{i}))^{2} + \\sigma^{2}v}
:param link_f: latent variables (f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
return grad
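# --- Editor's sketch: finite-difference check of the gradient above; only
# the f-dependent term of the log pdf matters. Values are made up.
import numpy as np

v, s2, eps = 5.0, 2.0, 1e-6
y, f = 1.3, 0.4
logp = lambda ff: -0.5*(v + 1)*np.log(1. + ((y - ff)**2)/(v*s2))
grad = (v + 1)*(y - f)/(v*s2 + (y - f)**2)
assert np.allclose(grad, (logp(f + eps) - logp(f - eps))/(2.*eps))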
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since everywhere else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i), not on link(f_j) for j != i)
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2(v+1)(y_{i} - \\lambda(f_{i}))((y_{i} - \\lambda(f_{i}))^{2} - 3\\sigma^{2} v)}{((y_{i} - \\lambda(f_{i}))^{2} + \\sigma^{2} v)^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
((e**2 + self.sigma2*self.v)**3)
)
return d3lik_dlink3
def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
"""
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
.. math::
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \\lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \\lambda(f_{i}))^{2})}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
return np.sum(dlogpdf_dvar)
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
"""
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)}) = \\frac{-v(v + 1)(y_{i}-\\lambda(f_{i}))}{((y_{i}-\\lambda(f_{i}))^{2} + \\sigma^{2} v)^{2}}
:param link_f: latent variables link_f
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
"""
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \\lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \\lambda(f_{i}))^{2})^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: derivative of hessian evaluated at points f_i and f_j w.r.t variance parameter
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
/ ((self.sigma2*self.v + (e**2))**3)
)
return d2logpdf_dlink2_dvar
def dlogpdf_link_dtheta(self, f, y, extra_data=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
return np.asarray([[dlogpdf_dvar]])
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
return d2logpdf_dlink2_dvar
def _predictive_variance_analytical(self, mu, sigma, predictive_mean=None):
"""
Compute predictive variance of student_t*normal p(y*|f*)p(f*)
Need to find what the variance is at the latent points for a student t*normal p(y*|f*)p(f*)
(((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))
*((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2)))
"""
#FIXME: Not correct
#We want the variance around test points y which comes from int p(y*|f*)p(f*) df*
#Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)]
#Since we are given f* (mu) which is our mean (expected) value of y*|f* then the variance is the variance around this
#Which was also given to us as (var)
#We also need to know the expected variance of y* around samples f*, this is the variance of the student t distribution
#However the variance of the student t distribution is not dependent on f, only on sigma and the degrees of freedom
true_var = 1/(1/sigma**2 + 1/self.variance)
return true_var
def _predictive_mean_analytical(self, mu, sigma):
"""
Compute mean of the prediction
"""
#FIXME: Not correct
return mu
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
#FIXME: Very slow as we are computing a new random variable per input!
#Can't get it to sample all at the same time
#student_t_samples = np.array([stats.t.rvs(self.v, self.gp_link.transf(gpj),scale=np.sqrt(self.sigma2), size=1) for gpj in gp])
dfs = np.ones_like(gp)*self.v
scales = np.ones_like(gp)*np.sqrt(self.sigma2)
student_t_samples = stats.t.rvs(dfs, loc=self.gp_link.transf(gp),
scale=scales)
return student_t_samples.reshape(orig_shape)
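# --- Editor's usage sketch (hypothetical setup): draw Student-t observations
# for a grid of latent values; an Identity link is assumed so transf(gp) == gp,
# and StudentT/gp_transformations are assumed importable as in this file.
import numpy as np
np.random.seed(0)
gp = np.linspace(-1., 1., 10).reshape(5, 2)
lik = StudentT(gp_link=gp_transformations.Identity(), deg_free=4, sigma2=0.1)
Y = lik.samples(gp)
assert Y.shape == gp.shape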

View file

@ -49,18 +49,6 @@ class BayesianGPLVM(SparseGP, GPLVM):
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs)
self.ensure_default_constraints()
def getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return SparseGP.getstate(self) + [self.init]
def setstate(self, state):
self._const_jitter = None
self.init = state.pop()
SparseGP.setstate(self, state)
def _get_param_names(self):
X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
@ -285,6 +273,19 @@ class BayesianGPLVM(SparseGP, GPLVM):
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
return fig
def getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return SparseGP.getstate(self) + [self.init]
def setstate(self, state):
self._const_jitter = None
self.init = state.pop()
SparseGP.setstate(self, state)
def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
objective function for fitting the latent variables for test points

View file

@ -7,7 +7,7 @@ import pylab as pb
import sys, pdb
from ..core import GP
from ..models import GPLVM
from ..mappings import *
from ..mappings import Kernel
class BCGPLVM(GPLVM):

View file

@ -16,7 +16,7 @@ class FITCClassification(FITC):
:param X: input observations
:param Y: observed values
:param likelihood: a GPy likelihood, defaults to Binomial with probit link function
:param likelihood: a GPy likelihood, defaults to Bernoulli with probit link function
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
@ -31,7 +31,7 @@ class FITCClassification(FITC):
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
if likelihood is None:
noise_model = likelihoods.binomial()
noise_model = likelihoods.bernoulli()
likelihood = likelihoods.EP(Y, noise_model)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):

View file

@ -15,7 +15,7 @@ class GPClassification(GP):
:param X: input observations
:param Y: observed values, can be None if likelihood is not None
:param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
:param likelihood: a GPy likelihood, defaults to Bernoulli with Probit link_function
:param kernel: a GPy kernel, defaults to rbf
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
@ -31,7 +31,7 @@ class GPClassification(GP):
kernel = kern.rbf(X.shape[1])
if likelihood is None:
noise_model = likelihoods.binomial()
noise_model = likelihoods.bernoulli()
likelihood = likelihoods.EP(Y, noise_model)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):

View file

@ -25,11 +25,12 @@ class GPRegression(GP):
"""
def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False):
def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, likelihood=None):
if kernel is None:
kernel = kern.rbf(X.shape[1])
likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)
if likelihood is None:
likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self.ensure_default_constraints()
@ -39,5 +40,3 @@ class GPRegression(GP):
def setstate(self, state):
return GP.setstate(self, state)
pass

View file

@ -44,12 +44,6 @@ class GPLVM(GP):
Xr[:PC.shape[0], :PC.shape[1]] = PC
return Xr
def getstate(self):
return GP.getstate(self)
def setstate(self, state):
GP.setstate(self, state)
def _get_param_names(self):
return sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) + GP._get_param_names(self)
@ -68,7 +62,7 @@ class GPLVM(GP):
def jacobian(self,X):
target = np.zeros((X.shape[0],X.shape[1],self.output_dim))
for i in range(self.output_dim):
target[:,:,i]=self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
target[:,:,i] = self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
return target
def magnification(self,X):
@ -91,3 +85,11 @@ class GPLVM(GP):
def plot_magnification(self, *args, **kwargs):
return util.plot_latent.plot_magnification(self, *args, **kwargs)
def getstate(self):
return GP.getstate(self)
def setstate(self, state):
GP.setstate(self, state)

View file

@ -26,40 +26,40 @@ class GradientChecker(Model):
"""
:param f: Function to check gradient for
:param df: Gradient of function to check
:param x0:
:param x0:
Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
Can be a list of arrays, if f takes a list of arrays. This list will be passed
Can be a list of arrays, if f takes a list of arrays. This list will be passed
to f and df in the same order as given here.
If only one argument, make sure not to pass a list!!!
:type x0: [array-like] | array-like | float | int
:param names:
Names to print, when performing gradcheck. If a list was passed to x0
a list of names with the same length is expected.
:param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
Examples:
---------
from GPy.models import GradientChecker
N, M, Q = 10, 5, 3
Sinusoid:
X = numpy.random.rand(N, Q)
grad = GradientChecker(numpy.sin,numpy.cos,X,'x')
grad.checkgrad(verbose=1)
Using GPy:
X, Z = numpy.random.randn(N,Q), numpy.random.randn(M,Q)
kern = GPy.kern.linear(Q, ARD=True) + GPy.kern.rbf(Q, ARD=True)
grad = GradientChecker(kern.K,
grad = GradientChecker(kern.K,
lambda x: 2*kern.dK_dX(numpy.ones((1,1)), x),
x0 = X.copy(),
names='X')
names='X')
grad.checkgrad(verbose=1)
grad.randomize()
grad.checkgrad(verbose=1)
grad.checkgrad(verbose=1)
"""
Model.__init__(self)
if isinstance(x0, (list, tuple)) and names is None:
@ -75,14 +75,14 @@ class GradientChecker(Model):
self.names = names
self.shapes = [get_shape(x0)]
for name, xi in zip(self.names, at_least_one_element(x0)):
self.__setattr__(name, xi)
self.__setattr__(name, numpy.float_(xi))
# self._param_names = []
# for name, shape in zip(self.names, self.shapes):
# self._param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
self.args = args
self.kwargs = kwargs
self.f = f
self.df = df
self._f = f
self._df = df
def _get_x(self):
if len(self.names) > 1:
@ -90,10 +90,10 @@ class GradientChecker(Model):
return [self.__getattribute__(self.names[0])] + list(self.args)
def log_likelihood(self):
return float(numpy.sum(self.f(*self._get_x(), **self.kwargs)))
return float(numpy.sum(self._f(*self._get_x(), **self.kwargs)))
def _log_likelihood_gradients(self):
return numpy.atleast_1d(self.df(*self._get_x(), **self.kwargs)).flatten()
return numpy.atleast_1d(self._df(*self._get_x(), **self.kwargs)).flatten()
def _get_params(self):

View file

@ -81,29 +81,6 @@ class MRD(Model):
Model.__init__(self)
self.ensure_default_constraints()
def getstate(self):
return Model.getstate(self) + [self.names,
self.bgplvms,
self.gref,
self.nparams,
self.input_dim,
self.num_inducing,
self.num_data,
self.NQ,
self.MQ]
def setstate(self, state):
self.MQ = state.pop()
self.NQ = state.pop()
self.num_data = state.pop()
self.num_inducing = state.pop()
self.input_dim = state.pop()
self.nparams = state.pop()
self.gref = state.pop()
self.bgplvms = state.pop()
self.names = state.pop()
Model.setstate(self, state)
@property
def X(self):
return self.gref.X
@ -211,8 +188,8 @@ class MRD(Model):
# g.Z = Z.reshape(self.num_inducing, self.input_dim)
#
# def _set_kern_params(self, g, p):
# g.kern._set_params(p[:g.kern.Nparam])
# g.likelihood._set_params(p[g.kern.Nparam:])
# g.kern._set_params(p[:g.kern.num_params])
# g.likelihood._set_params(p[g.kern.num_params:])
def _set_params(self, x):
start = 0; end = self.NQ
@ -371,4 +348,28 @@ class MRD(Model):
pylab.draw()
fig.tight_layout()
def getstate(self):
return Model.getstate(self) + [self.names,
self.bgplvms,
self.gref,
self.nparams,
self.input_dim,
self.num_inducing,
self.num_data,
self.NQ,
self.MQ]
def setstate(self, state):
self.MQ = state.pop()
self.NQ = state.pop()
self.num_data = state.pop()
self.num_inducing = state.pop()
self.input_dim = state.pop()
self.nparams = state.pop()
self.gref = state.pop()
self.bgplvms = state.pop()
self.names = state.pop()
Model.setstate(self, state)

View file

@ -16,7 +16,7 @@ class SparseGPClassification(SparseGP):
:param X: input observations
:param Y: observed values
:param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
:param likelihood: a GPy likelihood, defaults to Bernoulli with probit link_function
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
@ -31,7 +31,7 @@ class SparseGPClassification(SparseGP):
kernel = kern.rbf(X.shape[1])# + kern.white(X.shape[1],1e-3)
if likelihood is None:
noise_model = likelihoods.binomial()
noise_model = likelihoods.bernoulli()
likelihood = likelihoods.EP(Y, noise_model)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):

View file

@ -25,7 +25,7 @@ class SVIGPRegression(SVIGP):
"""
def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, q_u=None, batchsize=10):
def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, q_u=None, batchsize=10, normalize_Y=False):
# kern defaults to rbf (plus white for stability)
if kernel is None:
kernel = kern.rbf(X.shape[1], variance=1., lengthscale=4.) + kern.white(X.shape[1], 1e-3)
@ -38,7 +38,7 @@ class SVIGPRegression(SVIGP):
assert Z.shape[1] == X.shape[1]
# likelihood defaults to Gaussian
likelihood = likelihoods.Gaussian(Y, normalize=False)
likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)
SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize)
self.load_batch()

View file

@ -37,7 +37,7 @@ def model_checkgrads(model):
def model_instance(model):
#assert isinstance(model, GPy.core.model)
return isinstance(model, GPy.core.model)
return isinstance(model, GPy.core.model.Model)
@nottest
def test_models():
@ -54,7 +54,7 @@ def test_models():
print "After"
print functions
for example in functions:
if example[0] in ['oil', 'silhouette', 'GPLVM_oil_100']:
if example[0] in ['oil', 'silhouette', 'GPLVM_oil_100', 'brendan_faces']:
print "SKIPPING"
continue

View file

@ -0,0 +1,61 @@
from nose.tools import with_setup
from GPy.models import GradientChecker
from GPy.likelihoods.noise_models import gp_transformations
import inspect
import unittest
import numpy as np
class TestTransformations(object):
"""
Generic transformations checker
"""
def setUp(self):
N = 30
self.fs = [np.random.rand(N, 1), float(np.random.rand(1))]
def tearDown(self):
self.fs = None
def test_transformations(self):
self.setUp()
transformations = [gp_transformations.Identity(),
gp_transformations.Log(),
gp_transformations.Probit(),
gp_transformations.Log_ex_1(),
gp_transformations.Reciprocal(),
]
for transformation in transformations:
for f in self.fs:
yield self.t_dtransf_df, transformation, f
yield self.t_d2transf_df2, transformation, f
yield self.t_d3transf_df3, transformation, f
@with_setup(setUp, tearDown)
def t_dtransf_df(self, transformation, f):
print "\n{}".format(inspect.stack()[0][3])
grad = GradientChecker(transformation.transf, transformation.dtransf_df, f, 'f')
grad.randomize()
grad.checkgrad(verbose=1)
assert grad.checkgrad()
@with_setup(setUp, tearDown)
def t_d2transf_df2(self, transformation, f):
print "\n{}".format(inspect.stack()[0][3])
grad = GradientChecker(transformation.dtransf_df, transformation.d2transf_df2, f, 'f')
grad.randomize()
grad.checkgrad(verbose=1)
assert grad.checkgrad()
@with_setup(setUp, tearDown)
def t_d3transf_df3(self, transformation, f):
print "\n{}".format(inspect.stack()[0][3])
grad = GradientChecker(transformation.d2transf_df2, transformation.d3transf_df3, f, 'f')
grad.randomize()
grad.checkgrad(verbose=1)
assert grad.checkgrad()
#if __name__ == "__main__":
#print "Running unit tests"
#unittest.main()

View file

@ -7,6 +7,13 @@ import GPy
verbose = False
try:
import sympy
SYMPY_AVAILABLE=True
except ImportError:
SYMPY_AVAILABLE=False
class KernelTests(unittest.TestCase):
def test_kerneltie(self):
K = GPy.kern.rbf(5, ARD=True)
@ -22,7 +29,16 @@ class KernelTests(unittest.TestCase):
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_rbf_sympykernel(self):
kern = GPy.kern.rbf_sympy(5)
if SYMPY_AVAILABLE:
kern = GPy.kern.rbf_sympy(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_eq_sympykernel(self):
kern = GPy.kern.eq_sympy(5, 3, output_ind=4)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_sinckernel(self):
kern = GPy.kern.sinc(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_rbf_invkernel(self):

View file

@ -0,0 +1,577 @@
import numpy as np
import unittest
import GPy
from GPy.models import GradientChecker
import functools
import inspect
from GPy.likelihoods.noise_models import gp_transformations
from functools import partial
def dparam_partial(inst_func, *args):
"""
If we have an instance method that needs to be called, but that doesn't
take the parameter we wish to change for checkgrad, then this function
will change the parameter using _set_params.
inst_func: an instance method of the object whose parameter we would
like to change
param: the value that will be given to _set_params
args: anything else that needs to be given to the function (for example
the f or Y that are being used in the function whilst we tweak the
param)
"""
def param_func(param, inst_func, args):
inst_func.im_self._set_params(param)
return inst_func(*args)
return functools.partial(param_func, inst_func=inst_func, args=args)
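As a usage sketch (mirroring how the tests below call it, and assuming a noise model whose logpdf takes (f, y)), wrapping student_t's logpdf turns the noise parameter into the argument that checkgrad will move:

# Hypothetical usage: sigma2 becomes the argument of the wrapped function.
stu_t = GPy.likelihoods.student_t(deg_free=5, sigma2=0.2)
f, Y = np.random.rand(5, 1), np.random.rand(5, 1)
logpdf_of_param = dparam_partial(stu_t.logpdf, f, Y)
print logpdf_of_param(np.array([0.5]))  # logpdf of (f, Y) with sigma2 set to 0.5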
def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=False, verbose=False):
"""
checkgrad expects f: R^N -> R^1 and df: R^N -> R^N.
However, if we are holding other parameters fixed while moving one of them,
we need to check the gradient for each of the fixed parameters
(f and y, for example) separately.
Otherwise f gives back R^N and df gives back R^NxM, where M is the number
of parameters and N is the number of data points, so we need to take a
slice out of f and a slice out of df.
"""
#print "\n{} likelihood: {} vs {}".format(func.im_self.__class__.__name__,
#func.__name__, dfunc.__name__)
partial_f = dparam_partial(func, *args)
partial_df = dparam_partial(dfunc, *args)
gradchecking = True
for param in params:
fnum = np.atleast_1d(partial_f(param)).shape[0]
dfnum = np.atleast_1d(partial_df(param)).shape[0]
for fixed_val in range(dfnum):
#dlik and dlik_dvar give back 1 value for each
f_ind = min(fnum, fixed_val+1) - 1
print "fnum: {} dfnum: {} f_ind: {} fixed_val: {}".format(fnum, dfnum, f_ind, fixed_val)
#Make grad checker with this param moving, note that set_params is NOT being called
#The parameter is being set directly with __setattr__
grad = GradientChecker(lambda x: np.atleast_1d(partial_f(x))[f_ind],
lambda x : np.atleast_1d(partial_df(x))[fixed_val],
param, 'p')
#This is not general for more than one param...
if constraints is not None:
for constraint in constraints:
constraint('p', grad)
if randomize:
grad.randomize()
if verbose:
print grad
grad.checkgrad(verbose=1)
if not grad.checkgrad():
gradchecking = False
return gradchecking
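For example (a sketch along the lines of t_dlogpdf_dparams below, assuming numpy is available as np):

# Check dlogpdf_dtheta of a Student-t against numerical gradients of logpdf,
# moving the noise parameter while holding f and Y fixed.
stu_t = GPy.likelihoods.student_t(deg_free=5, sigma2=0.2)
f, Y = np.random.rand(5, 1), np.random.rand(5, 1)
ok = dparam_checkgrad(stu_t.logpdf, stu_t.dlogpdf_dtheta,
params=[np.array([0.2])], args=(f, Y),
constraints=None, randomize=False, verbose=False)
assert ok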
from nose.tools import with_setup
class TestNoiseModels(object):
"""
Generic model checker
"""
def setUp(self):
self.N = 5
self.D = 3
self.X = np.random.rand(self.N, self.D)*10
self.real_std = 0.1
noise = np.random.randn(*self.X[:, 0].shape)*self.real_std
self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None]
self.f = np.random.rand(self.N, 1)
self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None]
self.positive_Y = np.exp(self.Y.copy())
tmp = np.round(self.X[:, 0]*3-3)[:, None] + np.random.randint(0,3, self.X.shape[0])[:, None]
self.integer_Y = np.where(tmp > 0, tmp, 0)
self.var = 0.2
self.var = np.random.rand(1)
#Make a bigger step as lower bound can be quite curved
self.step = 1e-3
def tearDown(self):
self.Y = None
self.f = None
self.X = None
def test_noise_models(self):
self.setUp()
####################################################
# Constraint wrappers so we can just list them off #
####################################################
def constrain_negative(regex, model):
model.constrain_negative(regex)
def constrain_positive(regex, model):
model.constrain_positive(regex)
def constrain_bounded(regex, model, lower, upper):
"""
Used like: partial(constrain_bounded, lower=0, upper=1)
"""
model.constrain_bounded(regex, lower, upper)
"""
Dictionary where we nest models we would like to check
Name: {
"model": model_instance,
"grad_params": {
"names": [names_of_params_we_want, to_grad_check],
"vals": [values_of_params, to_start_at],
"constrain": [constraint_wrappers, listed_here]
},
"laplace": boolean_of_whether_model_should_work_for_laplace,
"ep": boolean_of_whether_model_should_work_for_laplace,
"link_f_constraints": [constraint_wrappers, listed_here]
}
"""
noise_models = {"Student_t_default": {
"model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
"grad_params": {
"names": ["t_noise"],
"vals": [self.var],
"constraints": [constrain_positive]
},
"laplace": True
},
"Student_t_1_var": {
"model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
"grad_params": {
"names": ["t_noise"],
"vals": [1],
"constraints": [constrain_positive]
},
"laplace": True
},
"Student_t_small_var": {
"model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
"grad_params": {
"names": ["t_noise"],
"vals": [0.01],
"constraints": [constrain_positive]
},
"laplace": True
},
"Student_t_approx_gauss": {
"model": GPy.likelihoods.student_t(deg_free=1000, sigma2=self.var),
"grad_params": {
"names": ["t_noise"],
"vals": [self.var],
"constraints": [constrain_positive]
},
"laplace": True
},
"Student_t_log": {
"model": GPy.likelihoods.student_t(gp_link=gp_transformations.Log(), deg_free=5, sigma2=self.var),
"grad_params": {
"names": ["t_noise"],
"vals": [self.var],
"constraints": [constrain_positive]
},
"laplace": True
},
"Gaussian_default": {
"model": GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N),
"grad_params": {
"names": ["noise_model_variance"],
"vals": [self.var],
"constraints": [constrain_positive]
},
"laplace": True,
"ep": True
},
"Gaussian_log": {
"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log(), variance=self.var, D=self.D, N=self.N),
"grad_params": {
"names": ["noise_model_variance"],
"vals": [self.var],
"constraints": [constrain_positive]
},
"laplace": True
},
"Gaussian_probit": {
"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Probit(), variance=self.var, D=self.D, N=self.N),
"grad_params": {
"names": ["noise_model_variance"],
"vals": [self.var],
"constraints": [constrain_positive]
},
"laplace": True
},
"Gaussian_log_ex": {
"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log_ex_1(), variance=self.var, D=self.D, N=self.N),
"grad_params": {
"names": ["noise_model_variance"],
"vals": [self.var],
"constraints": [constrain_positive]
},
"laplace": True
},
"Bernoulli_default": {
"model": GPy.likelihoods.bernoulli(),
"link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)],
"laplace": True,
"Y": self.binary_Y,
"ep": True
},
"Exponential_default": {
"model": GPy.likelihoods.exponential(),
"link_f_constraints": [constrain_positive],
"Y": self.positive_Y,
"laplace": True,
},
"Poisson_default": {
"model": GPy.likelihoods.poisson(),
"link_f_constraints": [constrain_positive],
"Y": self.integer_Y,
"laplace": True,
"ep": False #Should work though...
},
"Gamma_default": {
"model": GPy.likelihoods.gamma(),
"link_f_constraints": [constrain_positive],
"Y": self.positive_Y,
"laplace": True
}
}
for name, attributes in noise_models.iteritems():
model = attributes["model"]
if "grad_params" in attributes:
params = attributes["grad_params"]
param_vals = params["vals"]
param_names= params["names"]
param_constraints = params["constraints"]
else:
params = []
param_vals = []
param_names = []
param_constraints = []
if "link_f_constraints" in attributes:
link_f_constraints = attributes["link_f_constraints"]
else:
link_f_constraints = []
if "Y" in attributes:
Y = attributes["Y"].copy()
else:
Y = self.Y.copy()
if "f" in attributes:
f = attributes["f"].copy()
else:
f = self.f.copy()
if "laplace" in attributes:
laplace = attributes["laplace"]
else:
laplace = False
if "ep" in attributes:
ep = attributes["ep"]
else:
ep = False
if len(param_vals) > 1:
raise NotImplementedError("Cannot support multiple params in likelihood yet!")
#Required by all
#Normal derivatives
yield self.t_logpdf, model, Y, f
yield self.t_dlogpdf_df, model, Y, f
yield self.t_d2logpdf_df2, model, Y, f
#Link derivatives
yield self.t_dlogpdf_dlink, model, Y, f, link_f_constraints
yield self.t_d2logpdf_dlink2, model, Y, f, link_f_constraints
if laplace:
#Laplace only derivatives
yield self.t_d3logpdf_df3, model, Y, f
yield self.t_d3logpdf_dlink3, model, Y, f, link_f_constraints
#Params
yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_constraints
yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_constraints
yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_constraints
#Link params
yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_constraints
yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_constraints
yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_constraints
#laplace likelihood gradcheck
yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
if ep:
#ep likelihood gradcheck
yield self.t_ep_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
self.tearDown()
#############
# dpdf_df's #
#############
@with_setup(setUp, tearDown)
def t_logpdf(self, model, Y, f):
print "\n{}".format(inspect.stack()[0][3])
print model
np.testing.assert_almost_equal(
np.log(model.pdf(f.copy(), Y.copy())),
model.logpdf(f.copy(), Y.copy()))
@with_setup(setUp, tearDown)
def t_dlogpdf_df(self, model, Y, f):
print "\n{}".format(inspect.stack()[0][3])
self.description = "\n{}".format(inspect.stack()[0][3])
logpdf = functools.partial(model.logpdf, y=Y)
dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y)
grad = GradientChecker(logpdf, dlogpdf_df, f.copy(), 'g')
grad.randomize()
grad.checkgrad(verbose=1)
print model
assert grad.checkgrad()
@with_setup(setUp, tearDown)
def t_d2logpdf_df2(self, model, Y, f):
print "\n{}".format(inspect.stack()[0][3])
dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y)
d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y)
grad = GradientChecker(dlogpdf_df, d2logpdf_df2, f.copy(), 'g')
grad.randomize()
grad.checkgrad(verbose=1)
print model
assert grad.checkgrad()
@with_setup(setUp, tearDown)
def t_d3logpdf_df3(self, model, Y, f):
print "\n{}".format(inspect.stack()[0][3])
d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y)
d3logpdf_df3 = functools.partial(model.d3logpdf_df3, y=Y)
grad = GradientChecker(d2logpdf_df2, d3logpdf_df3, f.copy(), 'g')
grad.randomize()
grad.checkgrad(verbose=1)
print model
assert grad.checkgrad()
##############
# df_dparams #
##############
@with_setup(setUp, tearDown)
def t_dlogpdf_dparams(self, model, Y, f, params, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta,
params, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True)
)
@with_setup(setUp, tearDown)
def t_dlogpdf_df_dparams(self, model, Y, f, params, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta,
params, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True)
)
@with_setup(setUp, tearDown)
def t_d2logpdf2_df2_dparams(self, model, Y, f, params, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta,
params, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True)
)
################
# dpdf_dlink's #
################
@with_setup(setUp, tearDown)
def t_dlogpdf_dlink(self, model, Y, f, link_f_constraints):
print "\n{}".format(inspect.stack()[0][3])
logpdf = functools.partial(model.logpdf_link, y=Y)
dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y)
grad = GradientChecker(logpdf, dlogpdf_dlink, f.copy(), 'g')
#Apply constraints to link_f values
for constraint in link_f_constraints:
constraint('g', grad)
grad.randomize()
print grad
grad.checkgrad(verbose=1)
assert grad.checkgrad()
@with_setup(setUp, tearDown)
def t_d2logpdf_dlink2(self, model, Y, f, link_f_constraints):
print "\n{}".format(inspect.stack()[0][3])
dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y)
d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y)
grad = GradientChecker(dlogpdf_dlink, d2logpdf_dlink2, f.copy(), 'g')
#Apply constraints to link_f values
for constraint in link_f_constraints:
constraint('g', grad)
grad.randomize()
grad.checkgrad(verbose=1)
print grad
assert grad.checkgrad()
@with_setup(setUp, tearDown)
def t_d3logpdf_dlink3(self, model, Y, f, link_f_constraints):
print "\n{}".format(inspect.stack()[0][3])
d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y)
d3logpdf_dlink3 = functools.partial(model.d3logpdf_dlink3, y=Y)
grad = GradientChecker(d2logpdf_dlink2, d3logpdf_dlink3, f.copy(), 'g')
#Apply constraints to link_f values
for constraint in link_f_constraints:
constraint('g', grad)
grad.randomize()
grad.checkgrad(verbose=1)
print grad
assert grad.checkgrad()
#################
# dlink_dparams #
#################
@with_setup(setUp, tearDown)
def t_dlogpdf_link_dparams(self, model, Y, f, params, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta,
params, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True)
)
@with_setup(setUp, tearDown)
def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta,
params, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True)
)
@with_setup(setUp, tearDown)
def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta,
params, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True)
)
################
# laplace test #
################
@with_setup(setUp, tearDown)
def t_laplace_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints):
print "\n{}".format(inspect.stack()[0][3])
#Normalize
Y = Y/Y.max()
white_var = 0.001
kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), model)
m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=laplace_likelihood)
m.ensure_default_constraints()
m.constrain_fixed('white', white_var)
for param_num in range(len(param_names)):
name = param_names[param_num]
m[name] = param_vals[param_num]
constraints[param_num](name, m)
m.randomize()
m.checkgrad(verbose=1, step=step)
print m
assert m.checkgrad(step=step)
###########
# EP test #
###########
@with_setup(setUp, tearDown)
def t_ep_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints):
print "\n{}".format(inspect.stack()[0][3])
#Normalize
Y = Y/Y.max()
white_var = 0.001
kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
ep_likelihood = GPy.likelihoods.EP(Y.copy(), model)
m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=ep_likelihood)
m.ensure_default_constraints()
m.constrain_fixed('white', white_var)
for param_num in range(len(param_names)):
name = param_names[param_num]
m[name] = param_vals[param_num]
constraints[param_num](name, m)
m.randomize()
m.checkgrad(verbose=1, step=step)
print m
assert m.checkgrad(step=step)
class LaplaceTests(unittest.TestCase):
"""
Specific likelihood tests, not general enough for the above tests
"""
def setUp(self):
self.N = 5
self.D = 3
self.X = np.random.rand(self.N, self.D)*10
self.real_std = 0.1
noise = np.random.randn(*self.X[:, 0].shape)*self.real_std
self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None]
self.f = np.random.rand(self.N, 1)
self.var = 0.2
self.var = np.random.rand(1)
self.stu_t = GPy.likelihoods.student_t(deg_free=5, sigma2=self.var)
self.gauss = GPy.likelihoods.gaussian(gp_transformations.Log(), variance=self.var, D=self.D, N=self.N)
#Make a bigger step as lower bound can be quite curved
self.step = 1e-6
def tearDown(self):
self.stu_t = None
self.gauss = None
self.Y = None
self.f = None
self.X = None
def test_gaussian_d2logpdf_df2_2(self):
print "\n{}".format(inspect.stack()[0][3])
self.Y = None
self.gauss = None
self.N = 2
self.D = 1
self.X = np.linspace(0, self.D, self.N)[:, None]
self.real_std = 0.2
noise = np.random.randn(*self.X.shape)*self.real_std
self.Y = np.sin(self.X*2*np.pi) + noise
self.f = np.random.rand(self.N, 1)
self.gauss = GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N)
dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y)
d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y)
grad = GradientChecker(dlogpdf_df, d2logpdf_df2, self.f.copy(), 'g')
grad.randomize()
grad.checkgrad(verbose=1)
self.assertTrue(grad.checkgrad())
if __name__ == "__main__":
print "Running unit tests"
unittest.main()

View file

@ -28,8 +28,8 @@ def ard(p):
class Test(unittest.TestCase):
input_dim = 9
num_inducing = 4
N = 3
Nsamples = 5e6
N = 30
Nsamples = 9e6
def setUp(self):
i_s_dim_list = [2,4,3]
@ -45,20 +45,26 @@ class Test(unittest.TestCase):
input_slices = input_slices
)
self.kerns = (
input_slice_kern,
# input_slice_kern,
# (GPy.kern.rbf(self.input_dim, ARD=True) +
# GPy.kern.linear(self.input_dim, ARD=True) +
# GPy.kern.bias(self.input_dim) +
# GPy.kern.white(self.input_dim)),
# (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
# GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
# GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) +
# GPy.kern.bias(self.input_dim) +
# GPy.kern.white(self.input_dim)),
# GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True),
# GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
# GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) +
# GPy.kern.bias(self.input_dim) +
# GPy.kern.white(self.input_dim)),
(GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) +
GPy.kern.bias(self.input_dim, np.random.rand()) +
GPy.kern.white(self.input_dim, np.random.rand())),
(GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
GPy.kern.bias(self.input_dim, np.random.rand()) +
GPy.kern.white(self.input_dim, np.random.rand())),
# GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True),
# GPy.kern.linear(self.input_dim, ARD=False), GPy.kern.linear(self.input_dim, ARD=True),
# GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim),
# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim),
# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim),
# GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim),
# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim),
# GPy.kern.bias(self.input_dim), GPy.kern.white(self.input_dim),
@ -79,7 +85,7 @@ class Test(unittest.TestCase):
def test_psi1(self):
for kern in self.kerns:
Nsamples = np.floor(self.Nsamples/300.)
Nsamples = int(np.floor(self.Nsamples/self.N))
psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance)
K_ = np.zeros((Nsamples, self.num_inducing))
diffs = []
@ -105,7 +111,7 @@ class Test(unittest.TestCase):
def test_psi2(self):
for kern in self.kerns:
Nsamples = self.Nsamples/300.
Nsamples = int(np.floor(self.Nsamples/self.N))
psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
K_ = np.zeros((self.num_inducing, self.num_inducing))
diffs = []
@ -119,10 +125,10 @@ class Test(unittest.TestCase):
try:
import pylab
pylab.figure(msg)
pylab.plot(diffs)
pylab.plot(diffs, marker='x', mew=1.3)
# print msg, np.allclose(psi2.squeeze(), K_, rtol=1e-1, atol=.1)
self.assertTrue(np.allclose(psi2.squeeze(), K_,
rtol=1e-1, atol=.1),
self.assertTrue(np.allclose(psi2.squeeze(), K_),
#rtol=1e-1, atol=.1),
msg=msg + ": not matching")
# sys.stdout.write(".")
except:
@ -135,7 +141,7 @@ class Test(unittest.TestCase):
if __name__ == "__main__":
sys.argv = ['',
#'Test.test_psi0',
'Test.test_psi1',
#'Test.test_psi1',
'Test.test_psi2',
]
unittest.main()

View file

@ -209,7 +209,7 @@ class GradientTests(unittest.TestCase):
Z = np.linspace(0, 15, 4)[:, None]
kernel = GPy.kern.rbf(1)
m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z)
#distribution = GPy.likelihoods.likelihood_functions.Binomial()
#distribution = GPy.likelihoods.likelihood_functions.Bernoulli()
#likelihood = GPy.likelihoods.EP(Y, distribution)
#m = GPy.core.SparseGP(X, likelihood, kernel, Z)
#m.ensure_default_constraints()

View file

@ -14,3 +14,5 @@ import visualize
import decorators
import classification
import latent_space_visualizations
import netpbmfile

17
GPy/util/config.py Normal file
View file

@ -0,0 +1,17 @@
#
# This loads the configuration
#
import ConfigParser
import os
config = ConfigParser.ConfigParser()
user_file = os.path.join(os.getenv('HOME'),'.gpy_config.cfg')
default_file = os.path.join('..','gpy_config.cfg')
# 1. check if the user has a ~/.gpy_config.cfg
if os.path.isfile(user_file):
config.read(user_file)
else:
# 2. if not, use the default one
path = os.path.dirname(__file__)
config.read(os.path.join(path,default_file))
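For illustration, a user-level `~/.gpy_config.cfg` is standard ConfigParser/INI format; a minimal sketch (the `parallel`/`openmp` flag is the one consulted by GPy.util.misc below; this is not a complete list of options):

[parallel]
openmp = True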

View file

@ -8,40 +8,38 @@ import zipfile
import tarfile
import datetime
ipython_notebook = False
if ipython_notebook:
import IPython.core.display
def ipynb_input(varname, prompt=''):
"""Prompt user for input and assign string val to given variable name."""
js_code = ("""
var value = prompt("{prompt}","");
var py_code = "{varname} = '" + value + "'";
IPython.notebook.kernel.execute(py_code);
""").format(prompt=prompt, varname=varname)
return IPython.core.display.Javascript(js_code)
ipython_available=True
try:
import IPython
except ImportError:
ipython_available=False
import sys, urllib
def reporthook(a,b,c):
def reporthook(a,b,c):
# ',' at the end of the line is important!
#print "% 3.1f%% of %d bytes\r" % (min(100, float(a * b) / c * 100), c),
#you can also use sys.stdout.write
sys.stdout.write("\r% 3.1f%% of %d bytes" % (min(100, float(a * b) / c * 100), c))
sys.stdout.flush()
# Global variables
data_path = os.path.join(os.path.dirname(__file__), 'datasets')
default_seed = 10000
overide_manual_authorize=False
neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
sam_url = 'http://www.cs.nyu.edu/~roweis/data/'
cmu_url = 'http://mocap.cs.cmu.edu/subjects/'
# Note: there may be a better way of storing data resources. One of the pythonistas will need to take a look.
# Note: there may be a better way of storing data resources, for the
# moment we are storing them in a dictionary.
data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'],
'files' : [['ankurDataPoseSilhouette.mat']],
'license' : None,
'citation' : """3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.""",
'details' : """Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing."""},
'boston_housing' : {'urls' : ['http://archive.ics.uci.edu/ml/machine-learning-databases/housing/'],
'files' : [['Index', 'housing.data', 'housing.names']],
'citation' : """Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.""",
@ -49,7 +47,7 @@ data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'],
'license' : None,
'size' : 51276
},
'brendan_faces' : {'urls' : ['http://www.cs.nyu.edu/~roweis/data/'],
'brendan_faces' : {'urls' : [sam_url],
'files': [['frey_rawface.mat']],
'citation' : 'Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.',
'details' : """A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.""",
@ -93,6 +91,12 @@ The database was created with funding from NSF EIA-0196217.""",
'details' : """Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.""",
'license' : None,
'size' : 21949154},
'olivetti_faces' : {'urls' : [neil_url + 'olivetti_faces/', sam_url],
'files' : [['att_faces.zip'], ['olivettifaces.mat']],
'citation' : 'Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994',
'details' : """Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. """,
'license': None,
'size' : 8561331},
'olympic_marathon_men' : {'urls' : [neil_url + 'olympic_marathon_men/'],
'files' : [['olympicMarathonTimes.csv']],
'citation' : None,
@ -141,26 +145,41 @@ The database was created with funding from NSF EIA-0196217.""",
'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000',
'license' : None,
'size' : 24229368},
'xw_pen' : {'urls' : [neil_url + 'xw_pen/'],
'files' : [['xw_pen_15.csv']],
'details' : """Accelerometer pen data used for robust regression by Tipping and Lawrence.""",
'citation' : 'Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005',
'license' : None,
'size' : 3410}
}
def prompt_user():
def prompt_user(prompt):
"""Ask user for agreeing to data set licenses."""
# raw_input returns the empty string for "enter"
yes = set(['yes', 'y'])
no = set(['no','n'])
choice = ''
if ipython_notebook:
ipynb_input(choice, prompt='provide your answer here')
else:
try:
print(prompt)
choice = raw_input().lower()
# would like to test for exception here, but not sure if we can do that without importing IPython
except:
print('Stdin is not implemented.')
print('You need to set')
print('overide_manual_authorize=True')
print('to proceed with the download. Please set that variable and continue.')
raise
if choice in yes:
return True
elif choice in no:
return False
else:
sys.stdout.write("Please respond with 'yes', 'y' or 'no', 'n'")
return prompt_user()
print("Your response was a " + choice)
print("Please respond with 'yes', 'y' or 'no', 'n'")
#return prompt_user()
def data_available(dataset_name=None):
@ -170,7 +189,7 @@ def data_available(dataset_name=None):
if not os.path.exists(os.path.join(data_path, dataset_name, file)):
return False
return True
def download_url(url, store_directory, save_name = None, messages = True, suffix=''):
"""Download a file from a url and save it to disk."""
i = url.rfind('/')
@ -212,15 +231,14 @@ def authorize_download(dataset_name=None):
print('You must also agree to the following license:')
print(dr['license'])
print('')
print('Do you wish to proceed with the download? [yes/no]')
return prompt_user()
return prompt_user('Do you wish to proceed with the download? [yes/no]')
def download_data(dataset_name=None):
"""Check with the user that the are happy with terms and conditions for the data set, then download it."""
dr = data_resources[dataset_name]
if not authorize_download(dataset_name):
return False
raise Exception("Permission to download data set denied.")
if dr.has_key('suffices'):
for url, files, suffices in zip(dr['urls'], dr['files'], dr['suffices']):
@ -231,18 +249,18 @@ def download_data(dataset_name=None):
for file in files:
download_url(os.path.join(url,file), dataset_name, dataset_name)
return True
def data_details_return(data, data_set):
"""Update the data component of the data dictionary with details drawn from the data_resources."""
data.update(data_resources[data_set])
return data
def cmu_urls_files(subj_motions, messages = True):
'''
Find which resources are missing on the local disk for the requested CMU motion capture motions.
Find which resources are missing on the local disk for the requested CMU motion capture motions.
'''
subjects_num = subj_motions[0]
motions_num = subj_motions[1]
@ -262,15 +280,15 @@ def cmu_urls_files(subj_motions, messages = True):
motions[i].append(curMot)
all_skels = []
assert len(subjects) == len(motions)
all_motions = []
for i in range(len(subjects)):
skel_dir = os.path.join(data_path, 'cmu_mocap')
cur_skel_file = os.path.join(skel_dir, subjects[i] + '.asf')
url_required = False
file_download = []
if not os.path.exists(cur_skel_file):
@ -314,7 +332,7 @@ if gpxpy_available:
points = [point for track in gpx.tracks for segment in track.segments for point in segment.points]
data = [[(point.time-datetime.datetime(2013,8,21)).total_seconds(), point.latitude, point.longitude, point.elevation] for point in points]
X.append(np.asarray(data)[::sample_every, :])
gpx_file.close()
gpx_file.close()
return data_details_return({'X' : X, 'info' : 'Data is an array containing time in seconds, latitude, longitude and elevation in that order.'}, data_set)
del gpxpy_available
@ -390,7 +408,7 @@ def oil(data_set='three_phase_oil_flow'):
return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'Xvalid': Xvalid, 'Yvalid': Yvalid}, data_set)
#else:
# throw an error
def oil_100(seed=default_seed, data_set = 'three_phase_oil_flow'):
np.random.seed(seed=seed)
data = oil()
@ -489,13 +507,13 @@ def ripley_synth(data_set='ripley_prnn_data'):
return data_details_return({'X': X, 'y': y, 'Xtest': Xtest, 'ytest': ytest, 'info': 'Synthetic data generated by Ripley for a two class classification problem.'}, data_set)
def osu_run1(data_set='osu_run1', sample_every=4):
path = os.path.join(data_path, data_set)
if not data_available(data_set):
download_data(data_set)
zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'sprintTXT.ZIP'), 'r')
path = os.path.join(data_path, data_set)
for name in zip.namelist():
zip.extract(name, path)
Y, connect = GPy.util.mocap.load_text_data('Aug210107', path)
zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'run1TXT.ZIP'), 'r')
for name in zip.namelist():
zip.extract(name, path)
Y, connect = GPy.util.mocap.load_text_data('Aug210106', path)
Y = Y[0:-1:sample_every, :]
return data_details_return({'Y': Y, 'connect' : connect}, data_set)
@ -579,8 +597,34 @@ def toy_linear_1d_classification(seed=default_seed):
X = (np.r_[x1, x2])[:, None]
return {'X': X, 'Y': sample_class(2.*X), 'F': 2.*X, 'seed' : seed}
def olympic_100m_men(data_set='rogers_girolami_data'):
def olivetti_faces(data_set='olivetti_faces'):
path = os.path.join(data_path, data_set)
if not data_available(data_set):
download_data(data_set)
zip = zipfile.ZipFile(os.path.join(path, 'att_faces.zip'), 'r')
for name in zip.namelist():
zip.extract(name, path)
Y = []
lbls = []
for subject in range(40):
for image in range(10):
image_path = os.path.join(path, 'orl_faces', 's'+str(subject+1), str(image+1) + '.pgm')
Y.append(GPy.util.netpbmfile.imread(image_path).flatten())
lbls.append(subject)
Y = np.asarray(Y)
lbls = np.asarray(lbls)[:, None]
return data_details_return({'Y': Y, 'lbls' : lbls, 'info': "ORL Faces processed to 64x64 images."}, data_set)
def xw_pen(data_set='xw_pen'):
if not data_available(data_set):
download_data(data_set)
Y = np.loadtxt(os.path.join(data_path, data_set, 'xw_pen_15.csv'), delimiter=',')
X = np.arange(485)[:, None]
return data_details_return({'Y': Y, 'X': X, 'info': "Tilt data from a personalized digital assistant pen. Plot in original paper showed regression between time steps 175 and 275."}, data_set)
def download_rogers_girolami_data(data_set='rogers_girolami_data'):
if not data_available(data_set):
download_data(data_set)
path = os.path.join(data_path, data_set)
tar_file = os.path.join(path, 'firstcoursemldata.tar.gz')
@ -588,6 +632,9 @@ def olympic_100m_men(data_set='rogers_girolami_data'):
print('Extracting file.')
tar.extractall(path=path)
tar.close()
def olympic_100m_men(data_set='rogers_girolami_data'):
download_rogers_girolami_data()
olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male100']
X = olympic_data[:, 0][:, None]
@ -595,20 +642,45 @@ def olympic_100m_men(data_set='rogers_girolami_data'):
return data_details_return({'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m men from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
def olympic_100m_women(data_set='rogers_girolami_data'):
if not data_available(data_set):
download_data(data_set)
path = os.path.join(data_path, data_set)
tar_file = os.path.join(path, 'firstcoursemldata.tar.gz')
tar = tarfile.open(tar_file)
print('Extracting file.')
tar.extractall(path=path)
tar.close()
download_rogers_girolami_data()
olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['female100']
X = olympic_data[:, 0][:, None]
Y = olympic_data[:, 1][:, None]
return data_details_return({'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m women from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
def olympic_200m_women(data_set='rogers_girolami_data'):
download_rogers_girolami_data()
olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['female200']
X = olympic_data[:, 0][:, None]
Y = olympic_data[:, 1][:, None]
return data_details_return({'X': X, 'Y': Y, 'info': "Olympic 200 m winning times for women from 1896 until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
def olympic_200m_men(data_set='rogers_girolami_data'):
download_rogers_girolami_data()
olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male200']
X = olympic_data[:, 0][:, None]
Y = olympic_data[:, 1][:, None]
return data_details_return({'X': X, 'Y': Y, 'info': "Male 200 m winning times for women from 1896 until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
def olympic_400m_women(data_set='rogers_girolami_data'):
download_rogers_girolami_data()
olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['female400']
X = olympic_data[:, 0][:, None]
Y = olympic_data[:, 1][:, None]
return data_details_return({'X': X, 'Y': Y, 'info': "Olympic 400 m winning times for women until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
def olympic_400m_men(data_set='rogers_girolami_data'):
download_rogers_girolami_data()
olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male400']
X = olympic_data[:, 0][:, None]
Y = olympic_data[:, 1][:, None]
return data_details_return({'X': X, 'Y': Y, 'info': "Male 400 m winning times for women until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
def olympic_marathon_men(data_set='olympic_marathon_men'):
if not data_available(data_set):
download_data(data_set)
@ -617,6 +689,26 @@ def olympic_marathon_men(data_set='olympic_marathon_men'):
Y = olympics[:, 1:2]
return data_details_return({'X': X, 'Y': Y}, data_set)
def olympics():
"""All olympics sprint winning times for multiple output prediction."""
X = np.zeros((0, 2))
Y = np.zeros((0, 1))
for i, dataset in enumerate([olympic_100m_men,
olympic_100m_women,
olympic_200m_men,
olympic_200m_women,
olympic_400m_men,
olympic_400m_women]):
data = dataset()
year = data['X']
time = data['Y']
X = np.vstack((X, np.hstack((year, np.ones_like(year)*i))))
Y = np.vstack((Y, time))
data['X'] = X
data['Y'] = Y
data['info'] = "Olympics sprint event winning for men and women to 2008. Data is from Rogers and Girolami's First Course in Machine Learning."
return data
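A usage sketch: the second column of X is an integer in 0-5 indexing the event, so all six series can be modelled jointly:

data = olympics()
print data['X'].shape, data['Y'].shape  # (N, 2) and (N, 1)
print np.unique(data['X'][:, 1])  # the six output indices, 0.0 ... 5.0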
# def movielens_small(partNo=1,seed=default_seed):
# np.random.seed(seed=seed)

View file

@ -61,6 +61,14 @@ def dpotri(A, lower=0):
"""
return lapack.dpotri(A, lower=lower)
def pddet(A):
"""
Log-determinant of a positive definite matrix; only symmetric matrices are supported.
"""
L = jitchol(A)
logdetA = 2*sum(np.log(np.diag(L)))
return logdetA
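A quick numerical sanity check (a sketch, not part of the module):

import numpy as np
A = np.random.randn(5, 5)
A = np.dot(A, A.T) + 5 * np.eye(5)  # a symmetric positive definite matrix
sign, logdet = np.linalg.slogdet(A)
assert sign > 0 and np.allclose(pddet(A), logdet)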
def trace_dot(a, b):
"""
Efficiently compute the trace of the matrix product of a and b
@ -325,6 +333,7 @@ def symmetrify(A, upper=False):
"""
N, M = A.shape
assert N == M
c_contig_code = """
int iN;
for (int i=1; i<N; i++){
@ -343,6 +352,8 @@ def symmetrify(A, upper=False):
}
}
"""
N = int(N) # for safe type casting
if A.flags['C_CONTIGUOUS'] and upper:
weave.inline(f_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
elif A.flags['C_CONTIGUOUS'] and not upper:
@ -403,4 +414,3 @@ def backsub_both_sides(L, X, transpose='left'):
else:
tmp, _ = lapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=0)
return lapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=0)[0].T

View file

@ -3,6 +3,34 @@
import numpy as np
from scipy import weave
from config import *
def chain_1(df_dg, dg_dx):
"""
Generic chaining function for first derivative
.. math::
\\frac{d(f . g)}{dx} = \\frac{df}{dg} \\frac{dg}{dx}
"""
return df_dg * dg_dx
def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
"""
Generic chaining function for second derivative
.. math::
\\frac{d^{2}(f . g)}{dx^{2}} = \\frac{d^{2}f}{dg^{2}}(\\frac{dg}{dx})^{2} + \\frac{df}{dg}\\frac{d^{2}g}{dx^{2}}
"""
return d2f_dg2*(dg_dx**2) + df_dg*d2g_dx2
def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
"""
Generic chaining function for third derivative
.. math::
\\frac{d^{3}(f . g)}{dx^{3}} = \\frac{d^{3}f}{dg^{3}}(\\frac{dg}{dx})^{3} + 3\\frac{d^{2}f}{dg^{2}}\\frac{dg}{dx}\\frac{d^{2}g}{dx^{2}} + \\frac{df}{dg}\\frac{d^{3}g}{dx^{3}}
"""
return d3f_dg3*(dg_dx**3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3
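As a sanity check of these formulas (a sketch using f = exp and g = sin, for which every derivative of f with respect to g is exp(g)):

import numpy as np
x, eps = 0.3, 1e-4
g, dg, d2g = np.sin(x), np.cos(x), -np.sin(x)
fg = np.exp(g)  # f = exp, so df_dg = d2f_dg2 = exp(g)
# central finite differences of exp(sin(x)) for the first two derivatives
num1 = (np.exp(np.sin(x + eps)) - np.exp(np.sin(x - eps))) / (2 * eps)
num2 = (np.exp(np.sin(x + eps)) - 2 * fg + np.exp(np.sin(x - eps))) / eps ** 2
assert np.allclose(chain_1(fg, dg), num1)
assert np.allclose(chain_2(fg, dg, fg, d2g), num2)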
def opt_wrapper(m, **kwargs):
"""
@ -57,11 +85,18 @@ def kmm_init(X, m = 10):
return X[inducing]
def fast_array_equal(A, B):
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for private(i, j)'
else:
pragma_string = ''
code2="""
int i, j;
return_val = 1;
#pragma omp parallel for private(i, j)
%s
for(i=0;i<N;i++){
for(j=0;j<D;j++){
if(A(i, j) != B(i, j)){
@ -70,13 +105,18 @@ def fast_array_equal(A, B):
}
}
}
"""
""" % pragma_string
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for private(i, j, z)'
else:
pragma_string = ''
code3="""
int i, j, z;
return_val = 1;
#pragma omp parallel for private(i, j, z)
%s
for(i=0;i<N;i++){
for(j=0;j<D;j++){
for(z=0;z<Q;z++){
@ -87,35 +127,48 @@ def fast_array_equal(A, B):
}
}
}
"""
""" % pragma_string
if config.getboolean('parallel', 'openmp'):
pragma_string = '#include <omp.h>'
else:
pragma_string = ''
support_code = """
#include <omp.h>
%s
#include <math.h>
"""
""" % pragma_string
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'],
'extra_link_args' : ['-lgomp']}
weave_options_openmp = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'],
'extra_link_args' : ['-lgomp'],
'libraries': ['gomp']}
weave_options_noopenmp = {'extra_compile_args': ['-O3']}
if config.getboolean('parallel', 'openmp'):
weave_options = weave_options_openmp
else:
weave_options = weave_options_noopenmp
value = False
if (A == None) and (B == None):
return True
elif ((A == None) and (B != None)) or ((A != None) and (B == None)):
return False
elif A.shape == B.shape:
if A.ndim == 2:
N, D = A.shape
value = weave.inline(code2, support_code=support_code, libraries=['gomp'],
N, D = [int(i) for i in A.shape]
value = weave.inline(code2, support_code=support_code,
arg_names=['A', 'B', 'N', 'D'],
type_converters=weave.converters.blitz,**weave_options)
type_converters=weave.converters.blitz, **weave_options)
elif A.ndim == 3:
N, D, Q = A.shape
value = weave.inline(code3, support_code=support_code, libraries=['gomp'],
N, D, Q = [int(i) for i in A.shape]
value = weave.inline(code3, support_code=support_code,
arg_names=['A', 'B', 'N', 'D', 'Q'],
type_converters=weave.converters.blitz,**weave_options)
type_converters=weave.converters.blitz, **weave_options)
else:
value = np.array_equal(A,B)
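Behaviour per the branches above, as a usage sketch (the weave path compiles C on first call):

A = np.random.randn(4, 3)
assert fast_array_equal(A, A.copy())
assert not fast_array_equal(A, A + 1.0)
assert fast_array_equal(None, None) and not fast_array_equal(A, None)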

331
GPy/util/netpbmfile.py Normal file
View file

@ -0,0 +1,331 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# netpbmfile.py
# Copyright (c) 2011-2013, Christoph Gohlke
# Copyright (c) 2011-2013, The Regents of the University of California
# Produced at the Laboratory for Fluorescence Dynamics.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the copyright holders nor the names of any
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Read and write image data from respectively to Netpbm files.
This implementation follows the Netpbm format specifications at
http://netpbm.sourceforge.net/doc/. No gamma correction is performed.
The following image formats are supported: PBM (bi-level), PGM (grayscale),
PPM (color), PAM (arbitrary), XV thumbnail (RGB332, read-only).
:Author:
`Christoph Gohlke <http://www.lfd.uci.edu/~gohlke/>`_
:Organization:
Laboratory for Fluorescence Dynamics, University of California, Irvine
:Version: 2013.01.18
Requirements
------------
* `CPython 2.7, 3.2 or 3.3 <http://www.python.org>`_
* `Numpy 1.7 <http://www.numpy.org>`_
* `Matplotlib 1.2 <http://www.matplotlib.org>`_ (optional for plotting)
Examples
--------
>>> im1 = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16)
>>> imsave('_tmp.pgm', im1)
>>> im2 = imread('_tmp.pgm')
>>> assert numpy.all(im1 == im2)
"""
from __future__ import division, print_function
import sys
import re
import math
from copy import deepcopy
import numpy
__version__ = '2013.01.18'
__docformat__ = 'restructuredtext en'
__all__ = ['imread', 'imsave', 'NetpbmFile']
def imread(filename, *args, **kwargs):
"""Return image data from Netpbm file as numpy array.
`args` and `kwargs` are arguments to NetpbmFile.asarray().
Examples
--------
>>> image = imread('_tmp.pgm')
"""
try:
netpbm = NetpbmFile(filename)
image = netpbm.asarray()
finally:
netpbm.close()
return image
def imsave(filename, data, maxval=None, pam=False):
"""Write image data to Netpbm file.
Examples
--------
>>> image = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16)
>>> imsave('_tmp.pgm', image)
"""
try:
netpbm = NetpbmFile(data, maxval=maxval)
netpbm.write(filename, pam=pam)
finally:
netpbm.close()
class NetpbmFile(object):
"""Read and write Netpbm PAM, PBM, PGM, PPM, files."""
_types = {b'P1': b'BLACKANDWHITE', b'P2': b'GRAYSCALE', b'P3': b'RGB',
b'P4': b'BLACKANDWHITE', b'P5': b'GRAYSCALE', b'P6': b'RGB',
b'P7 332': b'RGB', b'P7': b'RGB_ALPHA'}
def __init__(self, arg=None, **kwargs):
"""Initialize instance from filename, open file, or numpy array."""
for attr in ('header', 'magicnum', 'width', 'height', 'maxval',
'depth', 'tupltypes', '_filename', '_fh', '_data'):
setattr(self, attr, None)
if arg is None:
self._fromdata([], **kwargs)
elif isinstance(arg, basestring):
self._fh = open(arg, 'rb')
self._filename = arg
self._fromfile(self._fh, **kwargs)
elif hasattr(arg, 'seek'):
self._fromfile(arg, **kwargs)
self._fh = arg
else:
self._fromdata(arg, **kwargs)
def asarray(self, copy=True, cache=False, **kwargs):
"""Return image data from file as numpy array."""
data = self._data
if data is None:
data = self._read_data(self._fh, **kwargs)
if cache:
self._data = data
else:
return data
return deepcopy(data) if copy else data
def write(self, arg, **kwargs):
"""Write instance to file."""
if hasattr(arg, 'seek'):
self._tofile(arg, **kwargs)
else:
with open(arg, 'wb') as fid:
self._tofile(fid, **kwargs)
def close(self):
"""Close open file. Future asarray calls might fail."""
if self._filename and self._fh:
self._fh.close()
self._fh = None
def __del__(self):
self.close()
def _fromfile(self, fh):
"""Initialize instance from open file."""
fh.seek(0)
data = fh.read(4096)
if (len(data) < 7) or not (b'0' < data[1:2] < b'8'):
raise ValueError("Not a Netpbm file:\n%s" % data[:32])
try:
self._read_pam_header(data)
except Exception:
try:
self._read_pnm_header(data)
except Exception:
raise ValueError("Not a Netpbm file:\n%s" % data[:32])
def _read_pam_header(self, data):
"""Read PAM header and initialize instance."""
regroups = re.search(
b"(^P7[\n\r]+(?:(?:[\n\r]+)|(?:#.*)|"
b"(HEIGHT\s+\d+)|(WIDTH\s+\d+)|(DEPTH\s+\d+)|(MAXVAL\s+\d+)|"
b"(?:TUPLTYPE\s+\w+))*ENDHDR\n)", data).groups()
self.header = regroups[0]
self.magicnum = b'P7'
for group in regroups[1:]:
key, value = group.split()
setattr(self, unicode(key).lower(), int(value))
matches = re.findall(b"(TUPLTYPE\s+\w+)", self.header)
self.tupltypes = [s.split(None, 1)[1] for s in matches]
def _read_pnm_header(self, data):
"""Read PNM header and initialize instance."""
bpm = data[1:2] in b"14"
regroups = re.search(b"".join((
b"(^(P[123456]|P7 332)\s+(?:#.*[\r\n])*",
b"\s*(\d+)\s+(?:#.*[\r\n])*",
b"\s*(\d+)\s+(?:#.*[\r\n])*" * (not bpm),
b"\s*(\d+)\s(?:\s*#.*[\r\n]\s)*)")), data).groups() + (1, ) * bpm
self.header = regroups[0]
self.magicnum = regroups[1]
self.width = int(regroups[2])
self.height = int(regroups[3])
self.maxval = int(regroups[4])
self.depth = 3 if self.magicnum in b"P3P6P7 332" else 1
self.tupltypes = [self._types[self.magicnum]]
def _read_data(self, fh, byteorder='>'):
"""Return image data from open file as numpy array."""
fh.seek(len(self.header))
data = fh.read()
dtype = 'u1' if self.maxval < 256 else byteorder + 'u2'
depth = 1 if self.magicnum == b"P7 332" else self.depth
shape = [-1, self.height, self.width, depth]
size = numpy.prod(shape[1:])
if self.magicnum in b"P1P2P3":
data = numpy.array(data.split(None, size)[:size], dtype)
data = data.reshape(shape)
elif self.maxval == 1:
shape[2] = int(math.ceil(self.width / 8))
data = numpy.frombuffer(data, dtype).reshape(shape)
data = numpy.unpackbits(data, axis=-2)[:, :, :self.width, :]
else:
data = numpy.frombuffer(data, dtype)
data = data[:size * (data.size // size)].reshape(shape)
if data.shape[0] < 2:
data = data.reshape(data.shape[1:])
if data.shape[-1] < 2:
data = data.reshape(data.shape[:-1])
if self.magicnum == b"P7 332":
rgb332 = numpy.array(list(numpy.ndindex(8, 8, 4)), numpy.uint8)
rgb332 *= [36, 36, 85]
data = numpy.take(rgb332, data, axis=0)
return data
def _fromdata(self, data, maxval=None):
"""Initialize instance from numpy array."""
data = numpy.array(data, ndmin=2, copy=True)
if data.dtype.kind not in "uib":
raise ValueError("not an integer type: %s" % data.dtype)
if data.dtype.kind == 'i' and numpy.min(data) < 0:
raise ValueError("data out of range: %i" % numpy.min(data))
if maxval is None:
maxval = numpy.max(data)
maxval = 255 if maxval < 256 else 65535
if maxval < 0 or maxval > 65535:
raise ValueError("data out of range: %i" % maxval)
data = data.astype('u1' if maxval < 256 else '>u2')
self._data = data
if data.ndim > 2 and data.shape[-1] in (3, 4):
self.depth = data.shape[-1]
self.width = data.shape[-2]
self.height = data.shape[-3]
self.magicnum = b'P7' if self.depth == 4 else b'P6'
else:
self.depth = 1
self.width = data.shape[-1]
self.height = data.shape[-2]
self.magicnum = b'P5' if maxval > 1 else b'P4'
self.maxval = maxval
self.tupltypes = [self._types[self.magicnum]]
self.header = self._header()
def _tofile(self, fh, pam=False):
"""Write Netbm file."""
fh.seek(0)
fh.write(self._header(pam))
data = self.asarray(copy=False)
if self.maxval == 1:
data = numpy.packbits(data, axis=-1)
data.tofile(fh)
def _header(self, pam=False):
"""Return file header as byte string."""
if pam or self.magicnum == b'P7':
header = "\n".join((
"P7",
"HEIGHT %i" % self.height,
"WIDTH %i" % self.width,
"DEPTH %i" % self.depth,
"MAXVAL %i" % self.maxval,
"\n".join("TUPLTYPE %s" % unicode(i) for i in self.tupltypes),
"ENDHDR\n"))
elif self.maxval == 1:
header = "P4 %i %i\n" % (self.width, self.height)
elif self.depth == 1:
header = "P5 %i %i %i\n" % (self.width, self.height, self.maxval)
else:
header = "P6 %i %i %i\n" % (self.width, self.height, self.maxval)
if sys.version_info[0] > 2:
header = bytes(header, 'ascii')
return header
def __str__(self):
"""Return information about instance."""
return unicode(self.header)
if sys.version_info[0] > 2:
basestring = str
unicode = lambda x: str(x, 'ascii')
if __name__ == "__main__":
# Show images specified on command line or all images in current directory
from glob import glob
from matplotlib import pyplot
files = sys.argv[1:] if len(sys.argv) > 1 else glob('*.p*m')
for fname in files:
try:
pam = NetpbmFile(fname)
img = pam.asarray(copy=False)
            if False:  # enable to exercise the write/read round-trip below
pam.write('_tmp.pgm.out', pam=True)
img2 = imread('_tmp.pgm.out')
assert numpy.all(img == img2)
imsave('_tmp.pgm.out', img)
img2 = imread('_tmp.pgm.out')
assert numpy.all(img == img2)
pam.close()
except ValueError as e:
print(fname, e)
continue
_shape = img.shape
if img.ndim > 3 or (img.ndim > 2 and img.shape[-1] not in (3, 4)):
img = img[0]
cmap = 'gray' if pam.maxval > 1 else 'binary'
pyplot.imshow(img, cmap, interpolation='nearest')
pyplot.title("%s %s %s %s" % (fname, unicode(pam.magicnum),
_shape, img.dtype))
pyplot.show()
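# --- Round-trip sketch (illustrative, not part of the module) ---------------
# A minimal usage sketch for NetpbmFile, using only the calls exercised in the
# __main__ block above; 'example.pgm' and 'example_copy.pgm' are hypothetical
# file names.
import numpy
pam = NetpbmFile('example.pgm')      # the PNM/PAM header is parsed on open
img = pam.asarray(copy=False)        # decode the pixel data to a numpy array
pam.write('example_copy.pgm')        # re-serialize; pam=True would emit a P7/PAM header
pam.close()
assert numpy.all(img == NetpbmFile('example_copy.pgm').asarray())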

View file

@ -1,32 +1,113 @@
from sympy import Function, S, oo, I, cos, sin
from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt
from sympy.core.function import ArgumentIndexError
class ln_diff_erf(Function):
nargs = 2
def fdiff(self, argindex=2):
if argindex == 2:
x0, x1 = self.args
return -2*exp(-x1**2)/(sqrt(pi)*(erf(x0)-erf(x1)))
elif argindex == 1:
x0, x1 = self.args
return 2*exp(-x0**2)/(sqrt(pi)*(erf(x0)-erf(x1)))
else:
raise ArgumentIndexError(self, argindex)
@classmethod
def eval(cls, x0, x1):
if x0.is_Number and x1.is_Number:
return log(erf(x0)-erf(x1))
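# Quick check sketch for ln_diff_erf (illustrative, kept as a doctest-style
# comment so the module stays importable): numeric arguments collapse through
# eval(), while symbolic arguments stay unevaluated and differentiate through
# fdiff().
#
#     >>> import sympy
#     >>> x0, x1 = sympy.symbols('x0 x1')
#     >>> ln_diff_erf(2, 1)
#     log(-erf(1) + erf(2))
#     >>> ln_diff_erf(x0, x1).diff(x0)
#     2*exp(-x0**2)/(sqrt(pi)*(erf(x0) - erf(x1)))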
class sim_h(Function):
nargs = 5
def fdiff(self, argindex=1):
pass
@classmethod
def eval(cls, t, tprime, d_i, d_j, l):
# putting in the is_Number stuff forces it to look for a fdiff method for derivative.
if (t.is_Number
and tprime.is_Number
and d_i.is_Number
and d_j.is_Number
and l.is_Number):
if (t is S.NaN
or tprime is S.NaN
or d_i is S.NaN
or d_j is S.NaN
or l is S.NaN):
return S.NaN
else:
return (exp((d_j/2*l)**2)/(d_i+d_j)
*(exp(-d_j*(tprime - t))
*(erf((tprime-t)/l - d_j/2*l)
+ erf(t/l + d_j/2*l))
                        *(exp(-(d_j*tprime + d_i*t))
*(erf(tprime/l - d_j/2*l)
+ erf(d_j/2*l))))
class erfc(Function):
nargs = 1
@classmethod
def eval(cls, arg):
return 1-erf(arg)
class erfcx(Function):
nargs = 1
@classmethod
def eval(cls, arg):
return erfc(arg)*exp(arg*arg)
class sinc_grad(Function):
nargs = 1
def fdiff(self, argindex=1):
return ((2-x*x)*sin(self.args[0]) - 2*x*cos(x))/(x*x*x)
        if argindex == 1:
            # Strictly speaking this should be computed separately, as it won't work when x=0. See http://calculus.subwiki.org/wiki/Sinc_function
            x = self.args[0]
            return ((2 - x*x)*sin(x) - 2*x*cos(x))/(x*x*x)
else:
raise ArgumentIndexError(self, argindex)
@classmethod
def eval(cls, x):
if x is S.Zero:
return S.Zero
else:
return (x*cos(x) - sin(x))/(x*x)
if x.is_Number:
if x is S.NaN:
return S.NaN
elif x is S.Zero:
return S.Zero
else:
return (x*cos(x) - sin(x))/(x*x)
class sinc(Function):
nargs = 1
def fdiff(self, argindex=1):
return sinc_grad(self.args[0])
        if argindex == 1:
return sinc_grad(self.args[0])
else:
raise ArgumentIndexError(self, argindex)
@classmethod
def eval(cls, x):
if x is S.Zero:
return S.One
else:
return sin(x)/x
def eval(cls, arg):
if arg.is_Number:
if arg is S.NaN:
return S.NaN
elif arg is S.Zero:
return S.One
else:
return sin(arg)/arg
if arg.func is asin:
x = arg.args[0]
return x / arg
def _eval_is_real(self):
return self.args[0].is_real
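# Differentiation sketch (illustrative, doctest-style so nothing runs at
# import time): sympy's derivative machinery calls fdiff(), so the chained
# definitions above give sinc -> sinc_grad, and eval() resolves the removable
# singularities at zero.
#
#     >>> import sympy
#     >>> x = sympy.Symbol('x', real=True)
#     >>> sinc(x).diff(x)
#     sinc_grad(x)
#     >>> sinc(0), sinc_grad(0)
#     (1, 0)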

View file

@ -13,24 +13,32 @@ def std_norm_cdf(x):
Cumulative standard Gaussian distribution
Based on Abramowitz, M. and Stegun, I. (1970)
"""
    # Generalize to vector-valued x
    x = np.array(x, dtype=np.float64)  # float copy: the C loop below negates x[i] in place
    cdf_x = np.zeros_like(x)
N = x.size
support_code = "#include <math.h>"
code = """
double sign = 1.0;
if (x < 0.0){
sign = -1.0;
x = -x;
double sign, t, erf;
for (int i=0; i<N; i++){
sign = 1.0;
if (x[i] < 0.0){
sign = -1.0;
x[i] = -x[i];
}
x[i] = x[i]/sqrt(2.0);
t = 1.0/(1.0 + 0.3275911*x[i]);
erf = 1. - exp(-x[i]*x[i])*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429)))));
//return_val = 0.5*(1.0 + sign*erf);
cdf_x[i] = 0.5*(1.0 + sign*erf);
}
x = x/sqrt(2.0);
double t = 1.0/(1.0 + 0.3275911*x);
double erf = 1. - exp(-x*x)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429)))));
return_val = 0.5*(1.0 + sign*erf);
"""
x = float(x)
return weave.inline(code,arg_names=['x'],support_code=support_code)
weave.inline(code, arg_names=['x', 'cdf_x', 'N'], support_code=support_code)
return cdf_x
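# Pure-numpy sketch of the same Abramowitz & Stegun 7.1.26 erf approximation,
# handy for cross-checking the weave kernel above without a C compiler (the
# polynomial's maximum absolute error is about 1.5e-7).
def std_norm_cdf_numpy(x):
    x = np.asarray(x, dtype=np.float64)
    sign = np.where(x < 0.0, -1.0, 1.0)
    z = np.abs(x) / np.sqrt(2.0)
    t = 1.0 / (1.0 + 0.3275911 * z)
    erf = 1.0 - np.exp(-z * z) * t * (0.254829592 + t * (-0.284496736
        + t * (1.421413741 + t * (-1.453152027 + t * 1.061405429))))
    return 0.5 * (1.0 + sign * erf)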
def inv_std_norm_cdf(x):
"""

View file

@ -246,17 +246,36 @@ class lvm_dimselect(lvm):
class image_show(matplotlib_show):
"""Show a data vector as an image."""
def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, invert=False, scale=False, palette=[], presetMean = 0., presetSTD = -1., selectImage=0):
"""Show a data vector as an image. This visualizer rehapes the output vector and displays it as an image.
:param vals: the values of the output to display.
:type vals: ndarray
:param axes: the axes to show the output on.
:type vals: axes handle
:param dimensions: the dimensions that the image needs to be transposed to for display.
:type dimensions: tuple
:param transpose: whether to transpose the image before display.
:type bool: default is False.
:param order: whether array is in Fortan ordering ('F') or Python ordering ('C'). Default is python ('C').
:type order: string
:param invert: whether to invert the pixels or not (default False).
:type invert: bool
:param palette: a palette to use for the image.
:param preset_mean: the preset mean of a scaled image.
:type preset_mean: double
:param preset_std: the preset standard deviation of a scaled image.
:type preset_std: double"""
def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, order='C', invert=False, scale=False, palette=[], preset_mean = 0., preset_std = -1., select_image=0):
matplotlib_show.__init__(self, vals, axes)
self.dimensions = dimensions
self.transpose = transpose
self.order = order
self.invert = invert
self.scale = scale
self.palette = palette
self.presetMean = presetMean
self.presetSTD = presetSTD
self.selectImage = selectImage # This is used when the y vector contains multiple images concatenated.
self.preset_mean = preset_mean
self.preset_std = preset_std
self.select_image = select_image # This is used when the y vector contains multiple images concatenated.
self.set_image(self.vals)
if not self.palette == []: # Can just show the image (self.set_image() took care of setting the palette)
@ -272,22 +291,22 @@ class image_show(matplotlib_show):
def set_image(self, vals):
dim = self.dimensions[0] * self.dimensions[1]
nImg = np.sqrt(vals[0,].size/dim)
if nImg > 1 and nImg.is_integer(): # Show a mosaic of images
nImg = np.int(nImg)
self.vals = np.zeros((self.dimensions[0]*nImg, self.dimensions[1]*nImg))
for iR in range(nImg):
for iC in range(nImg):
currImgId = iR*nImg + iC
currImg = np.reshape(vals[0,dim*currImgId+np.array(range(dim))], self.dimensions, order='F')
firstRow = iR*self.dimensions[0]
lastRow = (iR+1)*self.dimensions[0]
firstCol = iC*self.dimensions[1]
lastCol = (iC+1)*self.dimensions[1]
self.vals[firstRow:lastRow, firstCol:lastCol] = currImg
num_images = np.sqrt(vals[0,].size/dim)
if num_images > 1 and num_images.is_integer(): # Show a mosaic of images
num_images = np.int(num_images)
self.vals = np.zeros((self.dimensions[0]*num_images, self.dimensions[1]*num_images))
for iR in range(num_images):
for iC in range(num_images):
cur_img_id = iR*num_images + iC
cur_img = np.reshape(vals[0,dim*cur_img_id+np.array(range(dim))], self.dimensions, order=self.order)
first_row = iR*self.dimensions[0]
last_row = (iR+1)*self.dimensions[0]
first_col = iC*self.dimensions[1]
last_col = (iC+1)*self.dimensions[1]
self.vals[first_row:last_row, first_col:last_col] = cur_img
else:
self.vals = np.reshape(vals[0,dim*self.selectImage+np.array(range(dim))], self.dimensions, order='F')
self.vals = np.reshape(vals[0,dim*self.select_image+np.array(range(dim))], self.dimensions, order=self.order)
if self.transpose:
self.vals = self.vals.T
# if not self.scale:
@ -296,8 +315,8 @@ class image_show(matplotlib_show):
self.vals = -self.vals
# un-normalizing, for visualisation purposes:
if self.presetSTD >= 0: # The Mean is assumed to be in the range (0,255)
self.vals = self.vals*self.presetSTD + self.presetMean
if self.preset_std >= 0: # The Mean is assumed to be in the range (0,255)
self.vals = self.vals*self.preset_std + self.preset_mean
# Clipping the values:
self.vals[self.vals < 0] = 0
self.vals[self.vals > 255] = 255
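# Standalone sketch of the mosaic tiling performed by set_image() above
# (illustrative names; assumes vals is a (1, dim * n**2) row vector holding
# n**2 flattened images, which are tiled into an n-by-n grid).
def _mosaic_sketch(vals, dims=(16, 16), n=4, order='C'):
    import numpy as np
    dim = dims[0] * dims[1]
    mosaic = np.zeros((dims[0] * n, dims[1] * n))
    for iR in range(n):
        for iC in range(n):
            img = vals[0, dim*(iR*n + iC) + np.arange(dim)].reshape(dims, order=order)
            mosaic[iR*dims[0]:(iR+1)*dims[0], iC*dims[1]:(iC+1)*dims[1]] = img
    return mosaic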

View file

@ -20,6 +20,14 @@ GPy.examples.dimensionality_reduction module
:undoc-members:
:show-inheritance:
GPy.examples.laplace_approximations module
------------------------------------------
.. automodule:: GPy.examples.laplace_approximations
:members:
:undoc-members:
:show-inheritance:
GPy.examples.regression module
------------------------------

View file

@ -28,6 +28,14 @@ GPy.kern.parts.Matern52 module
:undoc-members:
:show-inheritance:
GPy.kern.parts.ODE_1 module
---------------------------
.. automodule:: GPy.kern.parts.ODE_1
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.bias module
--------------------------
@ -44,6 +52,14 @@ GPy.kern.parts.coregionalize module
:undoc-members:
:show-inheritance:
GPy.kern.parts.eq_ode1 module
-----------------------------
.. automodule:: GPy.kern.parts.eq_ode1
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.exponential module
---------------------------------

View file

@ -4,10 +4,10 @@ GPy.likelihoods.noise_models package
Submodules
----------
GPy.likelihoods.noise_models.binomial_noise module
--------------------------------------------------
GPy.likelihoods.noise_models.bernoulli_noise module
---------------------------------------------------
.. automodule:: GPy.likelihoods.noise_models.binomial_noise
.. automodule:: GPy.likelihoods.noise_models.bernoulli_noise
:members:
:undoc-members:
:show-inheritance:
@ -60,6 +60,14 @@ GPy.likelihoods.noise_models.poisson_noise module
:undoc-members:
:show-inheritance:
GPy.likelihoods.noise_models.student_t_noise module
---------------------------------------------------
.. automodule:: GPy.likelihoods.noise_models.student_t_noise
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------

View file

@ -43,6 +43,14 @@ GPy.likelihoods.gaussian_mixed_noise module
:undoc-members:
:show-inheritance:
GPy.likelihoods.laplace module
------------------------------
.. automodule:: GPy.likelihoods.laplace
:members:
:undoc-members:
:show-inheritance:
GPy.likelihoods.likelihood module
---------------------------------

View file

@ -4,6 +4,14 @@ GPy.testing package
Submodules
----------
GPy.testing.bcgplvm_tests module
--------------------------------
.. automodule:: GPy.testing.bcgplvm_tests
:members:
:undoc-members:
:show-inheritance:
GPy.testing.bgplvm_tests module
-------------------------------
@ -28,6 +36,14 @@ GPy.testing.examples_tests module
:undoc-members:
:show-inheritance:
GPy.testing.gp_transformation_tests module
------------------------------------------
.. automodule:: GPy.testing.gp_transformation_tests
:members:
:undoc-members:
:show-inheritance:
GPy.testing.gplvm_tests module
------------------------------
@ -44,6 +60,14 @@ GPy.testing.kernel_tests module
:undoc-members:
:show-inheritance:
GPy.testing.likelihoods_tests module
------------------------------------
.. automodule:: GPy.testing.likelihoods_tests
:members:
:undoc-members:
:show-inheritance:
GPy.testing.mapping_tests module
--------------------------------

View file

@ -27,6 +27,14 @@ GPy.util.classification module
:undoc-members:
:show-inheritance:
GPy.util.config module
----------------------
.. automodule:: GPy.util.config
:members:
:undoc-members:
:show-inheritance:
GPy.util.datasets module
------------------------
@ -43,6 +51,14 @@ GPy.util.decorators module
:undoc-members:
:show-inheritance:
GPy.util.erfcx module
---------------------
.. automodule:: GPy.util.erfcx
:members:
:undoc-members:
:show-inheritance:
GPy.util.linalg module
----------------------
@ -51,6 +67,14 @@ GPy.util.linalg module
:undoc-members:
:show-inheritance:
GPy.util.ln_diff_erfs module
----------------------------
.. automodule:: GPy.util.ln_diff_erfs
:members:
:undoc-members:
:show-inheritance:
GPy.util.misc module
--------------------
@ -75,6 +99,14 @@ GPy.util.multioutput module
:undoc-members:
:show-inheritance:
GPy.util.netpbmfile module
--------------------------
.. automodule:: GPy.util.netpbmfile
:members:
:undoc-members:
:show-inheritance:
GPy.util.plot module
--------------------
@ -99,6 +131,14 @@ GPy.util.squashers module
:undoc-members:
:show-inheritance:
GPy.util.symbolic module
------------------------
.. automodule:: GPy.util.symbolic
:members:
:undoc-members:
:show-inheritance:
GPy.util.univariate_Gaussian module
-----------------------------------