Merged with devel

This commit is contained in:
Alan Saul 2013-10-25 12:30:20 +01:00
commit ba1cf96cb1
10 changed files with 101 additions and 394 deletions

View file

@ -27,12 +27,6 @@ class GP(GPBase):
GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self.update_likelihood_approximation()
def getstate(self):
return GPBase.getstate(self)
def setstate(self, state):
GPBase.setstate(self, state)
self._set_params(self._get_params())
def _set_params(self, p):
new_kern_params = p[:self.kern.num_params_transformed()]
@ -200,3 +194,11 @@ class GP(GPBase):
"""
Xnew = self._add_output_index(Xnew, output)
return self.predict(Xnew, which_parts=which_parts, full_cov=full_cov, likelihood_args=likelihood_args)
def getstate(self):
return GPBase.getstate(self)
def setstate(self, state):
GPBase.setstate(self, state)
self._set_params(self._get_params())

View file

@ -9,7 +9,9 @@ from ..likelihoods import Gaussian, Gaussian_Mixed_Noise
class GPBase(Model):
"""
Gaussian process base model for holding shared behaviour between
sparse_GP and GP models.
sparse_GP and GP models, and potentially other models in the future.
Here we define some functions that are used by these models.
"""
def __init__(self, X, likelihood, kernel, normalize_X=False):
self.X = X
@ -34,29 +36,6 @@ class GPBase(Model):
# All leaf nodes should call self._set_params(self._get_params()) at
# the end
def getstate(self):
"""
Get the current state of the class, here we return everything that is needed to recompute the model.
"""
return Model.getstate(self) + [self.X,
self.num_data,
self.input_dim,
self.kern,
self.likelihood,
self.output_dim,
self._Xoffset,
self._Xscale]
def setstate(self, state):
self._Xscale = state.pop()
self._Xoffset = state.pop()
self.output_dim = state.pop()
self.likelihood = state.pop()
self.kern = state.pop()
self.input_dim = state.pop()
self.num_data = state.pop()
self.X = state.pop()
Model.setstate(self, state)
def posterior_samples_f(self,X,size=10,which_parts='all',full_cov=True):
"""
@ -269,155 +248,33 @@ class GPBase(Model):
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot_single_output_f(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
def getstate(self):
"""
For a specific output, in a multioutput model, this function works just as plot_f on single output models.
:param output: which output to plot (for multiple output models only)
:type output: integer (first output is 0)
:param samples: the number of a posteriori samples to plot
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:param which_data: which of the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param full_cov:
:type full_cov: bool
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
Get the current state of the class. This is only used to efficiently
pickle the model. See also self.setstate
"""
assert output is not None, "An output must be specified."
assert len(self.likelihood.noise_model_list) > output, "The model has only %s outputs." %(self.output_dim + 1)
return Model.getstate(self) + [self.X,
self.num_data,
self.input_dim,
self.kern,
self.likelihood,
self.output_dim,
self._Xoffset,
self._Xscale]
if which_data == 'all':
which_data = slice(None)
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if self.X.shape[1] == 2:
Xu = self.X[self.X[:,-1]==output ,0:1]
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
Xnew_indexed = self._add_output_index(Xnew,output)
m, v = self._raw_predict(Xnew_indexed, which_parts=which_parts)
if samples:
Ysim = self.posterior_samples_f(Xnew_indexed, samples, which_parts=which_parts, full_cov=True)
for yi in Ysim.T:
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5)
ax.set_xlim(xmin, xmax)
ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 3:
raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
#if samples:
# warnings.warn("Samples only implemented for 1 dimensional inputs.")
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot_single_output(self, output=None, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
def setstate(self, state):
"""
For a specific output, in a multioutput model, this function works just as plot_f on single output models.
:param output: which output to plot (for multiple output models only)
:type output: integer (first output is 0)
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:type plot_limits: np.array
:param which_data: which of the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:type levels: int
:param samples: the number of a posteriori samples to plot
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:type output: integer (first output is 0)
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param linecol: color of line to plot.
:type linecol:
:param fillcol: color of fill
:param levels: for 2D plotting, the number of contour levels to use. If ax is None, create a new figure
Set the state of the model. Used for efficient pickling
"""
assert output is not None, "An output must be specified."
assert len(self.likelihood.noise_model_list) > output, "The model has only %s outputs." %(self.output_dim + 1)
if which_data == 'all':
which_data = slice(None)
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if self.X.shape[1] == 2:
resolution = resolution or 200
Xu = self.X[self.X[:,-1]==output,:] #keep the output of interest
Xu = self.X * self._Xscale + self._Xoffset
Xu = self.X[self.X[:,-1]==output ,0:1] #get rid of the index column
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
Xnew_indexed = self._add_output_index(Xnew,output)
m, v, lower, upper = self.predict(Xnew_indexed, which_parts=which_parts,noise_model=output)
if samples: #NOTE not tested with fixed_inputs
Ysim = self.posterior_samples(Xnew_indexed, samples, which_parts=which_parts, full_cov=True,noise_model=output)
for yi in Ysim.T:
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
for d in range(m.shape[1]):
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5)
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 3:
raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
#if samples:
# warnings.warn("Samples only implemented for 1 dimensional inputs.")
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def _add_output_index(self,X,output):
"""
In a multioutput model, appends an index column to X to specify the output it is related to.
:param X: Input data
:type X: np.ndarray, N x self.input_dim
:param output: output X is related to
:type output: integer in {0,..., output_dim-1}
.. Note:: For multiple non-independent outputs models only.
"""
assert hasattr(self,'multioutput'), 'This function is for multiple output models only.'
index = np.ones((X.shape[0],1))*output
return np.hstack((X,index))
self._Xscale = state.pop()
self._Xoffset = state.pop()
self.output_dim = state.pop()
self.likelihood = state.pop()
self.kern = state.pop()
self.input_dim = state.pop()
self.num_data = state.pop()
self.X = state.pop()
Model.setstate(self, state)
def log_predictive_density(self, x_test, y_test):
"""

View file

@ -52,23 +52,6 @@ class SparseGP(GPBase):
self._const_jitter = None
def getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return GPBase.getstate(self) + [self.Z,
self.num_inducing,
self.has_uncertain_inputs,
self.X_variance]
def setstate(self, state):
self.X_variance = state.pop()
self.has_uncertain_inputs = state.pop()
self.num_inducing = state.pop()
self.Z = state.pop()
GPBase.setstate(self, state)
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
@ -87,7 +70,6 @@ class SparseGP(GPBase):
# factor Kmm
self._Lm = jitchol(self.Kmm + self._const_jitter)
# TODO: no white kernel needed anymore, all noise in likelihood --------
# The rather complex computations of self._A
if self.has_uncertain_inputs:
@ -421,145 +403,21 @@ class SparseGP(GPBase):
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
def getstate(self):
"""
For a specific output, predict the function at the new point(s) Xnew.
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param output: output to predict
:type output: integer in {0,..., num_outputs-1}
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.input_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
.. Note:: For multiple output models only
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return GPBase.getstate(self) + [self.Z,
self.num_inducing,
self.has_uncertain_inputs,
self.X_variance]
assert hasattr(self,'multioutput')
index = np.ones_like(Xnew)*output
Xnew = np.hstack((Xnew,index))
# normalize X values
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output)
return mean, var, _025pm, _975pm
def _raw_predict_single_output(self, _Xnew, output=0, X_variance_new=None, which_parts='all', full_cov=False,stop=False):
"""
Internal helper function for making predictions for a specific output,
does not account for normalization or likelihood
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param output: output to predict
:type output: integer in {0,..., num_outputs-1}
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
.. Note:: For multiple output models only
"""
Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work!
symmetrify(Bi)
Kmmi_LmiBLmi = backsub_both_sides(self._Lm, np.eye(self.num_inducing) - Bi)
if self.Cpsi1V is None:
psi1V = np.dot(self.psi1.T,self.likelihood.V)
tmp, _ = dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
tmp, _ = dpotrs(self.LB, tmp, lower=1)
self.Cpsi1V, _ = dtrtrs(self._Lm, tmp, lower=1, trans=1)
assert hasattr(self,'multioutput')
index = np.ones_like(_Xnew)*output
_Xnew = np.hstack((_Xnew,index))
if X_variance_new is None:
Kx = self.kern.K(self.Z, _Xnew, which_parts=which_parts)
mu = np.dot(Kx.T, self.Cpsi1V)
if full_cov:
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
else:
Kx = self.kern.psi1(self.Z, _Xnew, X_variance_new)
mu = np.dot(Kx, self.Cpsi1V)
if full_cov:
raise NotImplementedError, "TODO"
else:
Kxx = self.kern.psi0(self.Z, _Xnew, X_variance_new)
psi2 = self.kern.psi2(self.Z, _Xnew, X_variance_new)
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
return mu, var[:, None]
def setstate(self, state):
self.X_variance = state.pop()
self.has_uncertain_inputs = state.pop()
self.num_inducing = state.pop()
self.Z = state.pop()
GPBase.setstate(self, state)
def plot_single_output_f(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if fignum is None and ax is None:
fignum = fig.num
if which_data is 'all':
which_data = slice(None)
GPBase.plot_single_output_f(self, output=output, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, full_cov=full_cov, fignum=fignum, ax=ax)
if self.X.shape[1] == 2:
if self.has_uncertain_inputs:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
Zu = self.Z * self._Xscale + self._Xoffset
Zu = Zu[Zu[:,1]==output,0:1]
ax.plot(Zu[:,0], np.zeros_like(Zu[:,0]) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
elif self.X.shape[1] == 2:
Zu = self.Z * self._Xscale + self._Xoffset
Zu = Zu[Zu[:,1]==output,0:2]
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot_single_output(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None):
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if fignum is None and ax is None:
fignum = fig.num
if which_data is 'all':
which_data = slice(None)
GPBase.plot_single_output(self, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, levels=20, fignum=fignum, ax=ax, output=output)
if self.X.shape[1] == 2:
if self.has_uncertain_inputs:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
Zu = self.Z * self._Xscale + self._Xoffset
Zu = Zu[Zu[:,1]==output,0:1]
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
elif self.X.shape[1] == 3:
Zu = self.Z * self._Xscale + self._Xoffset
Zu = Zu[Zu[:,1]==output,0:1]
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

View file

@ -18,30 +18,16 @@ class SVIGP(GPBase):
Stochastic Variational inference in a Gaussian Process
:param X: inputs
:type X: np.ndarray (N x Q)
:type X: np.ndarray (num_data x num_inputs)
:param Y: observed data
:type Y: np.ndarray of observations (N x D)
:param batchsize: the size of a minibatch
Additional kwargs are used as for a sparse GP. They include:
:type Y: np.ndarray of observations (num_data x output_dim)
:param batchsize: the size of a minibatch
:param q_u: canonical parameters of the distribution squashed into a 1D array
:type q_u: np.ndarray
:param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param kernel: the kernel/covariance function. See link kernels
:type kernel: a GPy kernel
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
:type X_uncertainty: np.ndarray (N x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param beta: noise precision. TODO: ignore beta if doing EP
:type beta: float
:param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
:param Z: inducing inputs
:type Z: np.ndarray (num_inducing x num_inputs)
"""

View file

@ -57,8 +57,8 @@ def coregionalization_toy(max_iters=100):
m.optimize(max_iters=max_iters)
fig, axes = pb.subplots(2,1)
m.plot_single_output(output=0,ax=axes[0])
m.plot_single_output(output=1,ax=axes[1])
m.plot(fixed_inputs=[(1,0)],ax=axes[0])
m.plot(fixed_inputs=[(1,1)],ax=axes[1])
axes[0].set_title('Output 0')
axes[1].set_title('Output 1')
return m

View file

@ -4,4 +4,4 @@
# Enable openmp support. This speeds up some computations, depending on the number
# of cores available. Setting up a compiler with openmp support can be difficult on
# some platforms, hence this option.
openmp=True
openmp=False

View file

@ -49,18 +49,6 @@ class BayesianGPLVM(SparseGP, GPLVM):
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs)
self.ensure_default_constraints()
def getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return SparseGP.getstate(self) + [self.init]
def setstate(self, state):
self._const_jitter = None
self.init = state.pop()
SparseGP.setstate(self, state)
def _get_param_names(self):
X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
@ -285,6 +273,19 @@ class BayesianGPLVM(SparseGP, GPLVM):
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
return fig
def getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return SparseGP.getstate(self) + [self.init]
def setstate(self, state):
self._const_jitter = None
self.init = state.pop()
SparseGP.setstate(self, state)
def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
objective function for fitting the latent variables for test points

View file

@ -7,7 +7,7 @@ import pylab as pb
import sys, pdb
from ..core import GP
from ..models import GPLVM
from ..mappings import *
from ..mappings import Kernel
class BCGPLVM(GPLVM):

View file

@ -44,12 +44,6 @@ class GPLVM(GP):
Xr[:PC.shape[0], :PC.shape[1]] = PC
return Xr
def getstate(self):
return GP.getstate(self)
def setstate(self, state):
GP.setstate(self, state)
def _get_param_names(self):
return sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) + GP._get_param_names(self)
@ -68,7 +62,7 @@ class GPLVM(GP):
def jacobian(self,X):
target = np.zeros((X.shape[0],X.shape[1],self.output_dim))
for i in range(self.output_dim):
target[:,:,i]=self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
target[:,:,i] = self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
return target
def magnification(self,X):
@ -91,3 +85,11 @@ class GPLVM(GP):
def plot_magnification(self, *args, **kwargs):
return util.plot_latent.plot_magnification(self, *args, **kwargs)
def getstate(self):
return GP.getstate(self)
def setstate(self, state):
GP.setstate(self, state)

View file

@ -81,29 +81,6 @@ class MRD(Model):
Model.__init__(self)
self.ensure_default_constraints()
def getstate(self):
return Model.getstate(self) + [self.names,
self.bgplvms,
self.gref,
self.nparams,
self.input_dim,
self.num_inducing,
self.num_data,
self.NQ,
self.MQ]
def setstate(self, state):
self.MQ = state.pop()
self.NQ = state.pop()
self.num_data = state.pop()
self.num_inducing = state.pop()
self.input_dim = state.pop()
self.nparams = state.pop()
self.gref = state.pop()
self.bgplvms = state.pop()
self.names = state.pop()
Model.setstate(self, state)
@property
def X(self):
return self.gref.X
@ -371,4 +348,28 @@ class MRD(Model):
pylab.draw()
fig.tight_layout()
def getstate(self):
return Model.getstate(self) + [self.names,
self.bgplvms,
self.gref,
self.nparams,
self.input_dim,
self.num_inducing,
self.num_data,
self.NQ,
self.MQ]
def setstate(self, state):
self.MQ = state.pop()
self.NQ = state.pop()
self.num_data = state.pop()
self.num_inducing = state.pop()
self.input_dim = state.pop()
self.nparams = state.pop()
self.gref = state.pop()
self.bgplvms = state.pop()
self.names = state.pop()
Model.setstate(self, state)