docstrings and removal of duplicated plotting code in gp_base

This commit is contained in:
James Hensman 2013-10-24 21:30:23 +01:00
parent 33b6a7d24f
commit bddb22f4af

View file

@ -9,7 +9,9 @@ from ..likelihoods import Gaussian, Gaussian_Mixed_Noise
class GPBase(Model):
"""
Gaussian process base model for holding shared behaviour between
sparse_GP and GP models.
sparse_GP and GP models, and potentially other models in the future.
Here we define some functions that are use
"""
def __init__(self, X, likelihood, kernel, normalize_X=False):
self.X = X
@ -34,29 +36,6 @@ class GPBase(Model):
# All leaf nodes should call self._set_params(self._get_params()) at
# the end
def getstate(self):
"""
Get the current state of the class, here we return everything that is needed to recompute the model.
"""
return Model.getstate(self) + [self.X,
self.num_data,
self.input_dim,
self.kern,
self.likelihood,
self.output_dim,
self._Xoffset,
self._Xscale]
def setstate(self, state):
self._Xscale = state.pop()
self._Xoffset = state.pop()
self.output_dim = state.pop()
self.likelihood = state.pop()
self.kern = state.pop()
self.input_dim = state.pop()
self.num_data = state.pop()
self.X = state.pop()
Model.setstate(self, state)
def posterior_samples_f(self,X,size=10,which_parts='all',full_cov=True):
"""
@ -269,152 +248,32 @@ class GPBase(Model):
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot_single_output_f(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
def getstate(self):
"""
For a specific output, in a multioutput model, this function works just as plot_f on single output models.
:param output: which output to plot (for multiple output models only)
:type output: integer (first output is 0)
:param samples: the number of a posteriori samples to plot
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
:param which_data: which if the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param full_cov:
:type full_cov: bool
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
Get the curent state of the class. This is only used to efficiently
pickle the model. See also self.setstate
"""
assert output is not None, "An output must be specified."
assert len(self.likelihood.noise_model_list) > output, "The model has only %s outputs." %(self.output_dim + 1)
return Model.getstate(self) + [self.X,
self.num_data,
self.input_dim,
self.kern,
self.likelihood,
self.output_dim,
self._Xoffset,
self._Xscale]
if which_data == 'all':
which_data = slice(None)
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if self.X.shape[1] == 2:
Xu = self.X[self.X[:,-1]==output ,0:1]
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
Xnew_indexed = self._add_output_index(Xnew,output)
m, v = self._raw_predict(Xnew_indexed, which_parts=which_parts)
if samples:
Ysim = self.posterior_samples_f(Xnew_indexed, samples, which_parts=which_parts, full_cov=True)
for yi in Ysim.T:
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5)
ax.set_xlim(xmin, xmax)
ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 3:
raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
#if samples:
# warnings.warn("Samples only implemented for 1 dimensional inputs.")
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot_single_output(self, output=None, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
def setstate(self, state):
"""
For a specific output, in a multioutput model, this function works just as plot_f on single output models.
:param output: which output to plot (for multiple output models only)
:type output: integer (first output is 0)
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
:type plot_limits: np.array
:param which_data: which if the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:type levels: int
:param samples: the number of a posteriori samples to plot
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:type output: integer (first output is 0)
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param linecol: color of line to plot.
:type linecol:
:param fillcol: color of fill
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
Set the state of the model. Used for efficient pickling
"""
assert output is not None, "An output must be specified."
assert len(self.likelihood.noise_model_list) > output, "The model has only %s outputs." %(self.output_dim + 1)
if which_data == 'all':
which_data = slice(None)
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if self.X.shape[1] == 2:
resolution = resolution or 200
Xu = self.X[self.X[:,-1]==output,:] #keep the output of interest
Xu = self.X * self._Xscale + self._Xoffset
Xu = self.X[self.X[:,-1]==output ,0:1] #get rid of the index column
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
Xnew_indexed = self._add_output_index(Xnew,output)
self._Xscale = state.pop()
self._Xoffset = state.pop()
self.output_dim = state.pop()
self.likelihood = state.pop()
self.kern = state.pop()
self.input_dim = state.pop()
self.num_data = state.pop()
self.X = state.pop()
Model.setstate(self, state)
m, v, lower, upper = self.predict(Xnew_indexed, which_parts=which_parts,noise_model=output)
if samples: #NOTE not tested with fixed_inputs
Ysim = self.posterior_samples(Xnew_indexed, samples, which_parts=which_parts, full_cov=True,noise_model=output)
for yi in Ysim.T:
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
for d in range(m.shape[1]):
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5)
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 3:
raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
#if samples:
# warnings.warn("Samples only implemented for 1 dimensional inputs.")
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def _add_output_index(self,X,output):
"""
In a multioutput model, appends an index column to X to specify the output it is related to.
:param X: Input data
:type X: np.ndarray, N x self.input_dim
:param output: output X is related to
:type output: integer in {0,..., output_dim-1}
.. Note:: For multiple non-independent outputs models only.
"""
assert hasattr(self,'multioutput'), 'This function is for multiple output models only.'
index = np.ones((X.shape[0],1))*output
return np.hstack((X,index))