GPy/GPy/plotting/matplot_dep/models_plots.py

507 lines
23 KiB
Python

# Copyright (c) 2012-2015, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from . import Tango
from .base_plots import gpplot, x_frame1D, x_frame2D,gperrors
from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
from scipy import sparse
from ...core.parameterization.variational import VariationalPosterior
from matplotlib import pyplot as plt
from .base_plots import gradient_fill
from functools import wraps
def plot_data(self, which_data_rows='all',
which_data_ycols='all', visible_dims=None,
fignum=None, ax=None, data_symbol='kx',mew=1.5,**kwargs):
"""
Plot the training data
- For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed.
Can plot only part of the data
using which_data_rows and which_data_ycols.
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice self.X, self.Y
:param which_data_ycols: when the data has several columns (independant outputs), only plot these
:type which_data_rows: 'all' or a list of integers
:param visible_dims: an array specifying the input dimensions to plot (maximum two)
:type visible_dims: a numpy array
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
"""
#deal with optional arguments
if which_data_rows == 'all':
which_data_rows = slice(None)
if which_data_ycols == 'all':
which_data_ycols = np.arange(self.output_dim)
if ax is None:
fig = plt.figure(num=fignum)
ax = fig.add_subplot(111)
if hasattr(self, 'has_uncertain_inputs') and self.has_uncertain_inputs():
X = self.X.mean
X_variance = self.X.variance
else:
X = self.X
X_variance = None
Y = self.Y
#work out what the inputs are for plotting (1D or 2D)
if visible_dims is None:
visible_dims = np.arange(self.input_dim)
assert visible_dims.size <= 2, "Visible inputs cannot be larger than two"
free_dims = visible_dims
plots = {}
#one dimensional plotting
if len(free_dims) == 1:
plots['dataplot'] = []
if X_variance is not None: plots['xerrorbar'] = []
for d in which_data_ycols:
plots['dataplot'].append(ax.plot(X[which_data_rows, free_dims], Y[which_data_rows, d], data_symbol, mew=mew))
if X_variance is not None:
plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, d].flatten(),
xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
ecolor='k', fmt='none', elinewidth=.5, alpha=.5)
#2D plotting
elif len(free_dims) == 2:
for d in which_data_ycols:
plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40,
Y[which_data_rows, d], cmap=plt.cm.jet, vmin=Y.min(), vmax=Y.max(), linewidth=0.)
else:
raise NotImplementedError("Cannot define a frame with more than two input dimensions")
return plots
def plot_fit(self, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx',
apply_link=False, samples_y=0, plot_uncertain_inputs=True, predict_kw=None, plot_training_data=True):
"""
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimsensions, a contour-plot shows the mean predicted function
- In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.
Can plot only part of the data and part of the posterior functions
using which_data_rowsm which_data_ycols.
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
:type plot_limits: np.array
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice self.X, self.Y
:param which_data_ycols: when the data has several columns (independant outputs), only plot these
:type which_data_rows: 'all' or a list of integers
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
:type levels: int
:param samples: the number of a posteriori samples to plot p(f*|y)
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param plot_raw: Whether to plot the raw function p(f|y)
:type plot_raw: boolean
:param linecol: color of line to plot.
:type linecol: hex or color
:param fillcol: color of fill
:type fillcol: hex or color
:param apply_link: apply the link function if plotting f (default false), as well as posterior samples if requested
:type apply_link: boolean
:param samples_y: the number of posteriori f samples to plot p(y*|y)
:type samples_y: int
:param plot_uncertain_inputs: plot the uncertainty of the inputs as error bars if they have uncertainty (BGPLVM etc.)
:type plot_uncertain_inputs: boolean
:param predict_kw: keyword args for _raw_predict and predict functions if required
:type predict_kw: dict
:param plot_training_data: whether or not to plot the training points
:type plot_training_data: boolean
"""
#deal with optional arguments
if which_data_rows == 'all':
which_data_rows = slice(None)
if which_data_ycols == 'all':
which_data_ycols = np.arange(self.output_dim)
#if len(which_data_ycols)==0:
#raise ValueError('No data selected for plotting')
if ax is None:
fig = plt.figure(num=fignum)
ax = fig.add_subplot(111)
if hasattr(self, 'has_uncertain_inputs') and self.has_uncertain_inputs():
X = self.X.mean
X_variance = self.X.variance
else:
X = self.X
Y = self.Y
if sparse.issparse(Y): Y = Y.todense().view(np.ndarray)
if hasattr(self, 'Z'): Z = self.Z
if predict_kw is None:
predict_kw = {}
#work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs])
free_dims = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
plots = {}
#one dimensional plotting
if len(free_dims) == 1:
#define the frame on which to plot
Xnew, xmin, xmax = x_frame1D(X[:,free_dims], plot_limits=plot_limits, resolution=resolution or 200)
Xgrid = np.empty((Xnew.shape[0],self.input_dim))
Xgrid[:,free_dims] = Xnew
for i,v in fixed_inputs:
Xgrid[:,i] = v
#make a prediction on the frame and plot it
if plot_raw:
m, v = self._raw_predict(Xgrid, **predict_kw)
if apply_link:
lower = self.likelihood.gp_link.transf(m - 2*np.sqrt(v))
upper = self.likelihood.gp_link.transf(m + 2*np.sqrt(v))
#Once transformed this is now the median of the function
m = self.likelihood.gp_link.transf(m)
else:
lower = m - 2*np.sqrt(v)
upper = m + 2*np.sqrt(v)
else:
if isinstance(self,GPCoregionalizedRegression) or isinstance(self,SparseGPCoregionalizedRegression):
extra_data = Xgrid[:,-1:].astype(np.int)
if Y_metadata is None:
Y_metadata = {'output_index': extra_data}
else:
Y_metadata['output_index'] = extra_data
m, v = self.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
fmu, fv = self._raw_predict(Xgrid, full_cov=False, **predict_kw)
lower, upper = self.likelihood.predictive_quantiles(fmu, fv, (2.5, 97.5), Y_metadata=Y_metadata)
for d in which_data_ycols:
plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
#if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5)
if not plot_raw and plot_training_data:
plots['dataplot'] = plot_data(self=self, which_data_rows=which_data_rows,
visible_dims=free_dims, data_symbol=data_symbol, mew=1.5, ax=ax, fignum=fignum)
#optionally plot some samples
if samples: #NOTE not tested with fixed_inputs
Fsim = self.posterior_samples_f(Xgrid, samples)
if apply_link:
Fsim = self.likelihood.gp_link.transf(Fsim)
for fi in Fsim.T:
plots['posterior_samples'] = ax.plot(Xnew, fi[:,None], '#3300FF', linewidth=0.25)
#ax.plot(Xnew, fi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
if samples_y: #NOTE not tested with fixed_inputs
Ysim = self.posterior_samples(Xgrid, samples_y, Y_metadata=Y_metadata)
for yi in Ysim.T:
plots['posterior_samples_y'] = ax.scatter(Xnew, yi[:,None], s=5, c=Tango.colorsHex['darkBlue'], marker='o', alpha=0.5)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
#add error bars for uncertain (if input uncertainty is being modelled)
if hasattr(self,"has_uncertain_inputs") and self.has_uncertain_inputs() and plot_uncertain_inputs:
if plot_raw:
#add error bars for uncertain (if input uncertainty is being modelled), for plot_f
#Hack to plot error bars on latent function, rather than on the data
vs = self.X.mean.values.copy()
for i,v in fixed_inputs:
vs[:,i] = v
m_X, _ = self._raw_predict(vs)
if apply_link:
m_X = self.likelihood.gp_link.transf(m_X)
plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), m_X[which_data_rows, which_data_ycols].flatten(),
xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
#set the limits of the plot to some sensible values
try:
ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper))
if ymin != ymax:
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
except:
# do nothing
# No training data on model
pass
#add inducing inputs (if a sparse model is used)
if hasattr(self,"Z"):
#Zu = self.Z[:,free_dims] * self._Xscale[:,free_dims] + self._Xoffset[:,free_dims]
if isinstance(self,SparseGPCoregionalizedRegression):
Z = Z[Z[:,-1] == Y_metadata['output_index'],:]
Zu = Z[:,free_dims]
z_height = ax.get_ylim()[0]
plots['inducing_inputs'] = ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12)
#2D plotting
elif len(free_dims) == 2:
#define the frame for plotting on
resolution = resolution or 50
Xnew, x, y, xmin, xmax = x_frame2D(X[:,free_dims], plot_limits, resolution)
Xgrid = np.empty((Xnew.shape[0],self.input_dim))
Xgrid[:,free_dims] = Xnew
for i,v in fixed_inputs:
Xgrid[:,i] = v
#x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
#predict on the frame and plot
if plot_raw:
m, _ = self._raw_predict(Xgrid, **predict_kw)
else:
if isinstance(self,GPCoregionalizedRegression) or isinstance(self,SparseGPCoregionalizedRegression):
extra_data = Xgrid[:,-1:].astype(np.int)
if Y_metadata is None:
Y_metadata = {'output_index': extra_data}
else:
Y_metadata['output_index'] = extra_data
m, v = self.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
for d in which_data_ycols:
m_d = m[:,d].reshape(resolution, resolution).T
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=plt.cm.jet)
#if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=plt.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
if not plot_raw and plot_training_data:
plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=plt.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
#set the limits of the plot to some sensible values
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])
if samples:
warnings.warn("Samples are rather difficult to plot for 2D inputs...")
#add inducing inputs (if a sparse self is used)
if hasattr(self,"Z"):
#Zu = self.Z[:,free_dims] * self._Xscale[:,free_dims] + self._Xoffset[:,free_dims]
Zu = Z[:,free_dims]
plots['inducing_inputs'] = ax.plot(Zu[:,0], Zu[:,1], 'wo')
else:
raise NotImplementedError("Cannot define a frame with more than two input dimensions")
return plots
def plot_density(self, levels=20, plot_limits=None,
fixed_inputs=[], plot_raw=False, edgecolor='none', facecolor='#3465a4',
predict_kw=None,Y_metadata=None,
apply_link=False, resolution=200, **patch_kwargs):
"""
Plot the posterior density of the GP.
- In one dimension, the function is plotted with a shaded gradient, visualizing the density of the posterior.
- Only implemented for one dimension, for higher dimensions use `plot`.
:param levels: number of levels to plot in the density plot. This is a number between 1 and 100. 1 corresponds to the normal plot_fit.
:type levels: int
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
:type plot_limits: np.array
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param edgecolor: color of line to plot [Tango.colorsHex['darkBlue']]
:type edgecolor: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param facecolor: color of fill [Tango.colorsHex['lightBlue']]
:type facecolor: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param Y_metadata: additional data associated with Y which may be needed
:type Y_metadata: dict
:param apply_link: if there is a link function of the likelihood, plot the link(f*) rather than f*, when plotting posterior samples f
:type apply_link: boolean
:param resolution: resolution of interpolation (how many points to interpolate of the posterior).
:type resolution: int
:param: patch_kw: the keyword arguments for the patchcollection fill.
"""
#deal with optional arguments
if hasattr(self, 'has_uncertain_inputs') and self.has_uncertain_inputs():
X = self.X.mean
else:
X = self.X
Y = self.Y
if sparse.issparse(Y): Y = Y.todense().view(np.ndarray)
if predict_kw is None:
predict_kw = {}
#work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs])
free_dims = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
plots = {}
#one dimensional plotting
if len(free_dims) == 1:
#define the frame on which to plot
Xnew, xmin, xmax = x_frame1D(X[:,free_dims], plot_limits=plot_limits, resolution=resolution)
Xgrid = np.empty((Xnew.shape[0],self.input_dim))
Xgrid[:,free_dims] = Xnew
for i,v in fixed_inputs:
Xgrid[:,i] = v
percs = np.linspace(2.5, 97.5, levels*2)
#make a prediction on the frame and plot it
if plot_raw:
from scipy import stats
from ...likelihoods import Gaussian
lik = Gaussian(variance=0)
else:
if isinstance(self,GPCoregionalizedRegression) or isinstance(self,SparseGPCoregionalizedRegression):
extra_data = Xgrid[:,-1:].astype(np.int)
if Y_metadata is None:
Y_metadata = {'output_index': extra_data}
else:
Y_metadata['output_index'] = extra_data
lik = None
percentiles = [i[:, 0] for i in self.predict_quantiles(Xgrid, percs, Y_metadata=Y_metadata, likelihood=lik, **predict_kw)]
if apply_link:
percentiles = self.likelihood.gp_link.transf(percentiles)
patch_kwargs['facecolor'] = facecolor
patch_kwargs['edgecolor'] = edgecolor
plots['density'] = gradient_fill(Xgrid[:, 0], percentiles, **patch_kwargs)
else:
raise NotImplementedError('Only 1D density plottable.')
return plots
@wraps(plot_fit)
def plot_fit_f(self, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=True,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx',
apply_link=False, samples_y=0, plot_uncertain_inputs=True, predict_kw=None, plot_training_data=True):
return plot_fit(self, plot_limits, which_data_rows, which_data_ycols, fixed_inputs, levels, samples, fignum, ax, resolution, plot_raw, linecol, fillcol, Y_metadata, data_symbol, apply_link, samples_y, plot_uncertain_inputs, predict_kw, plot_training_data)
def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True, X_all=False):
"""
Convenience function for returning back fixed_inputs where the other inputs
are fixed using fix_routine
:param model: model
:type model: Model
:param non_fixed_inputs: dimensions of non fixed inputs
:type non_fixed_inputs: list
:param fix_routine: fixing routine to use, 'mean', 'median', 'zero'
:type fix_routine: string
:param as_list: if true, will return a list of tuples with (dimension, fixed_val) otherwise it will create the corresponding X matrix
:type as_list: boolean
"""
f_inputs = []
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
X = model.X.mean.values.copy()
elif isinstance(model.X, VariationalPosterior):
X = model.X.values.copy()
else:
if X_all:
X = model.X_all.copy()
else:
X = model.X.copy()
for i in range(X.shape[1]):
if i not in non_fixed_inputs:
if fix_routine == 'mean':
f_inputs.append( (i, np.mean(X[:,i])) )
if fix_routine == 'median':
f_inputs.append( (i, np.median(X[:,i])) )
else: # set to zero zero
f_inputs.append( (i, 0) )
if not as_list:
X[:,i] = f_inputs[-1][1]
if as_list:
return f_inputs
else:
return X
def plot_errorbars_trainset(model, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
fignum=None, ax=None,
linecol='red', data_symbol='kx',
predict_kw=None, plot_training_data=True, **kwargs):
"""
Plot the posterior error bars corresponding to the training data
- For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed.
Can plot only part of the data
using which_data_rows and which_data_ycols.
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
:param which_data_ycols: when the data has several columns (independant outputs), only plot these
:type which_data_rows: 'all' or a list of integers
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param plot_training_data: whether or not to plot the training points
:type plot_training_data: boolean
"""
#deal with optional arguments
if which_data_rows == 'all':
which_data_rows = slice(None)
if which_data_ycols == 'all':
which_data_ycols = np.arange(model.output_dim)
if ax is None:
fig = plt.figure(num=fignum)
ax = fig.add_subplot(111)
X = model.X
Y = model.Y
if predict_kw is None:
predict_kw = {}
#work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs])
free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims)
plots = {}
#one dimensional plotting
if len(free_dims) == 1:
m, v = model.predict(X, full_cov=False, Y_metadata=model.Y_metadata, **predict_kw)
fmu, fv = model._raw_predict(X, full_cov=False, **predict_kw)
lower, upper = model.likelihood.predictive_quantiles(fmu, fv, (2.5, 97.5), Y_metadata=model.Y_metadata)
for d in which_data_ycols:
plots['gperrors'] = gperrors(X, m[:, d], lower[:, d], upper[:, d], edgecol=linecol, ax=ax, fignum=fignum, **kwargs )
if plot_training_data:
plots['dataplot'] = plot_data(self=model, which_data_rows=which_data_rows,
visible_dims=free_dims, data_symbol=data_symbol, mew=1.5, ax=ax, fignum=fignum)
#set the limits of the plot to some sensible values
ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(X[:,free_dims].min(), X[:,free_dims].max())
ax.set_ylim(ymin, ymax)
elif len(free_dims) == 2:
raise NotImplementedError("Not implemented yet")
else:
raise NotImplementedError("Cannot define a frame with more than two input dimensions")
return plots