From 8e437e4bda653cc2dabd511a2be49b9e14c7848b Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 2 Sep 2015 19:40:11 +0100 Subject: [PATCH] New function to plot just the errorbars of the training data --- GPy/core/gp.py | 86 ++++++++++++- GPy/plotting/matplot_dep/base_plots.py | 21 +++ GPy/plotting/matplot_dep/models_plots.py | 157 ++++++++++++++++++++++- 3 files changed, 256 insertions(+), 8 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index a87ac66e..1f5bc136 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -407,8 +407,8 @@ class GP(Model): def plot(self, plot_limits=None, which_data_rows='all', which_data_ycols='all', fixed_inputs=[], levels=20, samples=0, fignum=None, ax=None, resolution=None, - plot_raw=False, - linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx', predict_kw=None): + plot_raw=False, linecol=None,fillcol=None, Y_metadata=None, + data_symbol='kx', predict_kw=None, plot_training_data=True): """ Plot the posterior of the GP. - In one dimension, the function is plotted with a shaded region identifying two standard deviations. @@ -445,6 +445,8 @@ class GP(Model): :type Y_metadata: dict :param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx') :type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib. + :param plot_training_data: whether or not to plot the training points + :type plot_training_data: boolean """ assert "matplotlib" in sys.modules, "matplotlib package has not been imported." from ..plotting.matplot_dep import models_plots @@ -457,7 +459,85 @@ class GP(Model): which_data_ycols, fixed_inputs, levels, samples, fignum, ax, resolution, plot_raw=plot_raw, Y_metadata=Y_metadata, - data_symbol=data_symbol, predict_kw=predict_kw, **kw) + data_symbol=data_symbol, predict_kw=predict_kw, + plot_training_data=plot_training_data, **kw) + + + def plot_data(self, which_data_rows='all', + which_data_ycols='all', visible_dims=None, + fignum=None, ax=None, data_symbol='kx'): + """ + Plot the training data + - For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed. + + Can plot only part of the data + using which_data_rows and which_data_ycols. + + :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits + :type plot_limits: np.array + :param which_data_rows: which of the training data to plot (default all) + :type which_data_rows: 'all' or a slice object to slice model.X, model.Y + :param which_data_ycols: when the data has several columns (independant outputs), only plot these + :type which_data_ycols: 'all' or a list of integers + :param visible_dims: an array specifying the input dimensions to plot (maximum two) + :type visible_dims: a numpy array + :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D + :type resolution: int + :param levels: number of levels to plot in a contour plot. + :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure + :type levels: int + :param samples: the number of a posteriori samples to plot + :type samples: int + :param fignum: figure to plot on. + :type fignum: figure number + :param ax: axes to plot on. + :type ax: axes handle + :param linecol: color of line to plot [Tango.colorsHex['darkBlue']] + :type linecol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib + :param fillcol: color of fill [Tango.colorsHex['lightBlue']] + :type fillcol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib + :param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx') + :type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib. + """ + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." + from ..plotting.matplot_dep import models_plots + kw = {} + return models_plots.plot_data(self, which_data_rows, + which_data_ycols, visible_dims, + fignum, ax, data_symbol, **kw) + + + def plot_fit_errorbars(self, which_data_rows='all', + which_data_ycols='all', fixed_inputs=[], fignum=None, ax=None, + linecol=None, data_symbol='kx', predict_kw=None, plot_training_data=True): + + """ + Plot the posterior error bars corresponding to the training data + - For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed. + + Can plot only part of the data + using which_data_rows and which_data_ycols. + + :param which_data_rows: which of the training data to plot (default all) + :type which_data_rows: 'all' or a slice object to slice model.X, model.Y + :param which_data_ycols: when the data has several columns (independant outputs), only plot these + :type which_data_rows: 'all' or a list of integers + :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. + :type fixed_inputs: a list of tuples + :param fignum: figure to plot on. + :type fignum: figure number + :param ax: axes to plot on. + :type ax: axes handle + :param plot_training_data: whether or not to plot the training points + :type plot_training_data: boolean + """ + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." + from ..plotting.matplot_dep import models_plots + kw = {} + return models_plots.plot_fit_errorbars(self, which_data_rows, which_data_ycols, fixed_inputs, + fignum, ax, linecol, data_symbol, + predict_kw, plot_training_data, **kw) + def input_sensitivity(self, summarize=True): """ diff --git a/GPy/plotting/matplot_dep/base_plots.py b/GPy/plotting/matplot_dep/base_plots.py index dade87cf..80f114b1 100644 --- a/GPy/plotting/matplot_dep/base_plots.py +++ b/GPy/plotting/matplot_dep/base_plots.py @@ -47,6 +47,27 @@ def gpplot(x, mu, lower, upper, edgecol='#3300FF', fillcol='#33CCFF', ax=None, f return plots +def gperrors(x, mu, lower, upper, edgecol=None, ax=None, fignum=None, **kwargs): + _, axes = ax_default(fignum, ax) + + mu = mu.flatten() + x = x.flatten() + lower = lower.flatten() + upper = upper.flatten() + + plots = [] + + if edgecol is None: + edgecol='#3300FF' + + if not 'alpha' in kwargs.keys(): + kwargs['alpha'] = 0.3 + + plots.append(axes.errorbar(x,mu,yerr=np.vstack([mu-lower,upper-mu]),color=edgecol,**kwargs)) + plots[-1][0].remove() + return plots + + def removeRightTicks(ax=None): ax = ax or pb.gca() for i, line in enumerate(ax.get_yticklines()): diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index d0f6c952..e1ba327e 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -3,19 +3,79 @@ import numpy as np from . import Tango -from base_plots import gpplot, x_frame1D, x_frame2D +from base_plots import gpplot, x_frame1D, x_frame2D,gperrors from ...models.gp_coregionalized_regression import GPCoregionalizedRegression from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression from scipy import sparse from ...core.parameterization.variational import VariationalPosterior from matplotlib import pyplot as plt + +def plot_data(model, which_data_rows='all', + which_data_ycols='all', visible_dims=None, + fignum=None, ax=None, data_symbol='kx',mew=1.5): + """ + Plot the training data + - For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed. + + Can plot only part of the data + using which_data_rows and which_data_ycols. + + :param which_data_rows: which of the training data to plot (default all) + :type which_data_rows: 'all' or a slice object to slice model.X, model.Y + :param which_data_ycols: when the data has several columns (independant outputs), only plot these + :type which_data_rows: 'all' or a list of integers + :param visible_dims: an array specifying the input dimensions to plot (maximum two) + :type visible_dims: a numpy array + :param fignum: figure to plot on. + :type fignum: figure number + :param ax: axes to plot on. + :type ax: axes handle + """ + #deal with optional arguments + if which_data_rows == 'all': + which_data_rows = slice(None) + if which_data_ycols == 'all': + which_data_ycols = np.arange(model.output_dim) + + if ax is None: + fig = plt.figure(num=fignum) + ax = fig.add_subplot(111) + + #data + X = model.X + Y = model.Y + + #work out what the inputs are for plotting (1D or 2D) + if visible_dims is None: + visible_dims = np.arange(model.input_dim) + assert visible_dims.size <= 2, "Visible inputs cannot be larger than two" + free_dims = visible_dims + plots = {} + #one dimensional plotting + if len(free_dims) == 1: + + for d in which_data_ycols: + plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=mew) + + #2D plotting + elif len(free_dims) == 2: + + for d in which_data_ycols: + plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, + Y[which_data_rows, d], cmap=plt.cm.jet, vmin=Y.min(), vmax=Y.max(), linewidth=0.) + + else: + raise NotImplementedError("Cannot define a frame with more than two input dimensions") + return plots + + def plot_fit(model, plot_limits=None, which_data_rows='all', which_data_ycols='all', fixed_inputs=[], levels=20, samples=0, fignum=None, ax=None, resolution=None, plot_raw=False, linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx', - apply_link=False, samples_f=0, plot_uncertain_inputs=True, predict_kw=None): + apply_link=False, samples_f=0, plot_uncertain_inputs=True, predict_kw=None, plot_training_data=True): """ Plot the posterior of the GP. - In one dimension, the function is plotted with a shaded region identifying two standard deviations. @@ -43,7 +103,6 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', :type fignum: figure number :param ax: axes to plot on. :type ax: axes handle - :type output: integer (first output is 0) :param linecol: color of line to plot. :type linecol: :param fillcol: color of fill @@ -52,6 +111,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', :type apply_link: boolean :param samples_f: the number of posteriori f samples to plot p(f*|y) :type samples_f: int + :param plot_training_data: whether or not to plot the training points + :type plot_training_data: boolean """ #deal with optional arguments if which_data_rows == 'all': @@ -116,7 +177,11 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', for d in which_data_ycols: plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol) - if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5) + #if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5) + if not plot_raw and plot_training_data: + plots['dataplot'] = plot_data(model=model, which_data_rows=which_data_rows, + visible_dims=free_dims, data_symbol=data_symbol, mew=1.5, ax=ax, fignum=fignum) + #optionally plot some samples if samples: #NOTE not tested with fixed_inputs @@ -196,7 +261,9 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', for d in which_data_ycols: m_d = m[:,d].reshape(resolution, resolution).T plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=plt.cm.jet) - if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=plt.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) + #if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=plt.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) + if not plot_raw and plot_training_data: + plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=plt.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) #set the limits of the plot to some sensible values ax.set_xlim(xmin[0], xmax[0]) @@ -261,3 +328,83 @@ def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True, X_ return f_inputs else: return X + + +def plot_fit_errorbars(model, which_data_rows='all', + which_data_ycols='all', fixed_inputs=[], + fignum=None, ax=None, + linecol='red', data_symbol='kx', + predict_kw=None, plot_training_data=True): + + """ + Plot the posterior error bars corresponding to the training data + - For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed. + + Can plot only part of the data + using which_data_rows and which_data_ycols. + + :param which_data_rows: which of the training data to plot (default all) + :type which_data_rows: 'all' or a slice object to slice model.X, model.Y + :param which_data_ycols: when the data has several columns (independant outputs), only plot these + :type which_data_rows: 'all' or a list of integers + :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. + :type fixed_inputs: a list of tuples + :param fignum: figure to plot on. + :type fignum: figure number + :param ax: axes to plot on. + :type ax: axes handle + :param plot_training_data: whether or not to plot the training points + :type plot_training_data: boolean + """ + + #deal with optional arguments + if which_data_rows == 'all': + which_data_rows = slice(None) + if which_data_ycols == 'all': + which_data_ycols = np.arange(model.output_dim) + + if ax is None: + fig = plt.figure(num=fignum) + ax = fig.add_subplot(111) + + X = model.X + Y = model.Y + + if predict_kw is None: + predict_kw = {} + + + #work out what the inputs are for plotting (1D or 2D) + fixed_dims = np.array([i for i,v in fixed_inputs]) + free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims) + plots = {} + + #one dimensional plotting + if len(free_dims) == 1: + + m, v = model.predict(X, full_cov=False, Y_metadata=model.Y_metadata, **predict_kw) + fmu, fv = model._raw_predict(X, full_cov=False, **predict_kw) + lower, upper = model.likelihood.predictive_quantiles(fmu, fv, (2.5, 97.5), Y_metadata=model.Y_metadata) + + + for d in which_data_ycols: + plots['gperrors'] = gperrors(X, m[:, d], lower[:, d], upper[:, d], edgecol=linecol, ax=ax, fignum=fignum ) + if plot_training_data: + plots['dataplot'] = plot_data(model=model, which_data_rows=which_data_rows, + visible_dims=free_dims, data_symbol=data_symbol, mew=1.5, ax=ax, fignum=fignum) + + + #set the limits of the plot to some sensible values + ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper)) + ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) + ax.set_xlim(X[:,free_dims].min(), X[:,free_dims].max()) + ax.set_ylim(ymin, ymax) + + + elif len(free_dims) == 2: + raise NotImplementedError("Not implemented yet") + + + else: + raise NotImplementedError("Cannot define a frame with more than two input dimensions") + return plots