[plotting] getting there, plots to go: dim red, kern, mapping etc

2026-05-04 01:02:39 +02:00 · 2015-10-03 19:45:19 +01:00 · 2015-10-03 19:45:19 +01:00 · 57c4306d92
commit 57c4306d92
parent 7d5283314a
7 changed files with 315 additions and 86 deletions
--- a/GPy/plotting/gpy_plot/data_plots.py
+++ b/GPy/plotting/gpy_plot/data_plots.py
@ -32,7 +32,7 @@ from . import pl

 import numpy as np
 from .plot_util import get_x_y_var, get_free_dims, get_which_data_ycols,\
-    get_which_data_rows, update_not_existing_kwargs
+    get_which_data_rows, update_not_existing_kwargs, helper_predict_with_model

 def _plot_data(self, canvas, which_data_rows='all',
        which_data_ycols='all', visible_dims=None,
@ -65,6 +65,8 @@ def _plot_data(self, canvas, which_data_rows='all',
            update_not_existing_kwargs(plot_kwargs, pl.defaults.data_2d)
            plots['dataplot'].append(pl.scatter(canvas, X[rows, free_dims[0]], X[rows, free_dims[1]], 
                                           c=Y[rows, d], vmin=Y.min(), vmax=Y.max(), **plot_kwargs))
+    elif len(free_dims) == 0:
+        pass #Nothing to plot!
    else:
        raise NotImplementedError("Cannot plot in more then two dimensions")
    return plots
@ -93,3 +95,100 @@ def plot_data(self, which_data_rows='all',
    canvas, kwargs = pl.get_new_canvas(plot_kwargs)
    plots = _plot_data(self, canvas, which_data_rows, which_data_ycols, visible_dims, error_kwargs, **kwargs)
    return pl.show_canvas(canvas, plots)
+
+
+def plot_inducing(self, visible_dims=None, **plot_kwargs):
+    """
+    Draw the inducing inputs of a sparse gp model.
+
+    A canvas is requested from the plotting library, the inducing inputs
+    are added to it by _plot_inducing and the canvas is then shown.
+
+    :param array-like visible_dims: the input dimensions to plot (maximum two)
+    :param kwargs plot_kwargs: keyword arguments for the plotting library
+    :returns: whatever pl.show_canvas returns for the canvas and its plots
+    """
+    canvas, canvas_kwargs = pl.get_new_canvas(plot_kwargs)
+    inducing_plots = _plot_inducing(self, canvas, visible_dims, **canvas_kwargs)
+    return pl.show_canvas(canvas, inducing_plots)
+
+def _plot_inducing(self, canvas, visible_dims, **plot_kwargs):
+    """
+    Add the inducing inputs of the model to an existing canvas.
+
+    One free dimension is drawn with pl.plot_axis_lines, two free
+    dimensions are drawn with pl.scatter.
+
+    :param canvas: the canvas to draw on (handed to the plotting library)
+    :param array-like visible_dims: the input dimensions to plot (maximum two)
+    :param kwargs plot_kwargs: keyword arguments for the plotting library;
+        missing entries are filled in from pl.defaults.inducing_1d / inducing_2d
+    :returns: dict holding the plot under the key 'inducing', or an empty
+        dict when there is no free dimension to plot
+    :raises NotImplementedError: if more than two dimensions are free
+    """
+    free_dims = get_free_dims(self, visible_dims, None)
+
+    # Keep all columns of Z and index them with the original dimension
+    # numbers exactly once. Slicing to free_dims up front and indexing with
+    # free_dims again selects the wrong columns (or runs out of bounds)
+    # whenever the free dimensions are not 0..k-1.
+    Z = self.Z
+    plots = {}
+
+    #one dimensional plotting
+    if len(free_dims) == 1:
+        update_not_existing_kwargs(plot_kwargs, pl.defaults.inducing_1d)
+        plots['inducing'] = pl.plot_axis_lines(canvas, Z[:, free_dims], **plot_kwargs)
+    #2D plotting
+    elif len(free_dims) == 2:
+        update_not_existing_kwargs(plot_kwargs, pl.defaults.inducing_2d)
+        plots['inducing'] = pl.scatter(canvas, Z[:, free_dims[0]], Z[:, free_dims[1]],
+                                       **plot_kwargs)
+    elif len(free_dims) == 0:
+        pass #Nothing to plot!
+    else:
+        raise NotImplementedError("Cannot plot in more then two dimensions")
+    return plots
+
+def plot_errorbars_trainset(self, which_data_rows='all',
+        which_data_ycols='all', fixed_inputs=None, 
+        plot_raw=False, apply_link=False,
+        predict_kw=None, **plot_kwargs):
+    """
+    Plot the errorbars of the GP likelihood on the training data.
+
+    The errorbars are the ones obtained after the approximations
+    belonging to the likelihood have been applied, so this also works
+    for heteroscedastic likelihoods. They span the 2.5 and 97.5
+    percentiles of the prediction at the training inputs.
+
+    Give the Y_metadata in the predict_kw if you need it.
+
+    :param which_data_rows: which of the training data to plot (default all)
+    :type which_data_rows: 'all' or a slice object to slice self.X, self.Y
+    :param which_data_ycols: when the data has several columns (independent outputs), only plot these
+    :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input dimension i should be set to value v.
+    :type fixed_inputs: a list of tuples
+    :param bool plot_raw: plot the latent function (usually denoted f) only?
+    :param bool apply_link: whether to apply the link function of the GP to the raw prediction.
+    :param dict predict_kw: kwargs for the prediction used to predict the right quantiles.
+    :param kwargs plot_kwargs: kwargs for the data plot for the plotting library you are using
+    """
+    canvas, canvas_kwargs = pl.get_new_canvas(plot_kwargs)
+    errorbar_plots = _plot_errorbars_trainset(
+        self, canvas, which_data_rows, which_data_ycols,
+        fixed_inputs, plot_raw, apply_link, predict_kw,
+        **canvas_kwargs)
+    return pl.show_canvas(canvas, errorbar_plots)
+
+def _plot_errorbars_trainset(self, canvas, 
+        which_data_rows='all', which_data_ycols='all', 
+        fixed_inputs=None,
+        plot_raw=False, apply_link=False,
+        predict_kw=None, **plot_kwargs):
+    """
+    Add the likelihood errorbars at the training inputs to a canvas.
+
+    The model is evaluated at the training inputs (with the fixed inputs
+    overwritten) and, for every selected output column, an errorbar
+    reaching from the 2.5 to the 97.5 percentile is drawn around Y.
+
+    :param canvas: the canvas to draw on (handed to the plotting library)
+    :param which_data_rows: which of the training data to plot (default all)
+    :param which_data_ycols: which output columns to plot (default all)
+    :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input dimension i should be set to value v.
+    :param bool plot_raw: handed on to helper_predict_with_model
+    :param bool apply_link: handed on to helper_predict_with_model
+    :param dict predict_kw: kwargs for the prediction used to predict the quantiles
+    :param kwargs plot_kwargs: kwargs for the plotting library; missing
+        entries are filled in from pl.defaults.yerrorbar
+    :returns: dict(yerrorbars=[...]) with one entry per output column, or
+        an empty dict when no input dimension is free
+    :raises NotImplementedError: if more than one input dimension is free
+    """
+    ycols = get_which_data_ycols(self, which_data_ycols)
+    rows = get_which_data_rows(self, which_data_rows)
+
+    X, _, Y = get_x_y_var(self)
+
+    if fixed_inputs is None:
+        fixed_inputs = []
+    # get_free_dims is given the fixed dimension *indices* here, in the same
+    # form helper_for_plot_data hands them over, not the (index, value) tuples.
+    fixed_dims = np.array([i for i, _ in fixed_inputs])
+    free_dims = get_free_dims(self, None, fixed_dims)
+
+    Xgrid = X.copy()
+    for i, v in fixed_inputs:
+        Xgrid[:, i] = v
+
+    if len(free_dims) >= 2:
+        raise NotImplementedError("Cannot plot in more then one dimension.")
+    if len(free_dims) == 0:
+        # Nothing to plot! Return an empty dict so that every branch hands
+        # the same container type back to the caller.
+        return {}
+
+    update_not_existing_kwargs(plot_kwargs, pl.defaults.yerrorbar)
+    _, percs = helper_predict_with_model(self, Xgrid, plot_raw,
+                                         apply_link, (2.5, 97.5),
+                                         ycols, predict_kw)
+    errorbars = []
+    # helper_predict_with_model already reduced the percentiles to the
+    # columns in ycols, so they are addressed by position (col) while Y is
+    # addressed by the original output column (d).
+    for col, d in enumerate(ycols):
+        errorbars.append(pl.yerrorbar(canvas, X[rows, free_dims[0]], Y[rows, d],
+                                      np.vstack([Y[rows, d] - percs[0][rows, col],
+                                                 percs[1][rows, col] - Y[rows, d]]),
+                                      **plot_kwargs))
+    return dict(yerrorbars=errorbars)
--- a/GPy/plotting/gpy_plot/gp_plots.py
+++ b/GPy/plotting/gpy_plot/gp_plots.py
@ -32,40 +32,8 @@ import numpy as np
 from functools import wraps

 from . import pl
-from .plot_util import get_x_y_var, get_fixed_dims, get_free_dims, \
-    x_frame1D, x_frame2D, update_not_existing_kwargs, \
-    helper_predict_with_model
-
-def _helper_for_plots(self, plot_limits, fixed_inputs, resolution):
-    """
-    Figure out the data, free_dims and create an Xgrid for
-    the prediction. 
-    """
-    X, Xvar, Y = get_x_y_var(self)
-
-    #work out what the inputs are for plotting (1D or 2D)
-    fixed_dims = get_fixed_dims(self, fixed_inputs)
-    free_dims = get_free_dims(self, None, fixed_dims)
-    
-    if len(free_dims) == 1:
-        #define the frame on which to plot
-        resolution = resolution or 200
-        Xnew, xmin, xmax = x_frame1D(X[:,free_dims], plot_limits=plot_limits, resolution=resolution)
-        Xgrid = np.empty((Xnew.shape[0],self.input_dim))
-        Xgrid[:,free_dims] = Xnew
-        for i,v in fixed_dims:
-            Xgrid[:,i] = v
-        x = Xgrid
-        y = None
-    elif len(free_dims) == 2:
-        #define the frame for plotting on
-        resolution = resolution or 50
-        Xnew, x, y, xmin, xmax = x_frame2D(X[:,free_dims], plot_limits, resolution)
-        Xgrid = np.empty((Xnew.shape[0],self.input_dim))
-        Xgrid[:,free_dims] = Xnew
-        for i,v in fixed_dims:
-            Xgrid[:,i] = v    
-    return X, Xvar, Y, fixed_dims, free_dims, Xgrid, x, y, xmin, xmax, resolution
+from .plot_util import helper_for_plot_data, update_not_existing_kwargs, \
+    helper_predict_with_model, get_which_data_ycols

 def plot_mean(self, plot_limits=None, fixed_inputs=None,
              resolution=None, plot_raw=False,
@ -76,6 +44,9 @@ def plot_mean(self, plot_limits=None, fixed_inputs=None,
              **kwargs):
    """
    Plot the mean of the GP.
+
+    Give the Y_metadata in the predict_kw if you need it.
+   
    
    :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
    :type plot_limits: np.array
@ -90,7 +61,10 @@ def plot_mean(self, plot_limits=None, fixed_inputs=None,
    :param int levels: for 2D plotting, the number of contour levels to use is 
    """
    canvas, kwargs = pl.get_new_canvas(kwargs)
-    plots = _plot_mean(self, canvas, plot_limits, fixed_inputs, resolution, plot_raw, Y_metadata, apply_link, which_data_ycols, levels, predict_kw, **kwargs)
+    plots = _plot_mean(self, canvas, plot_limits, fixed_inputs, 
+                       resolution, plot_raw, Y_metadata, 
+                       apply_link, which_data_ycols, levels, 
+                       predict_kw, **kwargs)
    return pl.show_canvas(canvas, plots)

@wraps(plot_mean)
@ -100,27 +74,30 @@ def _plot_mean(self, canvas, plot_limits=None, fixed_inputs=None,
              which_data_ycols=None,
              levels=20, 
              predict_kw=None, **kwargs):
-    if predict_kw is None:
-        predict_kw = {}
+    _, _, _, _, free_dims, Xgrid, x, y, _, _, resolution = helper_for_plot_data(self, plot_limits, fixed_inputs, resolution)

-    _, _, _, _, free_dims, Xgrid, x, y, _, _, resolution = _helper_for_plots(self, plot_limits, fixed_inputs, resolution)
-
-    if len(free_dims<=2):
-        which_data_ycols = get_which_data_ycols(self, which_data_ycols)
-        mu, _ = helper_predict_with_model(self, Xgrid, plot_raw, apply_link, None, which_data_ycols, **predict_kw)
+    if len(free_dims)<=2:
+        mu, _ = helper_predict_with_model(self, Xgrid, plot_raw, 
+                                          apply_link, None, 
+                                          get_which_data_ycols(self, which_data_ycols), 
+                                          predict_kw)
        if len(free_dims)==1:
            # 1D plotting:
            update_not_existing_kwargs(kwargs, pl.defaults.meanplot_1d)
-            return dict(gpmean=[pl.plot(canvas, Xgrid, mu, **kwargs)])
+            return dict(gpmean=[pl.plot(canvas, Xgrid[:, free_dims], mu, **kwargs)])
        else:
            update_not_existing_kwargs(kwargs, pl.defaults.meanplot_2d)
            return dict(gpmean=[pl.contour(canvas, x, y, 
                                           mu.reshape(resolution, resolution), 
                                           levels=levels, **kwargs)])
+    elif len(free_dims)==0:
+        pass # Nothing to plot!
+    else:
+        raise RuntimeError('Cannot plot mean in more then 2 input dimensions')

 def plot_confidence(self, lower=2.5, upper=97.5, plot_limits=None, fixed_inputs=None,
              resolution=None, plot_raw=False,
-              Y_metadata=None, apply_link=False, 
+              apply_link=False, 
              which_data_ycols='all',
              predict_kw=None, 
              **kwargs):
@ -128,6 +105,9 @@ def plot_confidence(self, lower=2.5, upper=97.5, plot_limits=None, fixed_inputs=
    Plot the confidence interval between the percentiles lower and upper.
    E.g. the 95% confidence interval is $2.5, 97.5$.
    Note: Only implemented for one dimension!
+
+    Give the Y_metadata in the predict_kw if you need it.
+   
    
    :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
    :type plot_limits: np.array
@ -142,26 +122,99 @@ def plot_confidence(self, lower=2.5, upper=97.5, plot_limits=None, fixed_inputs=
    """
    canvas, kwargs = pl.get_new_canvas(kwargs)
    plots = _plot_confidence(self, canvas, lower, upper, plot_limits, 
-                             fixed_inputs, resolution, plot_raw, Y_metadata, 
+                             fixed_inputs, resolution, plot_raw, 
                             apply_link, which_data_ycols, 
                             predict_kw, **kwargs)
    return pl.show_canvas(canvas, plots)

 def _plot_confidence(self, canvas, lower, upper, plot_limits=None, fixed_inputs=None,
              resolution=None, plot_raw=False,
-              Y_metadata=None, apply_link=False, 
+              apply_link=False, 
              which_data_ycols=None, 
              predict_kw=None, 
              **kwargs):
-    if predict_kw is None:
-        predict_kw = {}
-    
-    _, _, _, _, _, Xgrid, _, _, _, _, _ = _helper_for_plots(self, plot_limits, fixed_inputs, resolution)
+    _, _, _, _, free_dims, Xgrid, _, _, _, _, _ = helper_for_plot_data(self, plot_limits, fixed_inputs, resolution)

+    ycols = get_which_data_ycols(self, which_data_ycols)
+    
    update_not_existing_kwargs(kwargs, pl.defaults.confidence_interval)
-    _, percs = helper_predict_with_model(self, Xgrid, plot_raw, apply_link, (lower, upper), which_data_ycols, **predict_kw)
-
-    return dict(gpconfidence=pl.fill_between(canvas, Xgrid, percs[0], percs[1], **kwargs))
-
    
-    
+    if len(free_dims)<=1:
+        if len(free_dims)==1:
+            _, percs = helper_predict_with_model(self, Xgrid, plot_raw, apply_link, 
+                                                 (lower, upper), 
+                                                 ycols, predict_kw)
+    
+            fills = []
+            for d in ycols:
+                fills.append(pl.fill_between(canvas, Xgrid[:,free_dims[0]], percs[0][:,d], percs[1][:,d], **kwargs))
+            return dict(gpconfidence=fills)
+        else:
+            pass #Nothing to plot!
+    else:
+        raise RuntimeError('Can only plot confidence interval in one input dimension')
+
+
+def plot_density(self, plot_limits=None, fixed_inputs=None,
+              resolution=None, plot_raw=False,
+              apply_link=False, 
+              which_data_ycols='all',
+              levels=20,
+              predict_kw=None, 
+              **kwargs):
+    """
+    Plot the density of the GP prediction as a gradient of filled bands
+    between percentiles ranging from 2.5 to 97.5.
+    Note: Only implemented for one dimension!
+
+    Give the Y_metadata in the predict_kw if you need it.
+
+    :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
+    :type plot_limits: np.array
+    :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input dimension i should be set to value v.
+    :type fixed_inputs: a list of tuples
+    :param int resolution: The resolution of the prediction [default:200]
+    :param bool plot_raw: plot the latent function (usually denoted f) only?
+    :param bool apply_link: whether to apply the link function of the GP to the raw prediction.
+    :param array-like which_data_ycols: which columns of y to plot (array-like or list of ints)
+    :param int levels: the number of levels in the density (number between 1 and 50, where 50 is very smooth and 1 is the same as plot_confidence) 
+    :param dict predict_kw: the keyword arguments for the prediction. If you want to plot a specific kernel give dict(kern=<specific kernel>) in here
+    """
+    canvas, canvas_kwargs = pl.get_new_canvas(kwargs)
+    density_plots = _plot_density(
+        self, canvas, plot_limits,
+        fixed_inputs, resolution, plot_raw,
+        apply_link, which_data_ycols,
+        levels,
+        predict_kw, **canvas_kwargs)
+    return pl.show_canvas(canvas, density_plots)
+
+def _plot_density(self, canvas, plot_limits=None, fixed_inputs=None,
+              resolution=None, plot_raw=False,
+              apply_link=False, 
+              which_data_ycols=None,
+              levels=20, 
+              predict_kw=None, **kwargs):
+    """
+    Add the predictive density to a canvas, one gradient fill per
+    selected output column.
+
+    levels*2 percentiles, evenly spaced between 2.5 and 97.5, are
+    predicted on the plotting grid and handed to pl.fill_gradient.
+
+    :param canvas: the canvas to draw on (handed to the plotting library)
+    :param int levels: half the number of percentiles that are predicted
+    :param dict predict_kw: the keyword arguments for the prediction
+    :param kwargs: kwargs for the plotting library; missing entries are
+        filled in from pl.defaults.density
+    :returns: dict(gpdensity=[...]) for one free input dimension,
+        None when there is no free input dimension
+    :raises RuntimeError: if more than one input dimension is free
+    """
+    _, _, _, _, free_dims, Xgrid, _, _, _, _, _ = helper_for_plot_data(self, plot_limits, fixed_inputs, resolution)
+
+    ycols = get_which_data_ycols(self, which_data_ycols)
+
+    update_not_existing_kwargs(kwargs, pl.defaults.density)
+
+    if len(free_dims) > 1:
+        raise RuntimeError('Can only plot density in one input dimension')
+    if len(free_dims) == 0:
+        return None # Nothing to plot!
+
+    _, percs = helper_predict_with_model(self, Xgrid, plot_raw,
+                                         apply_link, np.linspace(2.5, 97.5, levels*2),
+                                         ycols, predict_kw)
+    # 1D plotting:
+    # helper_predict_with_model returns the percentiles already reduced to
+    # the columns in ycols, so they are addressed by position and not by
+    # the original output column number.
+    fills = [pl.fill_gradient(canvas, Xgrid[:, free_dims[0]],
+                              [p[:, col] for p in percs], **kwargs)
+             for col in range(len(ycols))]
+    return dict(gpdensity=fills)
+
+            
--- a/GPy/plotting/gpy_plot/plot_util.py
+++ b/GPy/plotting/gpy_plot/plot_util.py
@ -31,7 +31,7 @@
 import numpy as np
 from scipy import sparse

-def helper_predict_with_model(self, Xgrid, plot_raw, apply_link, which_data_ycols, percentiles, **predict_kw):
+def helper_predict_with_model(self, Xgrid, plot_raw, apply_link, percentiles, which_data_ycols, predict_kw):
    """
    Make the right decisions for prediction with a model 
    based on the standard arguments of plotting.
@ -39,29 +39,73 @@ def helper_predict_with_model(self, Xgrid, plot_raw, apply_link, which_data_ycol
    This is quite complex and will take a while to understand,
    so do not change anything in here lightly!!! 
    """
+    # Put some standards into the predict_kw so that prediction is done automatically:
+    if predict_kw is None:
+        predict_kw = {}
    if 'likelihood' not in predict_kw:
        if plot_raw:
            from ...likelihoods import Gaussian
-            lik = Gaussian(0) # Make the likelihood not add any noise
+            from ...likelihoods.link_functions import Identity
+            lik = Gaussian(Identity(), 0) # Make the likelihood not add any noise
        else:
            lik = None
        predict_kw['likelihood'] = lik
+    if 'Y_metadata' not in predict_kw:
+        predict_kw['Y_metadata'] = self.Y_metadata or {}
+    if 'output_index' not in predict_kw['Y_metadata']:
+        predict_kw['Y_metadata']['output_index'] = Xgrid[:,-1:].astype(np.int)

    mu, _ = self.predict(Xgrid, **predict_kw)
    
    if percentiles is not None:
        percentiles = self.predict_quantiles(Xgrid, quantiles=percentiles, **predict_kw)
-    else: percentiles = {}
+    else: percentiles = []

-    retmu = np.empty((Xgrid.shape[0], len(ycols)))
+    # Filter out the ycolums which we want to plot:
+    retmu = mu[:, which_data_ycols]
+    percs = [p[:, which_data_ycols] for p in percentiles]
    
    if plot_raw and apply_link:
-        for i, d in enumerate(ycols):
-            retmu = self.likelihood.gp_link.transf(mu[:, [i]])
-            for perc in percentiles:
+        for i in range(len(which_data_ycols)):
+            retmu[:, [i]] = self.likelihood.gp_link.transf(mu[:, [i]])
+            for perc in percs:
                perc[:, [i]] = self.likelihood.gp_link.transf(perc[:, [i]])

-    return mu, percentiles
+    return retmu, percs
+
+def helper_for_plot_data(self, plot_limits, fixed_inputs, resolution):
+    """
+    Figure out the data, free_dims and create an Xgrid for
+    the prediction. 
+
+    :param plot_limits: the limits of the plot, handed to x_frame1D / x_frame2D
+    :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that
+        input dimension i should be set to value v (None means no fixed inputs)
+    :param resolution: the resolution of the grid; falls back to 200 for one
+        free dimension and to 50 for two free dimensions
+    :returns: the tuple (X, Xvar, Y, fixed_dims, free_dims, Xgrid, x, y,
+        xmin, xmax, resolution). Xgrid, x, y, xmin and xmax are None when
+        the number of free dimensions is neither one nor two, so that the
+        caller can decide what to do based on len(free_dims).
+    """
+    X, Xvar, Y = get_x_y_var(self)
+
+    #work out what the inputs are for plotting (1D or 2D)
+    if fixed_inputs is None:
+        fixed_inputs = []
+    fixed_dims = get_fixed_dims(self, fixed_inputs)
+    free_dims = get_free_dims(self, None, fixed_dims)
+
+    # Without these defaults the return statement below raises an
+    # UnboundLocalError for zero or more than two free dimensions, before
+    # the caller gets the chance to handle those cases.
+    Xnew = Xgrid = x = y = xmin = xmax = None
+
+    if len(free_dims) == 1:
+        #define the frame on which to plot
+        resolution = resolution or 200
+        Xnew, xmin, xmax = x_frame1D(X[:,free_dims], plot_limits=plot_limits, resolution=resolution)
+    elif len(free_dims) == 2:
+        #define the frame for plotting on
+        resolution = resolution or 50
+        Xnew, x, y, xmin, xmax = x_frame2D(X[:,free_dims], plot_limits, resolution)
+
+    if Xnew is not None:
+        # Full input grid: the frame in the free dimensions, the given
+        # values in the fixed dimensions.
+        Xgrid = np.empty((Xnew.shape[0],self.input_dim))
+        Xgrid[:,free_dims] = Xnew
+        for i,v in fixed_inputs:
+            Xgrid[:,i] = v
+        if len(free_dims) == 1:
+            # In one dimension x is the grid itself and y stays None
+            x = Xgrid
+    return X, Xvar, Y, fixed_dims, free_dims, Xgrid, x, y, xmin, xmax, resolution
+

 def update_not_existing_kwargs(to_update, update_from):
    """
@ -112,8 +156,6 @@ def get_fixed_dims(model, fixed_inputs):
    """
    Work out the fixed dimensions from the fixed_inputs list of tuples.
    """
-    if fixed_inputs is None:
-        fixed_inputs = []
    return np.array([i for i,_ in fixed_inputs])

 def get_which_data_ycols(model, which_data_ycols):