diff --git a/GPy/core/gp.py b/GPy/core/gp.py index e932fabd..60038263 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -516,8 +516,8 @@ class GP(Model): def plot(self, plot_limits=None, which_data_rows='all', which_data_ycols='all', fixed_inputs=[], levels=20, samples=0, fignum=None, ax=None, resolution=None, - plot_raw=False, - linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx', predict_kw=None): + plot_raw=False, linecol=None,fillcol=None, Y_metadata=None, + data_symbol='kx', predict_kw=None, plot_training_data=True): """ Plot the posterior of the GP. - In one dimension, the function is plotted with a shaded region identifying two standard deviations. @@ -554,6 +554,8 @@ class GP(Model): :type Y_metadata: dict :param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx') :type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib. + :param plot_training_data: whether or not to plot the training points + :type plot_training_data: boolean """ assert "matplotlib" in sys.modules, "matplotlib package has not been imported." from ..plotting.matplot_dep import models_plots @@ -566,7 +568,85 @@ class GP(Model): which_data_ycols, fixed_inputs, levels, samples, fignum, ax, resolution, plot_raw=plot_raw, Y_metadata=Y_metadata, - data_symbol=data_symbol, predict_kw=predict_kw, **kw) + data_symbol=data_symbol, predict_kw=predict_kw, + plot_training_data=plot_training_data, **kw) + + + def plot_data(self, which_data_rows='all', + which_data_ycols='all', visible_dims=None, + fignum=None, ax=None, data_symbol='kx'): + """ + Plot the training data + - For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed. + + Can plot only part of the data + using which_data_rows and which_data_ycols. + + :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits + :type plot_limits: np.array + :param which_data_rows: which of the training data to plot (default all) + :type which_data_rows: 'all' or a slice object to slice model.X, model.Y + :param which_data_ycols: when the data has several columns (independant outputs), only plot these + :type which_data_ycols: 'all' or a list of integers + :param visible_dims: an array specifying the input dimensions to plot (maximum two) + :type visible_dims: a numpy array + :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D + :type resolution: int + :param levels: number of levels to plot in a contour plot. + :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure + :type levels: int + :param samples: the number of a posteriori samples to plot + :type samples: int + :param fignum: figure to plot on. + :type fignum: figure number + :param ax: axes to plot on. + :type ax: axes handle + :param linecol: color of line to plot [Tango.colorsHex['darkBlue']] + :type linecol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib + :param fillcol: color of fill [Tango.colorsHex['lightBlue']] + :type fillcol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib + :param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx') + :type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib. + """ + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." + from ..plotting.matplot_dep import models_plots + kw = {} + return models_plots.plot_data(self, which_data_rows, + which_data_ycols, visible_dims, + fignum, ax, data_symbol, **kw) + + + def plot_fit_errorbars(self, which_data_rows='all', + which_data_ycols='all', fixed_inputs=[], fignum=None, ax=None, + linecol=None, data_symbol='kx', predict_kw=None, plot_training_data=True): + + """ + Plot the posterior error bars corresponding to the training data + - For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed. + + Can plot only part of the data + using which_data_rows and which_data_ycols. + + :param which_data_rows: which of the training data to plot (default all) + :type which_data_rows: 'all' or a slice object to slice model.X, model.Y + :param which_data_ycols: when the data has several columns (independant outputs), only plot these + :type which_data_rows: 'all' or a list of integers + :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. + :type fixed_inputs: a list of tuples + :param fignum: figure to plot on. + :type fignum: figure number + :param ax: axes to plot on. + :type ax: axes handle + :param plot_training_data: whether or not to plot the training points + :type plot_training_data: boolean + """ + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." + from ..plotting.matplot_dep import models_plots + kw = {} + return models_plots.plot_fit_errorbars(self, which_data_rows, which_data_ycols, fixed_inputs, + fignum, ax, linecol, data_symbol, + predict_kw, plot_training_data, **kw) + def plot_magnification(self, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40, diff --git a/GPy/plotting/matplot_dep/base_plots.py b/GPy/plotting/matplot_dep/base_plots.py index dade87cf..80f114b1 100644 --- a/GPy/plotting/matplot_dep/base_plots.py +++ b/GPy/plotting/matplot_dep/base_plots.py @@ -47,6 +47,27 @@ def gpplot(x, mu, lower, upper, edgecol='#3300FF', fillcol='#33CCFF', ax=None, f return plots +def gperrors(x, mu, lower, upper, edgecol=None, ax=None, fignum=None, **kwargs): + _, axes = ax_default(fignum, ax) + + mu = mu.flatten() + x = x.flatten() + lower = lower.flatten() + upper = upper.flatten() + + plots = [] + + if edgecol is None: + edgecol='#3300FF' + + if not 'alpha' in kwargs.keys(): + kwargs['alpha'] = 0.3 + + plots.append(axes.errorbar(x,mu,yerr=np.vstack([mu-lower,upper-mu]),color=edgecol,**kwargs)) + plots[-1][0].remove() + return plots + + def removeRightTicks(ax=None): ax = ax or pb.gca() for i, line in enumerate(ax.get_yticklines()): diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index d0f6c952..e1ba327e 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -3,19 +3,79 @@ import numpy as np from . import Tango -from base_plots import gpplot, x_frame1D, x_frame2D +from base_plots import gpplot, x_frame1D, x_frame2D,gperrors from ...models.gp_coregionalized_regression import GPCoregionalizedRegression from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression from scipy import sparse from ...core.parameterization.variational import VariationalPosterior from matplotlib import pyplot as plt + +def plot_data(model, which_data_rows='all', + which_data_ycols='all', visible_dims=None, + fignum=None, ax=None, data_symbol='kx',mew=1.5): + """ + Plot the training data + - For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed. + + Can plot only part of the data + using which_data_rows and which_data_ycols. + + :param which_data_rows: which of the training data to plot (default all) + :type which_data_rows: 'all' or a slice object to slice model.X, model.Y + :param which_data_ycols: when the data has several columns (independant outputs), only plot these + :type which_data_rows: 'all' or a list of integers + :param visible_dims: an array specifying the input dimensions to plot (maximum two) + :type visible_dims: a numpy array + :param fignum: figure to plot on. + :type fignum: figure number + :param ax: axes to plot on. + :type ax: axes handle + """ + #deal with optional arguments + if which_data_rows == 'all': + which_data_rows = slice(None) + if which_data_ycols == 'all': + which_data_ycols = np.arange(model.output_dim) + + if ax is None: + fig = plt.figure(num=fignum) + ax = fig.add_subplot(111) + + #data + X = model.X + Y = model.Y + + #work out what the inputs are for plotting (1D or 2D) + if visible_dims is None: + visible_dims = np.arange(model.input_dim) + assert visible_dims.size <= 2, "Visible inputs cannot be larger than two" + free_dims = visible_dims + plots = {} + #one dimensional plotting + if len(free_dims) == 1: + + for d in which_data_ycols: + plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=mew) + + #2D plotting + elif len(free_dims) == 2: + + for d in which_data_ycols: + plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, + Y[which_data_rows, d], cmap=plt.cm.jet, vmin=Y.min(), vmax=Y.max(), linewidth=0.) + + else: + raise NotImplementedError("Cannot define a frame with more than two input dimensions") + return plots + + def plot_fit(model, plot_limits=None, which_data_rows='all', which_data_ycols='all', fixed_inputs=[], levels=20, samples=0, fignum=None, ax=None, resolution=None, plot_raw=False, linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx', - apply_link=False, samples_f=0, plot_uncertain_inputs=True, predict_kw=None): + apply_link=False, samples_f=0, plot_uncertain_inputs=True, predict_kw=None, plot_training_data=True): """ Plot the posterior of the GP. - In one dimension, the function is plotted with a shaded region identifying two standard deviations. @@ -43,7 +103,6 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', :type fignum: figure number :param ax: axes to plot on. :type ax: axes handle - :type output: integer (first output is 0) :param linecol: color of line to plot. :type linecol: :param fillcol: color of fill @@ -52,6 +111,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', :type apply_link: boolean :param samples_f: the number of posteriori f samples to plot p(f*|y) :type samples_f: int + :param plot_training_data: whether or not to plot the training points + :type plot_training_data: boolean """ #deal with optional arguments if which_data_rows == 'all': @@ -116,7 +177,11 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', for d in which_data_ycols: plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol) - if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5) + #if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5) + if not plot_raw and plot_training_data: + plots['dataplot'] = plot_data(model=model, which_data_rows=which_data_rows, + visible_dims=free_dims, data_symbol=data_symbol, mew=1.5, ax=ax, fignum=fignum) + #optionally plot some samples if samples: #NOTE not tested with fixed_inputs @@ -196,7 +261,9 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', for d in which_data_ycols: m_d = m[:,d].reshape(resolution, resolution).T plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=plt.cm.jet) - if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=plt.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) + #if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=plt.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) + if not plot_raw and plot_training_data: + plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=plt.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) #set the limits of the plot to some sensible values ax.set_xlim(xmin[0], xmax[0]) @@ -261,3 +328,83 @@ def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True, X_ return f_inputs else: return X + + +def plot_fit_errorbars(model, which_data_rows='all', + which_data_ycols='all', fixed_inputs=[], + fignum=None, ax=None, + linecol='red', data_symbol='kx', + predict_kw=None, plot_training_data=True): + + """ + Plot the posterior error bars corresponding to the training data + - For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed. + + Can plot only part of the data + using which_data_rows and which_data_ycols. + + :param which_data_rows: which of the training data to plot (default all) + :type which_data_rows: 'all' or a slice object to slice model.X, model.Y + :param which_data_ycols: when the data has several columns (independant outputs), only plot these + :type which_data_rows: 'all' or a list of integers + :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. + :type fixed_inputs: a list of tuples + :param fignum: figure to plot on. + :type fignum: figure number + :param ax: axes to plot on. + :type ax: axes handle + :param plot_training_data: whether or not to plot the training points + :type plot_training_data: boolean + """ + + #deal with optional arguments + if which_data_rows == 'all': + which_data_rows = slice(None) + if which_data_ycols == 'all': + which_data_ycols = np.arange(model.output_dim) + + if ax is None: + fig = plt.figure(num=fignum) + ax = fig.add_subplot(111) + + X = model.X + Y = model.Y + + if predict_kw is None: + predict_kw = {} + + + #work out what the inputs are for plotting (1D or 2D) + fixed_dims = np.array([i for i,v in fixed_inputs]) + free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims) + plots = {} + + #one dimensional plotting + if len(free_dims) == 1: + + m, v = model.predict(X, full_cov=False, Y_metadata=model.Y_metadata, **predict_kw) + fmu, fv = model._raw_predict(X, full_cov=False, **predict_kw) + lower, upper = model.likelihood.predictive_quantiles(fmu, fv, (2.5, 97.5), Y_metadata=model.Y_metadata) + + + for d in which_data_ycols: + plots['gperrors'] = gperrors(X, m[:, d], lower[:, d], upper[:, d], edgecol=linecol, ax=ax, fignum=fignum ) + if plot_training_data: + plots['dataplot'] = plot_data(model=model, which_data_rows=which_data_rows, + visible_dims=free_dims, data_symbol=data_symbol, mew=1.5, ax=ax, fignum=fignum) + + + #set the limits of the plot to some sensible values + ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper)) + ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) + ax.set_xlim(X[:,free_dims].min(), X[:,free_dims].max()) + ax.set_ylim(ymin, ymax) + + + elif len(free_dims) == 2: + raise NotImplementedError("Not implemented yet") + + + else: + raise NotImplementedError("Cannot define a frame with more than two input dimensions") + return plots diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index ed73d133..acceeb16 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -7,48 +7,13 @@ import numpy as np from scipy import linalg -import types -import ctypes -from ctypes import byref, c_char, c_int, c_double # TODO -import scipy -import warnings -import os +from scipy.linalg import lapack, blas + from .config import config import logging from . import linalg_cython -_scipyversion = np.float64((scipy.__version__).split('.')[:2]) -_fix_dpotri_scipy_bug = True -if np.all(_scipyversion >= np.array([0, 14])): - from scipy.linalg import lapack - _fix_dpotri_scipy_bug = False -elif np.all(_scipyversion >= np.array([0, 12])): - #import scipy.linalg.lapack.clapack as lapack - from scipy.linalg import lapack -else: - from scipy.linalg.lapack import flapack as lapack - -if config.getboolean('anaconda', 'installed') and config.getboolean('anaconda', 'MKL'): - try: - anaconda_path = str(config.get('anaconda', 'location')) - mkl_rt = ctypes.cdll.LoadLibrary(os.path.join(anaconda_path, 'DLLs', 'mkl_rt.dll')) - dsyrk = mkl_rt.dsyrk - dsyr = mkl_rt.dsyr - _blas_available = True - print('anaconda installed and mkl is loaded') - except: - _blas_available = False -else: - try: - _blaslib = ctypes.cdll.LoadLibrary(np.core._dotblas.__file__) # @UndefinedVariable - dsyrk = _blaslib.dsyrk_ - dsyr = _blaslib.dsyr_ - _blas_available = True - except AttributeError as e: - _blas_available = False - warnings.warn("warning: caught this exception:" + str(e)) - def force_F_ordered_symmetric(A): """ return a F ordered version of A, assuming A is symmetric @@ -169,9 +134,6 @@ def dpotri(A, lower=1): :returns: A inverse """ - if _fix_dpotri_scipy_bug: - assert lower==1, "scipy linalg behaviour is very weird. please use lower, fortran ordered arrays" - lower = 0 A = force_F_ordered(A) R, info = lapack.dpotri(A, lower=lower) #needs to be zero here, seems to be a scipy bug @@ -300,8 +262,8 @@ def pca(Y, input_dim): Z = linalg.svd(Y - Y.mean(axis=0), full_matrices=False) [X, W] = [Z[0][:, 0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:, 0:input_dim]] v = X.std(axis=0) - X /= v; - W *= v; + X /= v + W *= v return X, W.T def ppca(Y, Q, iterations=100): @@ -347,34 +309,15 @@ def tdot_blas(mat, out=None): out[:] = 0.0 # # Call to DSYRK from BLAS - # If already in Fortran order (rare), and has the right sorts of strides I - # could avoid the copy. I also thought swapping to cblas API would allow use - # of C order. However, I tried that and had errors with large matrices: - # http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot_broken.py mat = np.asfortranarray(mat) - TRANS = c_char('n'.encode('ascii')) - N = c_int(mat.shape[0]) - K = c_int(mat.shape[1]) - LDA = c_int(mat.shape[0]) - UPLO = c_char('l'.encode('ascii')) - ALPHA = c_double(1.0) - A = mat.ctypes.data_as(ctypes.c_void_p) - BETA = c_double(0.0) - C = out.ctypes.data_as(ctypes.c_void_p) - LDC = c_int(np.max(out.strides) // 8) - dsyrk(byref(UPLO), byref(TRANS), byref(N), byref(K), - byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC)) + out = blas.dsyrk(alpha=1.0, a=mat, beta=0.0, c=out, overwrite_c=1, + trans=0, lower=0) symmetrify(out, upper=True) - - return np.ascontiguousarray(out) def tdot(*args, **kwargs): - if _blas_available: - return tdot_blas(*args, **kwargs) - else: - return tdot_numpy(*args, **kwargs) + return tdot_blas(*args, **kwargs) def DSYR_blas(A, x, alpha=1.): """ @@ -386,15 +329,7 @@ def DSYR_blas(A, x, alpha=1.): :param alpha: scalar """ - N = c_int(A.shape[0]) - LDA = c_int(A.shape[0]) - UPLO = c_char('l'.encode('ascii')) - ALPHA = c_double(alpha) - A_ = A.ctypes.data_as(ctypes.c_void_p) - x_ = x.ctypes.data_as(ctypes.c_void_p) - INCX = c_int(1) - dsyr(byref(UPLO), byref(N), byref(ALPHA), - x_, byref(INCX), A_, byref(LDA)) + A = blas.dsyr(lower=0, x=x, a=A, alpha=alpha, overwrite_a=True) symmetrify(A, upper=True) def DSYR_numpy(A, x, alpha=1.): @@ -411,10 +346,8 @@ def DSYR_numpy(A, x, alpha=1.): def DSYR(*args, **kwargs): - if _blas_available: - return DSYR_blas(*args, **kwargs) - else: - return DSYR_numpy(*args, **kwargs) + return DSYR_blas(*args, **kwargs) + def symmetrify(A, upper=False): """