From ba2ea3eb7338d3b304bc760f50058f169c020e6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Men=C3=A9ndez=20Hurtado?= Date: Mon, 24 Aug 2015 12:56:18 +0200 Subject: [PATCH 01/18] FIX: now Scipy 0.16 is required, removing fixes for older versions. Accessing blas through the scipy interface --- GPy/util/linalg.py | 65 +++++++++------------------------------------- 1 file changed, 12 insertions(+), 53 deletions(-) diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index ed73d133..606a9d88 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -7,48 +7,16 @@ import numpy as np from scipy import linalg -import types +from scipy.linalg import lapack, blas + import ctypes from ctypes import byref, c_char, c_int, c_double # TODO -import scipy -import warnings -import os + from .config import config import logging from . import linalg_cython -_scipyversion = np.float64((scipy.__version__).split('.')[:2]) -_fix_dpotri_scipy_bug = True -if np.all(_scipyversion >= np.array([0, 14])): - from scipy.linalg import lapack - _fix_dpotri_scipy_bug = False -elif np.all(_scipyversion >= np.array([0, 12])): - #import scipy.linalg.lapack.clapack as lapack - from scipy.linalg import lapack -else: - from scipy.linalg.lapack import flapack as lapack - -if config.getboolean('anaconda', 'installed') and config.getboolean('anaconda', 'MKL'): - try: - anaconda_path = str(config.get('anaconda', 'location')) - mkl_rt = ctypes.cdll.LoadLibrary(os.path.join(anaconda_path, 'DLLs', 'mkl_rt.dll')) - dsyrk = mkl_rt.dsyrk - dsyr = mkl_rt.dsyr - _blas_available = True - print('anaconda installed and mkl is loaded') - except: - _blas_available = False -else: - try: - _blaslib = ctypes.cdll.LoadLibrary(np.core._dotblas.__file__) # @UndefinedVariable - dsyrk = _blaslib.dsyrk_ - dsyr = _blaslib.dsyr_ - _blas_available = True - except AttributeError as e: - _blas_available = False - warnings.warn("warning: caught this exception:" + str(e)) - def force_F_ordered_symmetric(A): """ return a F ordered version of A, assuming A is symmetric @@ -169,9 +137,6 @@ def dpotri(A, lower=1): :returns: A inverse """ - if _fix_dpotri_scipy_bug: - assert lower==1, "scipy linalg behaviour is very weird. 
please use lower, fortran ordered arrays"
-        lower = 0
     A = force_F_ordered(A)
     R, info = lapack.dpotri(A, lower=lower) #needs to be zero here, seems to be a scipy bug
@@ -300,8 +265,8 @@ def pca(Y, input_dim):
     Z = linalg.svd(Y - Y.mean(axis=0), full_matrices=False)
     [X, W] = [Z[0][:, 0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:, 0:input_dim]]
     v = X.std(axis=0)
-    X /= v;
-    W *= v;
+    X /= v
+    W *= v
     return X, W.T

 def ppca(Y, Q, iterations=100):
@@ -362,19 +327,15 @@ def tdot_blas(mat, out=None):
     BETA = c_double(0.0)
     C = out.ctypes.data_as(ctypes.c_void_p)
     LDC = c_int(np.max(out.strides) // 8)
-    dsyrk(byref(UPLO), byref(TRANS), byref(N), byref(K),
-          byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC))
+    blas.dsyrk(byref(UPLO), byref(TRANS), byref(N), byref(K),
+               byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC))

     symmetrify(out, upper=True)
-
     return np.ascontiguousarray(out)


 def tdot(*args, **kwargs):
-    if _blas_available:
-        return tdot_blas(*args, **kwargs)
-    else:
-        return tdot_numpy(*args, **kwargs)
+    return tdot_blas(*args, **kwargs)

 def DSYR_blas(A, x, alpha=1.):
     """
@@ -393,8 +354,8 @@ def DSYR_blas(A, x, alpha=1.):
     A_ = A.ctypes.data_as(ctypes.c_void_p)
     x_ = x.ctypes.data_as(ctypes.c_void_p)
     INCX = c_int(1)
-    dsyr(byref(UPLO), byref(N), byref(ALPHA),
-         x_, byref(INCX), A_, byref(LDA))
+    blas.dsyr(byref(UPLO), byref(N), byref(ALPHA),
+              x_, byref(INCX), A_, byref(LDA))

     symmetrify(A, upper=True)

 def DSYR_numpy(A, x, alpha=1.):
@@ -411,10 +372,8 @@ def DSYR_numpy(A, x, alpha=1.):


 def DSYR(*args, **kwargs):
-    if _blas_available:
-        return DSYR_blas(*args, **kwargs)
-    else:
-        return DSYR_numpy(*args, **kwargs)
+    return DSYR_blas(*args, **kwargs)
+

 def symmetrify(A, upper=False):
     """
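
[Editorial note: patch 01 replaces the hand-loaded ctypes BLAS symbols with
SciPy's bundled LAPACK/BLAS wrappers. A minimal sketch of the dpotri pattern
that the patched linalg.py builds on -- an illustration only, not part of the
patch series; it assumes SciPy >= 0.16 and uses an arbitrary symmetric
positive-definite test matrix:

    import numpy as np
    from scipy.linalg import lapack

    A = np.asfortranarray([[4., 1.], [1., 3.]])
    L, info = lapack.dpotrf(A, lower=1)  # Cholesky factor of A
    assert info == 0, "A must be positive definite"
    R, info = lapack.dpotri(L, lower=1)  # inverse of A, computed from its factor
    assert info == 0
    R = np.tril(R) + np.tril(R, -1).T    # dpotri fills only one triangle
    np.testing.assert_allclose(R, np.linalg.inv(A))
]
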
From 3746af772921126ad36448379613badaabac6598 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20Men=C3=A9ndez=20Hurtado?=
Date: Mon, 24 Aug 2015 13:29:40 +0200
Subject: [PATCH 02/18] Fixing confusion between the lapack and ctypes
 interfaces

---
 GPy/util/linalg.py | 32 +++-----------------------------
 1 file changed, 3 insertions(+), 29 deletions(-)

diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 606a9d88..4c5f1b79 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -9,9 +9,6 @@
 import numpy as np
 from scipy import linalg
 from scipy.linalg import lapack, blas

-import ctypes
-from ctypes import byref, c_char, c_int, c_double # TODO
-
 from .config import config
 import logging
 from . import linalg_cython
@@ -312,26 +309,11 @@ def tdot_blas(mat, out=None):
         out[:] = 0.0
     #
     # Call to DSYRK from BLAS
-    # If already in Fortran order (rare), and has the right sorts of strides I
-    # could avoid the copy. I also thought swapping to cblas API would allow use
-    # of C order. However, I tried that and had errors with large matrices:
-    # http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot_broken.py
     mat = np.asfortranarray(mat)
-    TRANS = c_char('n'.encode('ascii'))
-    N = c_int(mat.shape[0])
-    K = c_int(mat.shape[1])
-    LDA = c_int(mat.shape[0])
-    UPLO = c_char('l'.encode('ascii'))
-    ALPHA = c_double(1.0)
-    A = mat.ctypes.data_as(ctypes.c_void_p)
-    BETA = c_double(0.0)
-    C = out.ctypes.data_as(ctypes.c_void_p)
-    LDC = c_int(np.max(out.strides) // 8)
-    blas.dsyrk(byref(UPLO), byref(TRANS), byref(N), byref(K),
-               byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC))
+    out = blas.dsyrk(alpha=1.0, a=mat, beta=0.0, c=out, overwrite_c=1,
+                     trans=0, lower=1, trans=0)

     symmetrify(out, upper=True)
-
     return np.ascontiguousarray(out)

 def tdot(*args, **kwargs):
@@ -347,15 +329,7 @@ def DSYR_blas(A, x, alpha=1.):
     :param alpha: scalar

     """
-    N = c_int(A.shape[0])
-    LDA = c_int(A.shape[0])
-    UPLO = c_char('l'.encode('ascii'))
-    ALPHA = c_double(alpha)
-    A_ = A.ctypes.data_as(ctypes.c_void_p)
-    x_ = x.ctypes.data_as(ctypes.c_void_p)
-    INCX = c_int(1)
-    blas.dsyr(byref(UPLO), byref(N), byref(ALPHA),
-              x_, byref(INCX), A_, byref(LDA))
+    blas.dsyr(lower=1, x=x, a=A, alpha=alpha)

     symmetrify(A, upper=True)

 def DSYR_numpy(A, x, alpha=1.):

From 13438f1c10dbabca6c4218a23364e13d29a4685b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20Men=C3=A9ndez=20Hurtado?=
Date: Mon, 24 Aug 2015 13:46:20 +0200
Subject: [PATCH 03/18] Fixing the blas arguments for DSYRK

---
 GPy/util/linalg.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 4c5f1b79..acceeb16 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -311,7 +311,7 @@ def tdot_blas(mat, out=None):
     #
     # Call to DSYRK from BLAS
     mat = np.asfortranarray(mat)
     out = blas.dsyrk(alpha=1.0, a=mat, beta=0.0, c=out, overwrite_c=1,
-                     trans=0, lower=1, trans=0)
+                     trans=0, lower=0)
     symmetrify(out, upper=True)
     return np.ascontiguousarray(out)
@@ -329,7 +329,7 @@ def DSYR_blas(A, x, alpha=1.):
     :param alpha: scalar

     """
-    blas.dsyr(lower=1, x=x, a=A, alpha=alpha)
+    A = blas.dsyr(lower=0, x=x, a=A, alpha=alpha, overwrite_a=True)
     symmetrify(A, upper=True)

 def DSYR_numpy(A, x, alpha=1.):
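
[Editorial note: patches 02-03 port the two raw ctypes calls to SciPy's
keyword-argument BLAS interface and then repair the argument lists. A quick
sanity check of the final call signatures -- a hedged illustration, not part
of the patch series; X, A and x below are arbitrary test data:

    import numpy as np
    from scipy.linalg import blas

    X = np.asfortranarray(np.random.randn(5, 3))
    C = blas.dsyrk(alpha=1.0, a=X, trans=0, lower=0)  # upper triangle of X.X^T
    C = np.triu(C) + np.triu(C, 1).T                  # symmetrise, as tdot_blas does
    np.testing.assert_allclose(C, X.dot(X.T))

    A = np.zeros((5, 5))
    x = np.random.randn(5)
    A = blas.dsyr(alpha=2.0, x=x, a=A, lower=0)       # rank-1 update: A + 2*x*x^T
    A = np.triu(A) + np.triu(A, 1).T
    np.testing.assert_allclose(A, 2.0 * np.outer(x, x))
]
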
From 0d32652c8876d02aed274c69e976a00733a56265 Mon Sep 17 00:00:00 2001
From: Max Zwiessele
Date: Fri, 28 Aug 2015 16:26:55 +0100
Subject: [PATCH 04/18] [heteroscedastic gauss] Implemented heteroscedastic
 Gaussian likelihood with @ric70x7

---
 GPy/likelihoods/gaussian.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py
index 9abb8cde..ee1f2a5d 100644
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@@ -48,6 +48,7 @@ class Gaussian(Likelihood):

     def betaY(self,Y,Y_metadata=None):
         #TODO: ~Ricardo this does not live here
+        print "Iam Here"
         return Y/self.gaussian_variance(Y_metadata)

     def gaussian_variance(self, Y_metadata=None):
@@ -321,3 +322,30 @@ class Gaussian(Likelihood):
         dF_dv = np.ones_like(v)*(-0.5/lik_var)
         dF_dtheta = -0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2)
         return F, dF_dmu, dF_dv, dF_dtheta.reshape(1, Y.shape[0], Y.shape[1])
+
+class Heteroscedastic_Gaussian(Gaussian):
+    def __init__(self, Y_metadata, gp_link=None, variance=1., name='het_Gauss'):
+        if gp_link is None:
+            gp_link = link_functions.Identity()
+
+        if not isinstance(gp_link, link_functions.Identity):
+            print("Warning, Exact inference is not implemeted for non-identity link functions,\
+            if you are not already, ensure Laplace inference_method is used")
+
+        super(Heteroscedastic_Gaussian, self).__init__(gp_link, np.ones(Y_metadata['output_index'].shape[0])*variance, name)
+
+    def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
+        return dL_dKdiag[Y_metadata['output_index']][:,0]
+
+    def gaussian_variance(self, Y_metadata=None):
+        return self.variance[Y_metadata['output_index']]
+
+    def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
+        if full_cov:
+            if var.ndim == 2:
+                var += np.eye(var.shape[0])*self.variance
+            if var.ndim == 3:
+                var += np.atleast_3d(np.eye(var.shape[0])*self.variance)
+        else:
+            var += self.variance
+        return mu, var
\ No newline at end of file

From 4bd99c674fdb53237019386b8c93c456b96e9e6f Mon Sep 17 00:00:00 2001
From: Max Zwiessele
Date: Fri, 28 Aug 2015 16:28:01 +0100
Subject: [PATCH 05/18] [heteroscedastic gauss] Implemented heteroscedastic
 Gaussian likelihood with @ric70x7

---
 GPy/likelihoods/gaussian.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py
index ee1f2a5d..4fd80ef5 100644
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@@ -48,7 +48,7 @@ class Gaussian(Likelihood):

     def betaY(self,Y,Y_metadata=None):
         #TODO: ~Ricardo this does not live here
-        print "Iam Here"
+        raise RuntimeError, "Please notify the GPy developers, this should not happen"
         return Y/self.gaussian_variance(Y_metadata)

     def gaussian_variance(self, Y_metadata=None):

From ce9ee6c7585d4030f6a70d65e981b0f5a7ed492b Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Tue, 1 Sep 2015 22:38:49 +0100
Subject: [PATCH 06/18] ensuring the shape of the mean vector at predict time;
 fixes a bug in EP prediction

---
 GPy/core/gp.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index ddef0647..a87ac66e 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -201,6 +201,8 @@ class GP(Model):

         Kx = kern.K(self.X, Xnew)
         mu = np.dot(Kx.T, self.posterior.woodbury_vector)
+        if len(mu.shape)==1:
+            mu = mu.reshape(-1,1)
         if full_cov:
             Kxx = kern.K(Xnew)
             if self.posterior.woodbury_inv.ndim == 2:

From 70a9a26d7e12861b3bacc833e17b416c65d53122 Mon Sep 17 00:00:00 2001
From: Max Zwiessele
Date: Wed, 2 Sep 2015 09:06:17 +0100
Subject: [PATCH 07/18] [core] updating system, security branching

---
 GPy/core/gp.py                                 | 105 ++++++++++++++++++
 GPy/core/mapping.py                            |   2 +-
 GPy/core/sparse_gp.py                          |  26 +++--
 GPy/core/verbose_optimization.py               |   2 +-
 GPy/kern/_src/basis_funcs.py                   |  52 ++++-----
 GPy/kern/_src/kern.py                          |   2 +
 GPy/kern/_src/kernel_slice_operations.py       |  27 ++++-
 GPy/kern/_src/linear.py                        |   6 +
 GPy/kern/_src/rbf.py                           |   3 +
 GPy/kern/_src/stationary.py                    |  64 ++++++++++-
 GPy/likelihoods/__init__.py                    |   2 +-
 GPy/models/bayesian_gplvm.py                   |  14 +++
 GPy/models/gplvm.py                            |  40 ++++---
 .../matplot_dep/dim_reduction_plots.py         |  84 +++++++++++++-
 GPy/util/diag.py                               |   2 +
 15 files changed, 366 insertions(+), 65 deletions(-)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index ddef0647..aba065f9 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -106,6 +106,13 @@ class GP(Model):
         self.link_parameter(self.likelihood)
         self.posterior = None

+        # The predictive variable to be used to predict using the posterior object's
+        # woodbury_vector and woodbury_inv is defined as predictive_variable
+        # This is usually just a link to self.X (full GP) or self.Z (sparse GP).
+        # Make sure to name this variable and the predict functions will "just work"
+        # as long as the posterior has the right woodbury entries.
+ self._predictive_variable = self.X + def set_XY(self, X=None, Y=None, trigger_update=True): """ @@ -207,6 +214,7 @@ class GP(Model): var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx)) elif self.posterior.woodbury_inv.ndim == 3: var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2])) + from ..util.linalg import mdot for i in range(var.shape[2]): var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx)) var = var @@ -302,6 +310,103 @@ class GP(Model): return dmu_dX, dv_dX + def predict_jacobian(self, Xnew, kern=None, full_cov=True): + """ + Compute the derivatives of the posterior of the GP. + + Given a set of points at which to predict X* (size [N*,Q]), compute the + mean and variance of the derivative. Resulting arrays are sized: + + dL_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one). + Note that this is the mean and variance of the derivative, + not the derivative of the mean and variance! (See predictive_gradients for that) + + dv_dX* -- [N*, Q], (since all outputs have the same variance) + If there is missing data, it is not implemented for now, but + there will be one output variance per output dimension. + + :param X: The points at which to get the predictive gradients. + :type X: np.ndarray (Xnew x self.input_dim) + :param kern: The kernel to compute the jacobian for. + :param boolean full_cov: whether to return the full covariance of the jacobian. + + :returns: dmu_dX, dv_dX + :rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q,(D)) ] + + Note: We always return sum in input_dim gradients, as the off-diagonals + in the input_dim are not needed for further calculations. + This is a compromise for increase in speed. Mathematically the jacobian would + have another dimension in Q. + """ + if kern is None: + kern = self.kern + + mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim)) + + for i in range(self.output_dim): + mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable) + + dK_dXnew_full = np.empty((self._predictive_variable.shape[0], Xnew.shape[0], Xnew.shape[1])) + for i in range(self._predictive_variable.shape[0]): + dK_dXnew_full[i] = kern.gradients_X([[1.]], Xnew, self._predictive_variable[[i]]) + + def compute_cov_inner(wi): + if full_cov: + # full covariance gradients: + dK2_dXdX = kern.gradients_XX([[1.]], Xnew) + var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full) + else: + dK2_dXdX = kern.gradients_XX_diag([[1.]], Xnew) + var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full) + return var_jac + + if self.posterior.woodbury_inv.ndim == 3: + var_jac = [] + for d in range(self.posterior.woodbury_inv.shape[2]): + var_jac.append(compute_cov_inner(self.posterior.woodbury_inv[:, :, d])) + var_jac = np.concatenate(var_jac) + else: + var_jac = compute_cov_inner(self.posterior.woodbury_inv) + return mean_jac, var_jac + + def predict_wishard_embedding(self, Xnew, kern=None): + """ + Predict the wishard embedding G of the GP. This is the density of the + input of the GP defined by the probabilistic function mapping f. + G = J_mean.T*J_mean + output_dim*J_cov. + + :param array-like Xnew: The points at which to evaluate the magnification. + :param :py:class:`~GPy.kern.Kern` kern: The kernel to use for the magnification. + + Supplying only a part of the learning kernel gives insights into the density + of the specific kernel part of the input function. E.g. 
one can see how dense the + linear part of a kernel is compared to the non-linear part etc. + """ + if kern is None: + kern = self.kern + + mu_jac, var_jac = self.predict_jacobian(Xnew, kern, full_cov=False) + mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac) + if var_jac.ndim == 3: + Sigma = np.einsum('iqd,ipd->iqp', var_jac, var_jac) + G = mumuT + Sigma + else: + Sigma = np.einsum('iq,ip->iqp', var_jac, var_jac) + G = mumuT + self.output_dim*Sigma + return G + + def predict_magnification(self, Xnew, kern=None): + """ + Predict the magnification factor as + + sqrt(det(G)) + + for each point N in Xnew + """ + from ..util.linalg import jitchol + G = self.predict_wishard_embedding(Xnew, kern) + return np.array([2*np.sqrt(np.exp(np.sum(np.log(np.diag(jitchol(G[n, :, :])))))) for n in range(Xnew.shape[0])]) + def posterior_samples_f(self,X,size=10, full_cov=True): """ Samples the posterior GP at the points X. diff --git a/GPy/core/mapping.py b/GPy/core/mapping.py index 30614384..9853ea8a 100644 --- a/GPy/core/mapping.py +++ b/GPy/core/mapping.py @@ -32,7 +32,7 @@ class Bijective_mapping(Mapping): also back from f to X. The inverse mapping is called g(). """ def __init__(self, input_dim, output_dim, name='bijective_mapping'): - super(Bijective_apping, self).__init__(name=name) + super(Bijective_mapping, self).__init__(name=name) def g(self, f): """Inverse mapping from output domain of the function to the inputs.""" diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index c7c99be6..ac5fb62b 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -59,6 +59,8 @@ class SparseGP(GP): logger.info("Adding Z as parameter") self.link_parameter(self.Z, index=0) self.posterior = None + self._predictive_variable = self.Z + def has_uncertain_inputs(self): return isinstance(self.X, VariationalPosterior) @@ -114,10 +116,10 @@ class SparseGP(GP): Make a prediction for the latent function values. For certain inputs we give back a full_cov of shape NxN, - if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is of, + if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is of, we take only the diagonal elements across N. - - For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we + + For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we return a NxDxD matrix and in the not full_cov case, we return the diagonal elements across D (NxD). This is for both with and without missing data. See for missing data SparseGP implementation py:class:'~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch'. """ @@ -125,7 +127,7 @@ class SparseGP(GP): if kern is None: kern = self.kern if not isinstance(Xnew, VariationalPosterior): - Kx = kern.K(self.Z, Xnew) + Kx = kern.K(self._predictive_variable, Xnew) mu = np.dot(Kx.T, self.posterior.woodbury_vector) if full_cov: Kxx = kern.K(Xnew) @@ -149,28 +151,28 @@ class SparseGP(GP): if self.mean_function is not None: mu += self.mean_function.f(Xnew) else: - psi0_star = kern.psi0(self.Z, Xnew) - psi1_star = kern.psi1(self.Z, Xnew) + psi0_star = kern.psi0(self._predictive_variable, Xnew) + psi1_star = kern.psi1(self._predictive_variable, Xnew) #psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code. la = self.posterior.woodbury_vector mu = np.dot(psi1_star, la) # TODO: dimensions? 
- - if full_cov: + + if full_cov: var = np.empty((Xnew.shape[0], la.shape[1], la.shape[1])) di = np.diag_indices(la.shape[1]) - else: + else: var = np.empty((Xnew.shape[0], la.shape[1])) - + for i in range(Xnew.shape[0]): _mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]] - psi2_star = kern.psi2(self.Z, NormalPosterior(_mu, _var)) + psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var)) tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]])) var_ = mdot(la.T, tmp, la) p0 = psi0_star[i] t = np.atleast_3d(self.posterior.woodbury_inv) t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2) - + if full_cov: var_[di] += p0 var_[di] += -t2 diff --git a/GPy/core/verbose_optimization.py b/GPy/core/verbose_optimization.py index 08c5e2dd..57f0c9e6 100644 --- a/GPy/core/verbose_optimization.py +++ b/GPy/core/verbose_optimization.py @@ -146,7 +146,7 @@ class VerboseOptimization(object): seconds = time.time()-self.start #sys.stdout.write(" "*len(self.message)) self.deltat += seconds - if self.deltat > .2: + if self.deltat > .3 or seconds < .3: self.print_out(seconds) self.deltat = 0 diff --git a/GPy/kern/_src/basis_funcs.py b/GPy/kern/_src/basis_funcs.py index a6c1f36c..dc21a687 100644 --- a/GPy/kern/_src/basis_funcs.py +++ b/GPy/kern/_src/basis_funcs.py @@ -11,7 +11,7 @@ class BasisFuncKernel(Kern): def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'): """ Abstract superclass for kernels with explicit basis functions for use in GPy. - + This class does NOT automatically add an offset to the design matrix phi! """ super(BasisFuncKernel, self).__init__(input_dim, active_dims, name) @@ -23,24 +23,24 @@ class BasisFuncKernel(Kern): variance = np.array(variance) self.variance = Param('variance', variance, Logexp()) self.link_parameter(self.variance) - + def parameters_changed(self): self.alpha = np.sqrt(self.variance) self.beta = 1./self.variance - + @Cache_this(limit=3, ignore_args=()) def phi(self, X): return self._phi(X) def _phi(self, X): raise NotImplementedError('Overwrite this _phi function, which maps the input X into the higher dimensional space and returns the design matrix Phi') - + def K(self, X, X2=None): return self._K(X, X2) def Kdiag(self, X, X2=None): return np.diag(self._K(X, X2)) - + def update_gradients_full(self, dL_dK, X, X2=None): if self.ARD: phi1 = self.phi(X) @@ -51,22 +51,22 @@ class BasisFuncKernel(Kern): self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi2) else: self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2)) * self.beta - + def update_gradients_diag(self, dL_dKdiag, X): if self.ARD: phi1 = self.phi(X) self.variance.gradient = np.einsum('i,iq,iq->q', dL_dKdiag, phi1, phi1) else: self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X)) * self.beta - + def concatenate_offset(self, X): return np.c_[np.ones((X.shape[0], 1)), X] - + def posterior_inf(self, X=None, posterior=None): """ - Do the posterior inference on the parameters given this kernels functions - and the model posterior, which has to be a GPy posterior, usually found at m.posterior, if m is a GPy model. - If not given we search for the the highest parent to be a model, containing the posterior, and for X accordingly. + Do the posterior inference on the parameters given this kernels functions + and the model posterior, which has to be a GPy posterior, usually found at m.posterior, if m is a GPy model. 
+ If not given we search for the the highest parent to be a model, containing the posterior, and for X accordingly. """ if X is None: try: @@ -80,7 +80,7 @@ class BasisFuncKernel(Kern): raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference") phi_alpha = self.phi(X) * self.variance return (phi_alpha).T.dot(posterior.woodbury_vector), (np.eye(phi_alpha.shape[1])*self.variance - mdot(phi_alpha.T, posterior.woodbury_inv, phi_alpha)) - + @Cache_this(limit=3, ignore_args=()) def _K(self, X, X2): if X2 is None or X is X2: @@ -95,35 +95,35 @@ class BasisFuncKernel(Kern): phi1 = phi1[:, None] phi2 = phi2[:, None] return phi1.dot(phi2.T) - - + + class LinearSlopeBasisFuncKernel(BasisFuncKernel): def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='linear_segment'): """ A linear segment transformation. The segments start at start, \ - are then linear to stop and constant again. The segments are - normalized, so that they have exactly as much mass above - as below the origin. - - Start and stop can be tuples or lists of starts and stops. + are then linear to stop and constant again. The segments are + normalized, so that they have exactly as much mass above + as below the origin. + + Start and stop can be tuples or lists of starts and stops. Behaviour of start stop is as np.where(X self.stop, self.stop, phi) return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1. - + class ChangePointBasisFuncKernel(BasisFuncKernel): def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'): self.changepoint = np.array(changepoint) super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name) - + @Cache_this(limit=3, ignore_args=()) def _phi(self, X): return np.where((X < self.changepoint), -1, 1) @@ -131,7 +131,7 @@ class ChangePointBasisFuncKernel(BasisFuncKernel): class DomainKernel(LinearSlopeBasisFuncKernel): def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'): super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, ARD, name) - + @Cache_this(limit=3, ignore_args=()) def _phi(self, X): phi = np.where((X>self.start)*(Xiq', X2, self.variances, dL_dK) + def gradients_XX(self, dL_dK, X, X2=None): + if X2 is None: + return 2*np.ones(X.shape)*self.variances + else: + return np.ones(X.shape)*self.variances + def gradients_X_diag(self, dL_dKdiag, X): return 2.*self.variances*dL_dKdiag[:,None]*X diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 73f2d0a4..ba7843c6 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -31,6 +31,9 @@ class RBF(Stationary): def dK_dr(self, r): return -r*self.K_of_r(r) + def dK2_drdr(self, r): + return (r**2-1)*self.K_of_r(r) + def __getstate__(self): dc = super(RBF, self).__getstate__() if self.useGPU: diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index 9c4f1436..ab1ec282 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -15,7 +15,7 @@ from ...util.caching import Cache_this try: from . 
import stationary_cython except ImportError: - print('warning in sationary: failed to import cython module: falling back to numpy') + print('warning in stationary: failed to import cython module: falling back to numpy') config.set('cython', 'working', 'false') @@ -78,6 +78,10 @@ class Stationary(Kern): raise NotImplementedError("implement derivative of the covariance function wrt r to use this class") @Cache_this(limit=20, ignore_args=()) + def dK2_drdr(self, r): + raise NotImplementedError("implement second derivative of covariance wrt r to use this method") + + @Cache_this(limit=5, ignore_args=()) def K(self, X, X2=None): """ Kernel function applied on inputs X and X2. @@ -94,6 +98,11 @@ class Stationary(Kern): #a convenience function, so we can cache dK_dr return self.dK_dr(self._scaled_dist(X, X2)) + @Cache_this(limit=3, ignore_args=()) + def dK2_drdr_via_X(self, X, X2): + #a convenience function, so we can cache dK_dr + return self.dK2_drdr(self._scaled_dist(X, X2)) + def _unscaled_dist(self, X, X2=None): """ Compute the Euclidean distance between each row of X and X2, or between @@ -201,6 +210,59 @@ class Stationary(Kern): else: return self._gradients_X_pure(dL_dK, X, X2) + def gradients_XX(self, dL_dK, X, X2=None): + """ + Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2: + + ..math: + \frac{\partial^2 K}{\partial X\partial X2} + + ..returns: + dL2_dXdX2: NxMxQ, for X [NxQ] and X2[MxQ] (X2 is X if, X2 is None) + Thus, we return the second derivative in X2. + """ + # The off diagonals in Q are always zero, this should also be true for the Linear kernel... + # According to multivariable chain rule, we can chain the second derivative through r: + # d2K_dXdX2 = dK_dr*d2r_dXdX2 + d2K_drdr * dr_dX * dr_dX2: + invdist = self._inv_dist(X, X2) + invdist2 = invdist**2 + + dL_dr = self.dK_dr_via_X(X, X2) * dL_dK + tmp1 = dL_dr * invdist + + dL_drdr = self.dK2_drdr_via_X(X, X2) * dL_dK + tmp2 = dL_drdr * invdist2 + + l2 = np.ones(X.shape[1]) * self.lengthscale**2 + + if X2 is None: + X2 = X + tmp1 -= np.eye(X.shape[0])*self.variance + else: + tmp1[X==X2.T] -= self.variance + + grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64) + #grad = np.empty(X.shape, dtype=np.float64) + for q in range(self.input_dim): + tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2 + grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q] + #grad[:, :, q] = ((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q] + #np.sum(((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q], axis=1, out=grad[:,q]) + #np.sum( - (tmp2*(tmpdist**2)), axis=1, out=grad[:,q]) + return grad + + def gradients_XX_diag(self, dL_dK, X): + """ + Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2: + + ..math: + \frac{\partial^2 K}{\partial X\partial X2} + + ..returns: + dL2_dXdX2: NxMxQ, for X [NxQ] and X2[MxQ] + """ + return np.ones(X.shape) * self.variance/self.lengthscale**2 + def _gradients_X_pure(self, dL_dK, X, X2=None): invdist = self._inv_dist(X, X2) dL_dr = self.dK_dr_via_X(X, X2) * dL_dK diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index 3157bd5b..20fe43d7 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -1,6 +1,6 @@ from .bernoulli import Bernoulli from .exponential import Exponential -from .gaussian import Gaussian +from .gaussian import Gaussian, Heteroscedastic_Gaussian from .gamma import Gamma from .poisson import Poisson from 
.student_t import StudentT diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 3ac703fe..ff0d6855 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -137,6 +137,20 @@ class BayesianGPLVM(SparseGP_MPI): fignum, plot_inducing, legend, plot_limits, aspect, updates, predict_kwargs, imshow_kwargs) + def plot_magnification(self, labels=None, which_indices=None, + resolution=50, ax=None, marker='o', s=40, + fignum=None, legend=True, + plot_limits=None, + aspect='auto', updates=False, **kwargs): + import sys + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." + from ..plotting.matplot_dep import dim_reduction_plots + + return dim_reduction_plots.plot_magnification(self, labels, which_indices, + resolution, ax, marker, s, + fignum, False, legend, + plot_limits, aspect, updates, **kwargs) + def do_test_latents(self, Y): """ Compute the latent representation for a set of new points Y diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py index d6f29907..c95c9fa0 100644 --- a/GPy/models/gplvm.py +++ b/GPy/models/gplvm.py @@ -43,19 +43,19 @@ class GPLVM(GP): super(GPLVM, self).parameters_changed() self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None) - def jacobian(self,X): - J = np.zeros((X.shape[0],X.shape[1],self.output_dim)) - for i in range(self.output_dim): - J[:,:,i] = self.kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1], X, self.X) - return J + #def jacobian(self,X): + # J = np.zeros((X.shape[0],X.shape[1],self.output_dim)) + # for i in range(self.output_dim): + # J[:,:,i] = self.kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1], X, self.X) + # return J - def magnification(self,X): - target=np.zeros(X.shape[0]) - #J = np.zeros((X.shape[0],X.shape[1],self.output_dim)) - J = self.jacobian(X) - for i in range(X.shape[0]): - target[i]=np.sqrt(np.linalg.det(np.dot(J[i,:,:],np.transpose(J[i,:,:])))) - return target + #def magnification(self,X): + # target=np.zeros(X.shape[0]) + # #J = np.zeros((X.shape[0],X.shape[1],self.output_dim)) + ## J = self.jacobian(X) + # for i in range(X.shape[0]): + # target[i]=np.sqrt(np.linalg.det(np.dot(J[i,:,:],np.transpose(J[i,:,:])))) + # return target def plot(self): assert self.Y.shape[1] == 2, "too high dimensional to plot. Try plot_latent" @@ -82,5 +82,17 @@ class GPLVM(GP): fignum, False, legend, plot_limits, aspect, updates, **kwargs) - def plot_magnification(self, *args, **kwargs): - return util.plot_latent.plot_magnification(self, *args, **kwargs) + def plot_magnification(self, labels=None, which_indices=None, + resolution=50, ax=None, marker='o', s=40, + fignum=None, legend=True, + plot_limits=None, + aspect='auto', updates=False, **kwargs): + + import sys + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
+ from ..plotting.matplot_dep import dim_reduction_plots + + return dim_reduction_plots.plot_magnification(self, labels, which_indices, + resolution, ax, marker, s, + fignum, False, legend, + plot_limits, aspect, updates, **kwargs) diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py index 2c243e13..7d13ef04 100644 --- a/GPy/plotting/matplot_dep/dim_reduction_plots.py +++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py @@ -114,7 +114,7 @@ def plot_latent(model, labels=None, which_indices=None, # create a function which computes the shading of latent space according to the output variance def plot_function(x): - Xtest_full = np.zeros((x.shape[0], model.X.shape[1])) + Xtest_full = np.zeros((x.shape[0], X.shape[1])) Xtest_full[:, [input_1, input_2]] = x _, var = model.predict(Xtest_full, **predict_kwargs) var = var[:, :1] @@ -202,6 +202,7 @@ def plot_latent(model, labels=None, which_indices=None, def plot_magnification(model, labels=None, which_indices=None, resolution=60, ax=None, marker='o', s=40, fignum=None, plot_inducing=False, legend=True, + plot_limits=None, aspect='auto', updates=False): """ :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc) @@ -217,17 +218,88 @@ def plot_magnification(model, labels=None, which_indices=None, input_1, input_2 = most_significant_input_dimensions(model, which_indices) - # first, plot the output variance as a function of the latent space - Xtest, xx, yy, xmin, xmax = x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution) - Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1])) + #fethch the data points X that we'd like to plot + X = model.X + if isinstance(X, VariationalPosterior): + X = X.mean + else: + X = X + + if X.shape[0] > 1000: + print("Warning: subsampling X, as it has more samples then 1000. 
X.shape={!s}".format(X.shape)) + subsample = np.random.choice(X.shape[0], size=1000, replace=False) + X = X[subsample] + labels = labels[subsample] + #======================================================================= + # <<>> + # <<>> + # plt.close('all') + # fig, ax = plt.subplots(1,1) + # from GPy.plotting.matplot_dep.dim_reduction_plots import most_significant_input_dimensions + # import matplotlib.patches as mpatches + # i1, i2 = most_significant_input_dimensions(m, None) + # xmin, xmax = 100, -100 + # ymin, ymax = 100, -100 + # legend_handles = [] + # + # X = m.X.mean[:, [i1, i2]] + # X = m.X.variance[:, [i1, i2]] + # + # xmin = X[:,0].min(); xmax = X[:,0].max() + # ymin = X[:,1].min(); ymax = X[:,1].max() + # range_ = [[xmin, xmax], [ymin, ymax]] + # ul = np.unique(labels) + # + # for i, l in enumerate(ul): + # #cdict = dict(red =[(0., colors[i][0], colors[i][0]), (1., colors[i][0], colors[i][0])], + # # green=[(0., colors[i][0], colors[i][1]), (1., colors[i][1], colors[i][1])], + # # blue =[(0., colors[i][0], colors[i][2]), (1., colors[i][2], colors[i][2])], + # # alpha=[(0., 0., .0), (.5, .5, .5), (1., .5, .5)]) + # #cmap = LinearSegmentedColormap('{}'.format(l), cdict) + # cmap = LinearSegmentedColormap.from_list('cmap_{}'.format(str(l)), [colors[i], colors[i]], 255) + # cmap._init() + # #alphas = .5*(1+scipy.special.erf(np.linspace(-2,2, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3)) + # alphas = (scipy.special.erf(np.linspace(0,2.4, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3)) + # cmap._lut[:, -1] = alphas + # print l + # x, y = X[labels==l].T + # + # heatmap, xedges, yedges = np.histogram2d(x, y, bins=300, range=range_) + # #heatmap, xedges, yedges = np.histogram2d(x, y, bins=100) + # + # im = ax.imshow(heatmap, extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], cmap=cmap, aspect='auto', interpolation='nearest', label=str(l)) + # legend_handles.append(mpatches.Patch(color=colors[i], label=l)) + # ax.set_xlim(xmin, xmax) + # ax.set_ylim(ymin, ymax) + # plt.legend(legend_handles, [l.get_label() for l in legend_handles]) + # plt.draw() + # plt.show() + #======================================================================= + + #Create an IMshow controller that can re-plot the latent space shading at a good resolution + if plot_limits is None: + xmin, ymin = X[:, [input_1, input_2]].min(0) + xmax, ymax = X[:, [input_1, input_2]].max(0) + x_r, y_r = xmax-xmin, ymax-ymin + xmin -= .1*x_r + xmax += .1*x_r + ymin -= .1*y_r + ymax += .1*y_r + else: + try: + xmin, xmax, ymin, ymax = plot_limits + except (TypeError, ValueError) as e: + raise e.__class__("Wrong plot limits: {} given -> need (xmin, xmax, ymin, ymax)".format(plot_limits)) + def plot_function(x): + Xtest_full = np.zeros((x.shape[0], X.shape[1])) Xtest_full[:, [input_1, input_2]] = x - mf=model.magnification(Xtest_full) + mf = model.predict_magnification(Xtest_full) return mf view = ImshowController(ax, plot_function, - tuple(model.X.min(0)[:, [input_1, input_2]]) + tuple(model.X.max(0)[:, [input_1, input_2]]), + (xmin, ymin, xmax, ymax), resolution, aspect=aspect, interpolation='bilinear', cmap=pb.cm.gray) diff --git a/GPy/util/diag.py b/GPy/util/diag.py index e7c332e2..9a3343f0 100644 --- a/GPy/util/diag.py +++ b/GPy/util/diag.py @@ -46,6 +46,8 @@ def offdiag_view(A, offset=0): return as_strided(Af[(1+offset):], shape=(A.shape[0]-1, A.shape[1]), strides=(A.strides[0] + A.itemsize, A.strides[1])) def _diag_ufunc(A,b,offset,func): + b = np.squeeze(b) + assert b.ndim <= 1, "only 
implemented for one dimensional arrays" dA = view(A, offset); func(dA,b,dA) return A From 7a5872e11d1ba4410ecf997400963039bd1e26db Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 2 Sep 2015 12:15:06 +0100 Subject: [PATCH 08/18] Model uses the new HeteroscedasticGaussian likelihood --- GPy/models/gp_heteroscedastic_regression.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/GPy/models/gp_heteroscedastic_regression.py b/GPy/models/gp_heteroscedastic_regression.py index 19cb18d8..2fe3b788 100644 --- a/GPy/models/gp_heteroscedastic_regression.py +++ b/GPy/models/gp_heteroscedastic_regression.py @@ -30,10 +30,7 @@ class GPHeteroscedasticRegression(GP): kernel = kern.RBF(X.shape[1]) #Likelihood - #likelihoods_list = [likelihoods.Gaussian(name="Gaussian_noise_%s" %j) for j in range(Ny)] - noise_terms = np.unique(Y_metadata['output_index'].flatten()) - likelihoods_list = [likelihoods.Gaussian(name="Gaussian_noise_%s" %j) for j in noise_terms] - likelihood = likelihoods.MixedNoise(likelihoods_list=likelihoods_list) + likelihood = likelihoods.HeteroscedasticGaussian(Y_metadata) super(GPHeteroscedasticRegression, self).__init__(X,Y,kernel,likelihood, Y_metadata=Y_metadata) From f780693577d0b52555a60061df5dd294b7afee9f Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 2 Sep 2015 13:29:33 +0100 Subject: [PATCH 09/18] New likelihood: HeteroscedasticGaussian --- GPy/likelihoods/__init__.py | 1 + GPy/likelihoods/gaussian.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index 3157bd5b..3c39157d 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -1,6 +1,7 @@ from .bernoulli import Bernoulli from .exponential import Exponential from .gaussian import Gaussian +from .gaussian import HeteroscedasticGaussian from .gamma import Gamma from .poisson import Poisson from .student_t import StudentT diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py index 7b001e17..ef4b26b1 100644 --- a/GPy/likelihoods/gaussian.py +++ b/GPy/likelihoods/gaussian.py @@ -326,7 +326,7 @@ class Gaussian(Likelihood): dF_dtheta = -0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2) return F, dF_dmu, dF_dv, dF_dtheta.reshape(1, Y.shape[0], Y.shape[1]) -class Heteroscedastic_Gaussian(Gaussian): +class HeteroscedasticGaussian(Gaussian): def __init__(self, Y_metadata, gp_link=None, variance=1., name='het_Gauss'): if gp_link is None: gp_link = link_functions.Identity() @@ -335,7 +335,7 @@ class Heteroscedastic_Gaussian(Gaussian): print("Warning, Exact inference is not implemeted for non-identity link functions,\ if you are not already, ensure Laplace inference_method is used") - super(Heteroscedastic_Gaussian, self).__init__(gp_link, np.ones(Y_metadata['output_index'].shape[0])*variance, name) + super(HeteroscedasticGaussian, self).__init__(gp_link, np.ones(Y_metadata['output_index'].shape[0])*variance, name) def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None): return dL_dKdiag[Y_metadata['output_index']][:,0] @@ -351,4 +351,4 @@ class Heteroscedastic_Gaussian(Gaussian): var += np.atleast_3d(np.eye(var.shape[0])*self.variance) else: var += self.variance - return mu, var \ No newline at end of file + return mu, var From 21f6b92475b05c7caee3e32389c63b655790f25d Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 2 Sep 2015 13:32:06 +0100 Subject: [PATCH 10/18] Change in _diag_ufunc with @mzwiessele --- GPy/util/diag.py | 2 ++ 1 file changed, 2 insertions(+) diff 
--git a/GPy/util/diag.py b/GPy/util/diag.py index e7c332e2..9a3343f0 100644 --- a/GPy/util/diag.py +++ b/GPy/util/diag.py @@ -46,6 +46,8 @@ def offdiag_view(A, offset=0): return as_strided(Af[(1+offset):], shape=(A.shape[0]-1, A.shape[1]), strides=(A.strides[0] + A.itemsize, A.strides[1])) def _diag_ufunc(A,b,offset,func): + b = np.squeeze(b) + assert b.ndim <= 1, "only implemented for one dimensional arrays" dA = view(A, offset); func(dA,b,dA) return A From 4bec78704b4fd37cd51578f9c846010f90cd67c2 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 2 Sep 2015 13:32:39 +0100 Subject: [PATCH 11/18] Note added to the docs --- GPy/models/gp_heteroscedastic_regression.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPy/models/gp_heteroscedastic_regression.py b/GPy/models/gp_heteroscedastic_regression.py index 2fe3b788..63c6352a 100644 --- a/GPy/models/gp_heteroscedastic_regression.py +++ b/GPy/models/gp_heteroscedastic_regression.py @@ -16,6 +16,8 @@ class GPHeteroscedasticRegression(GP): :param X: input observations :param Y: observed values :param kernel: a GPy kernel, defaults to rbf + + NB: This model does not make inference on the noise outside the training set """ def __init__(self, X, Y, kernel=None, Y_metadata=None): From 26bdcfa82e6540098ae8c0592313cf65ab77d048 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Wed, 2 Sep 2015 15:45:55 +0100 Subject: [PATCH 12/18] [hetnoise] import correction --- GPy/likelihoods/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index 20fe43d7..627ef39f 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -1,6 +1,6 @@ from .bernoulli import Bernoulli from .exponential import Exponential -from .gaussian import Gaussian, Heteroscedastic_Gaussian +from .gaussian import Gaussian, HeteroscedasticGaussian from .gamma import Gamma from .poisson import Poisson from .student_t import StudentT From ca60ad3195641ed5c48b00b565f1a4fb97c57431 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Wed, 2 Sep 2015 15:46:40 +0100 Subject: [PATCH 13/18] [magnification] mostly plotting and some model corrections for _predictive_variable --- GPy/core/gp.py | 21 +++++++++++++++++-- GPy/core/sparse_gp.py | 15 +++++++++++++ GPy/core/verbose_optimization.py | 10 ++++----- GPy/kern/_src/kern.py | 4 +++- GPy/models/gplvm.py | 15 ------------- GPy/models/sparse_gp_minibatch.py | 3 ++- .../matplot_dep/dim_reduction_plots.py | 19 +++++++++-------- 7 files changed, 54 insertions(+), 33 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 8267a569..1e285092 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -405,9 +405,10 @@ class GP(Model): for each point N in Xnew """ - from ..util.linalg import jitchol G = self.predict_wishard_embedding(Xnew, kern) - return np.array([2*np.sqrt(np.exp(np.sum(np.log(np.diag(jitchol(G[n, :, :])))))) for n in range(Xnew.shape[0])]) + from ..util.linalg import jitchol + return np.array([np.sqrt(np.exp(2*np.sum(np.log(np.diag(jitchol(G[n, :, :])))))) for n in range(Xnew.shape[0])]) + #return np.array([np.sqrt(np.linalg.det(G[n, :, :])) for n in range(Xnew.shape[0])]) def posterior_samples_f(self,X,size=10, full_cov=True): """ @@ -564,6 +565,22 @@ class GP(Model): plot_raw=plot_raw, Y_metadata=Y_metadata, data_symbol=data_symbol, predict_kw=predict_kw, **kw) + def plot_magnification(self, labels=None, which_indices=None, + resolution=50, ax=None, marker='o', s=40, + fignum=None, legend=True, + plot_limits=None, + aspect='auto', 
updates=False, **kwargs): + + import sys + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." + from ..plotting.matplot_dep import dim_reduction_plots + + return dim_reduction_plots.plot_magnification(self, labels, which_indices, + resolution, ax, marker, s, + fignum, False, legend, + plot_limits, aspect, updates, **kwargs) + + def input_sensitivity(self, summarize=True): """ Returns the sensitivity for each dimension of this model diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index ac5fb62b..5ab302db 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -181,3 +181,18 @@ class SparseGP(GP): var[i] = np.diag(var_)+p0-t2 return mu, var + + def plot_magnification(self, labels=None, which_indices=None, + resolution=50, ax=None, marker='o', s=40, + fignum=None, legend=True, + plot_limits=None, + aspect='auto', updates=False, plot_inducing=True, **kwargs): + + import sys + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." + from ..plotting.matplot_dep import dim_reduction_plots + + return dim_reduction_plots.plot_magnification(self, labels, which_indices, + resolution, ax, marker, s, + fignum, plot_inducing, legend, + plot_limits, aspect, updates, **kwargs) \ No newline at end of file diff --git a/GPy/core/verbose_optimization.py b/GPy/core/verbose_optimization.py index 57f0c9e6..c4539736 100644 --- a/GPy/core/verbose_optimization.py +++ b/GPy/core/verbose_optimization.py @@ -24,7 +24,6 @@ class VerboseOptimization(object): self.model.add_observer(self, self.print_status) self.status = 'running' self.clear = clear_after_finish - self.deltat = .2 self.update() @@ -80,6 +79,7 @@ class VerboseOptimization(object): def __enter__(self): self.start = time.time() + self._time = self.start return self def print_out(self, seconds): @@ -143,12 +143,12 @@ class VerboseOptimization(object): def print_status(self, me, which=None): self.update() - seconds = time.time()-self.start + t = time.time() + seconds = t-self.start #sys.stdout.write(" "*len(self.message)) - self.deltat += seconds - if self.deltat > .3 or seconds < .3: + if t-self._time > .3 or seconds < .3: self.print_out(seconds) - self.deltat = 0 + self._time = t self.iteration += 1 diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 62be679c..aae4460a 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -59,7 +59,7 @@ class Kern(Parameterized): self._sliced_X = 0 self.useGPU = self._support_GPU and useGPU self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool) - + from .psi_comp import PSICOMP_GH self.psicomp = PSICOMP_GH() @@ -102,6 +102,8 @@ class Kern(Parameterized): raise NotImplementedError def gradients_XX(self, dL_dK, X, X2): raise(NotImplementedError, "This is the second derivative of K wrt X and X2, and not implemented for this kernel") + def gradients_XX_diag(self, dL_dK, X, X2): + raise(NotImplementedError, "This is the diagonal of the second derivative of K wrt X and X2, and not implemented for this kernel") def gradients_X_diag(self, dL_dKdiag, X): raise NotImplementedError diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py index c95c9fa0..d4f4f564 100644 --- a/GPy/models/gplvm.py +++ b/GPy/models/gplvm.py @@ -81,18 +81,3 @@ class GPLVM(GP): resolution, ax, marker, s, fignum, False, legend, plot_limits, aspect, updates, **kwargs) - - def plot_magnification(self, labels=None, which_indices=None, - resolution=50, ax=None, marker='o', s=40, - fignum=None, legend=True, - plot_limits=None, - aspect='auto', updates=False, 
**kwargs): - - import sys - assert "matplotlib" in sys.modules, "matplotlib package has not been imported." - from ..plotting.matplot_dep import dim_reduction_plots - - return dim_reduction_plots.plot_magnification(self, labels, which_indices, - resolution, ax, marker, s, - fignum, False, legend, - plot_limits, aspect, updates, **kwargs) diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index 07295255..81bd22a4 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ -44,7 +44,7 @@ class SparseGPMiniBatch(SparseGP): def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp', Y_metadata=None, normalizer=False, missing_data=False, stochastic=False, batchsize=1): - + # pick a sensible inference method if inference_method is None: if isinstance(likelihood, likelihoods.Gaussian): @@ -76,6 +76,7 @@ class SparseGPMiniBatch(SparseGP): logger.info("Adding Z as parameter") self.link_parameter(self.Z, index=0) self.posterior = None + self._predictive_variable = self.Z def has_uncertain_inputs(self): return isinstance(self.X, VariationalPosterior) diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py index 7d13ef04..6db46a81 100644 --- a/GPy/plotting/matplot_dep/dim_reduction_plots.py +++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py @@ -322,11 +322,11 @@ def plot_magnification(model, labels=None, which_indices=None, index = np.nonzero(labels == ul)[0] if model.input_dim == 1: - x = model.X[index, input_1] + x = X[index, input_1] y = np.zeros(index.size) else: - x = model.X[index, input_1] - y = model.X[index, input_2] + x = X[index, input_1] + y = X[index, input_2] ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), label=this_label) ax.set_xlabel('latent dimension %i' % input_1) @@ -335,13 +335,14 @@ def plot_magnification(model, labels=None, which_indices=None, if not np.all(labels == 1.) 
and legend: ax.legend(loc=0, numpoints=1) - ax.set_xlim(xmin[0], xmax[0]) - ax.set_ylim(xmin[1], xmax[1]) + ax.set_xlim((xmin, xmax)) + ax.set_ylim((ymin, ymax)) ax.grid(b=False) # remove the grid if present, it doesn't look good ax.set_aspect('auto') # set a nice aspect ratio if plot_inducing: - ax.plot(model.Z[:, input_1], model.Z[:, input_2], '^w') + Z = model.Z + ax.scatter(Z[:, input_1], Z[:, input_2], c='w', s=18, marker="^", edgecolor='k', linewidth=.3, alpha=.7) if updates: fig.canvas.show() @@ -386,8 +387,8 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None, this_label = 'class %i' % i m = marker.next() index = np.nonzero(data_labels == ul)[0] - x = model.X[index, input_1] - y = model.X[index, input_2] + x = X[index, input_1] + y = X[index, input_2] ax.scatter(x, y, marker=m, s=data_s, color=Tango.nextMedium(), label=this_label) ax.set_xlabel('latent dimension %i' % input_1) @@ -395,7 +396,7 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None, controller = ImAnnotateController(ax, plot_function, - tuple(model.X.min(0)[:, significant_dims]) + tuple(model.X.max(0)[:, significant_dims]), + tuple(X.min(0)[:, significant_dims]) + tuple(X.max(0)[:, significant_dims]), resolution=resolution, aspect=aspect, cmap=get_cmap('jet'), From 8e437e4bda653cc2dabd511a2be49b9e14c7848b Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 2 Sep 2015 19:40:11 +0100 Subject: [PATCH 14/18] New function to plot just the errorbars of the training data --- GPy/core/gp.py | 86 ++++++++++++- GPy/plotting/matplot_dep/base_plots.py | 21 +++ GPy/plotting/matplot_dep/models_plots.py | 157 ++++++++++++++++++++++- 3 files changed, 256 insertions(+), 8 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index a87ac66e..1f5bc136 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -407,8 +407,8 @@ class GP(Model): def plot(self, plot_limits=None, which_data_rows='all', which_data_ycols='all', fixed_inputs=[], levels=20, samples=0, fignum=None, ax=None, resolution=None, - plot_raw=False, - linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx', predict_kw=None): + plot_raw=False, linecol=None,fillcol=None, Y_metadata=None, + data_symbol='kx', predict_kw=None, plot_training_data=True): """ Plot the posterior of the GP. - In one dimension, the function is plotted with a shaded region identifying two standard deviations. @@ -445,6 +445,8 @@ class GP(Model): :type Y_metadata: dict :param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx') :type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib. + :param plot_training_data: whether or not to plot the training points + :type plot_training_data: boolean """ assert "matplotlib" in sys.modules, "matplotlib package has not been imported." from ..plotting.matplot_dep import models_plots @@ -457,7 +459,85 @@ class GP(Model): which_data_ycols, fixed_inputs, levels, samples, fignum, ax, resolution, plot_raw=plot_raw, Y_metadata=Y_metadata, - data_symbol=data_symbol, predict_kw=predict_kw, **kw) + data_symbol=data_symbol, predict_kw=predict_kw, + plot_training_data=plot_training_data, **kw) + + + def plot_data(self, which_data_rows='all', + which_data_ycols='all', visible_dims=None, + fignum=None, ax=None, data_symbol='kx'): + """ + Plot the training data + - For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed. 
+ + Can plot only part of the data + using which_data_rows and which_data_ycols. + + :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits + :type plot_limits: np.array + :param which_data_rows: which of the training data to plot (default all) + :type which_data_rows: 'all' or a slice object to slice model.X, model.Y + :param which_data_ycols: when the data has several columns (independant outputs), only plot these + :type which_data_ycols: 'all' or a list of integers + :param visible_dims: an array specifying the input dimensions to plot (maximum two) + :type visible_dims: a numpy array + :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D + :type resolution: int + :param levels: number of levels to plot in a contour plot. + :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure + :type levels: int + :param samples: the number of a posteriori samples to plot + :type samples: int + :param fignum: figure to plot on. + :type fignum: figure number + :param ax: axes to plot on. + :type ax: axes handle + :param linecol: color of line to plot [Tango.colorsHex['darkBlue']] + :type linecol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib + :param fillcol: color of fill [Tango.colorsHex['lightBlue']] + :type fillcol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib + :param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx') + :type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib. + """ + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." + from ..plotting.matplot_dep import models_plots + kw = {} + return models_plots.plot_data(self, which_data_rows, + which_data_ycols, visible_dims, + fignum, ax, data_symbol, **kw) + + + def plot_fit_errorbars(self, which_data_rows='all', + which_data_ycols='all', fixed_inputs=[], fignum=None, ax=None, + linecol=None, data_symbol='kx', predict_kw=None, plot_training_data=True): + + """ + Plot the posterior error bars corresponding to the training data + - For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed. + + Can plot only part of the data + using which_data_rows and which_data_ycols. + + :param which_data_rows: which of the training data to plot (default all) + :type which_data_rows: 'all' or a slice object to slice model.X, model.Y + :param which_data_ycols: when the data has several columns (independant outputs), only plot these + :type which_data_rows: 'all' or a list of integers + :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. + :type fixed_inputs: a list of tuples + :param fignum: figure to plot on. + :type fignum: figure number + :param ax: axes to plot on. + :type ax: axes handle + :param plot_training_data: whether or not to plot the training points + :type plot_training_data: boolean + """ + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
+        from ..plotting.matplot_dep import models_plots
+        kw = {}
+        return models_plots.plot_fit_errorbars(self, which_data_rows, which_data_ycols, fixed_inputs,
+                                               fignum, ax, linecol, data_symbol,
+                                               predict_kw, plot_training_data, **kw)
+
     def input_sensitivity(self, summarize=True):
         """
diff --git a/GPy/plotting/matplot_dep/base_plots.py b/GPy/plotting/matplot_dep/base_plots.py
index dade87cf..80f114b1 100644
--- a/GPy/plotting/matplot_dep/base_plots.py
+++ b/GPy/plotting/matplot_dep/base_plots.py
@@ -47,6 +47,27 @@ def gpplot(x, mu, lower, upper, edgecol='#3300FF', fillcol='#33CCFF', ax=None, f
     return plots
 
+def gperrors(x, mu, lower, upper, edgecol=None, ax=None, fignum=None, **kwargs):
+    # plot asymmetric error bars (mu - lower, upper - mu) around the mean
+    _, axes = ax_default(fignum, ax)
+
+    mu = mu.flatten()
+    x = x.flatten()
+    lower = lower.flatten()
+    upper = upper.flatten()
+
+    plots = []
+
+    if edgecol is None:
+        edgecol = '#3300FF'
+
+    if 'alpha' not in kwargs:
+        kwargs['alpha'] = 0.3
+
+    plots.append(axes.errorbar(x, mu, yerr=np.vstack([mu - lower, upper - mu]), color=edgecol, **kwargs))
+    plots[-1][0].remove()  # drop the centre line, keeping only the bars
+    return plots
+
+
 def removeRightTicks(ax=None):
     ax = ax or pb.gca()
     for i, line in enumerate(ax.get_yticklines()):
diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py
index d0f6c952..e1ba327e 100644
--- a/GPy/plotting/matplot_dep/models_plots.py
+++ b/GPy/plotting/matplot_dep/models_plots.py
@@ -3,19 +3,79 @@
 import numpy as np
 from . import Tango
-from base_plots import gpplot, x_frame1D, x_frame2D
+from base_plots import gpplot, x_frame1D, x_frame2D, gperrors
 from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
 from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
 from scipy import sparse
 from ...core.parameterization.variational import VariationalPosterior
 from matplotlib import pyplot as plt
+
+
+def plot_data(model, which_data_rows='all',
+              which_data_ycols='all', visible_dims=None,
+              fignum=None, ax=None, data_symbol='kx', mew=1.5):
+    """
+    Plot the training data
+      - For inputs with more than two dimensions, use visible_dims to select the (at most two) dimensions to plot.
+
+    Can plot only part of the data
+    using which_data_rows and which_data_ycols.
+
+    :param which_data_rows: which of the training data to plot (default all)
+    :type which_data_rows: 'all' or a slice object to slice model.X, model.Y
+    :param which_data_ycols: when the data has several columns (independent outputs), only plot these
+    :type which_data_ycols: 'all' or a list of integers
+    :param visible_dims: an array specifying the input dimensions to plot (maximum two)
+    :type visible_dims: a numpy array
+    :param fignum: figure to plot on.
+    :type fignum: figure number
+    :param ax: axes to plot on.
+    :type ax: axes handle
+    :param data_symbol: matplotlib format string for the data points, by default a black cross ('kx')
+    :param mew: marker edge width for the data symbols
+    """
+    #deal with optional arguments
+    if which_data_rows == 'all':
+        which_data_rows = slice(None)
+    if which_data_ycols == 'all':
+        which_data_ycols = np.arange(model.output_dim)
+
+    if ax is None:
+        fig = plt.figure(num=fignum)
+        ax = fig.add_subplot(111)
+
+    #data
+    X = model.X
+    Y = model.Y
+
+    #work out what the inputs are for plotting (1D or 2D)
+    if visible_dims is None:
+        visible_dims = np.arange(model.input_dim)
+    assert visible_dims.size <= 2, "Cannot plot more than two visible input dimensions"
+    free_dims = visible_dims
+    plots = {}
+    #one dimensional plotting
+    if len(free_dims) == 1:
+        for d in which_data_ycols:
+            plots['dataplot'] = ax.plot(X[which_data_rows, free_dims], Y[which_data_rows, d], data_symbol, mew=mew)
+
+    #2D plotting
+    elif len(free_dims) == 2:
+        for d in which_data_ycols:
+            plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40,
+                                           Y[which_data_rows, d], cmap=plt.cm.jet, vmin=Y.min(), vmax=Y.max(), linewidth=0.)
+
+    else:
+        raise NotImplementedError("Cannot define a frame with more than two input dimensions")
+    return plots
+
+
 def plot_fit(model, plot_limits=None, which_data_rows='all',
         which_data_ycols='all', fixed_inputs=[],
         levels=20, samples=0, fignum=None, ax=None, resolution=None,
         plot_raw=False,
         linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx',
-        apply_link=False, samples_f=0, plot_uncertain_inputs=True, predict_kw=None):
+        apply_link=False, samples_f=0, plot_uncertain_inputs=True, predict_kw=None, plot_training_data=True):
     """
     Plot the posterior of the GP.
       - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@@ -43,7 +103,6 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
     :type fignum: figure number
     :param ax: axes to plot on.
     :type ax: axes handle
-    :type output: integer (first output is 0)
     :param linecol: color of line to plot.
:type linecol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green), as is standard in matplotlib
     :param fillcol: color of the shaded fill
@@ -52,6 +111,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
     :type apply_link: boolean
     :param samples_f: the number of a posteriori f samples to plot from p(f*|y)
     :type samples_f: int
+    :param plot_training_data: whether or not to plot the training points
+    :type plot_training_data: boolean
     """
     #deal with optional arguments
     if which_data_rows == 'all':
@@ -116,7 +177,11 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
         for d in which_data_ycols:
             plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
-            if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5)
+            if not plot_raw and plot_training_data:
+                plots['dataplot'] = plot_data(model=model, which_data_rows=which_data_rows,
+                                              visible_dims=free_dims, data_symbol=data_symbol, mew=1.5, ax=ax, fignum=fignum)
+
         #optionally plot some samples
         if samples: #NOTE not tested with fixed_inputs
@@ -196,7 +261,9 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
         for d in which_data_ycols:
             m_d = m[:,d].reshape(resolution, resolution).T
             plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=plt.cm.jet)
-            if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=plt.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
+            if not plot_raw and plot_training_data:
+                plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=plt.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
 
     #set the limits of the plot to some sensible values
     ax.set_xlim(xmin[0], xmax[0])
@@ -261,3 +328,83 @@ def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True, X_
         return f_inputs
     else:
         return X
+
+
+def plot_fit_errorbars(model, which_data_rows='all',
+                       which_data_ycols='all', fixed_inputs=[],
+                       fignum=None, ax=None,
+                       linecol='red', data_symbol='kx',
+                       predict_kw=None, plot_training_data=True):
+    """
+    Plot the posterior error bars corresponding to the training data
+      - For inputs with more than two dimensions, use fixed_inputs to fix some of the inputs when plotting.
+
+    Can plot only part of the data
+    using which_data_rows and which_data_ycols.
+
+    :param which_data_rows: which of the training data to plot (default all)
+    :type which_data_rows: 'all' or a slice object to slice model.X, model.Y
+    :param which_data_ycols: when the data has several columns (independent outputs), only plot these
+    :type which_data_ycols: 'all' or a list of integers
+    :param fixed_inputs: a list of tuples [(i,v), (i,v)...], specifying that input dimension i should be set to value v.
+    :type fixed_inputs: a list of tuples
+    :param fignum: figure to plot on.
+    :type fignum: figure number
+    :param ax: axes to plot on.
+    :type ax: axes handle
+    :param linecol: color of the error bars
+    :param data_symbol: marker symbol for the data points, by default a black cross ('kx')
+    :param predict_kw: keyword arguments passed through to the model's predict call
+    :param plot_training_data: whether or not to plot the training points
+    :type plot_training_data: boolean
+    """
+    #deal with optional arguments
+    if which_data_rows == 'all':
+        which_data_rows = slice(None)
+    if which_data_ycols == 'all':
+        which_data_ycols = np.arange(model.output_dim)
+
+    if ax is None:
+        fig = plt.figure(num=fignum)
+        ax = fig.add_subplot(111)
+
+    X = model.X
+    Y = model.Y
+
+    if predict_kw is None:
+        predict_kw = {}
+
+    #work out what the inputs are for plotting (1D or 2D)
+    fixed_dims = np.array([i for i, v in fixed_inputs])
+    free_dims = np.setdiff1d(np.arange(model.input_dim), fixed_dims)
+    plots = {}
+
+    #one dimensional plotting
+    if len(free_dims) == 1:
+        m, v = model.predict(X, full_cov=False, Y_metadata=model.Y_metadata, **predict_kw)
+        fmu, fv = model._raw_predict(X, full_cov=False, **predict_kw)
+        lower, upper = model.likelihood.predictive_quantiles(fmu, fv, (2.5, 97.5), Y_metadata=model.Y_metadata)
+
+        for d in which_data_ycols:
+            plots['gperrors'] = gperrors(X, m[:, d], lower[:, d], upper[:, d], edgecol=linecol, ax=ax, fignum=fignum)
+        if plot_training_data:
+            plots['dataplot'] = plot_data(model=model, which_data_rows=which_data_rows,
+                                          visible_dims=free_dims, data_symbol=data_symbol, mew=1.5, ax=ax, fignum=fignum)
+
+        #set the limits of the plot to some sensible values
+        ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper))
+        ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+        ax.set_xlim(X[:, free_dims].min(), X[:, free_dims].max())
+        ax.set_ylim(ymin, ymax)
+
+    elif len(free_dims) == 2:
+        raise NotImplementedError("Error bars for two free input dimensions are not implemented yet")
+
+    else:
+        raise NotImplementedError("Cannot define a frame with more than two input dimensions")
+    return plots

From 839e3dc6f03444f916cd4ffd75b1ce3f272b4fc5 Mon Sep 17 00:00:00 2001
From: Max Zwiessele
Date: Thu, 3 Sep 2015 09:33:07 +0100
Subject: [PATCH 15/18] [magnification] plot_magnification expanded

---
 GPy/core/gp.py                                     | 19 ++++----
 GPy/core/sparse_gp.py                              | 15 ------
 GPy/kern/_src/add.py                               | 16 +++++--
 GPy/kern/_src/kern.py                              |  2 +-
 GPy/kern/_src/static.py                            |  5 ++
 GPy/models/bayesian_gplvm.py                       | 14 ------
 .../matplot_dep/dim_reduction_plots.py             | 46 ++++++++++++-------
 .../controllers/axis_event_controller.py           |  7 ++-
 8 files changed, 64 insertions(+), 60 deletions(-)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 1e285092..e932fabd 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -371,7 +371,7 @@ class GP(Model):
             var_jac = compute_cov_inner(self.posterior.woodbury_inv)
         return mean_jac, var_jac
 
-    def predict_wishard_embedding(self, Xnew, kern=None):
+    def predict_wishard_embedding(self, Xnew, kern=None, mean=True, covariance=True):
         """
         Predict the Wishart embedding G of the GP. This is the metric that the probabilistic mapping f induces on the input space.
@@ -391,13 +391,16 @@ class GP(Model):
         mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)
         if var_jac.ndim == 3:
             Sigma = np.einsum('iqd,ipd->iqp', var_jac, var_jac)
-            G = mumuT + Sigma
         else:
-            Sigma = np.einsum('iq,ip->iqp', var_jac, var_jac)
-            G = mumuT + self.output_dim*Sigma
+            Sigma = self.output_dim*np.einsum('iq,ip->iqp', var_jac, var_jac)
+        G = 0.
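+        # G accumulates only the requested pieces: the outer product of the
+        # predictive-mean Jacobian (mumuT) and/or the variance term (Sigma)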
+        if mean:
+            G += mumuT
+        if covariance:
+            G += Sigma
         return G
 
-    def predict_magnification(self, Xnew, kern=None):
+    def predict_magnification(self, Xnew, kern=None, mean=True, covariance=True):
         """
         Predict the magnification factor as
 
             sqrt(det(G))
 
         for each point n in Xnew, where G is the Wishart embedding at that point.
         """
-        G = self.predict_wishard_embedding(Xnew, kern)
+        G = self.predict_wishard_embedding(Xnew, kern, mean, covariance)
         from ..util.linalg import jitchol
         return np.array([np.sqrt(np.exp(2*np.sum(np.log(np.diag(jitchol(G[n, :, :])))))) for n in range(Xnew.shape[0])])
         #return np.array([np.sqrt(np.linalg.det(G[n, :, :])) for n in range(Xnew.shape[0])])
@@ -569,7 +572,7 @@ class GP(Model):
     def plot_magnification(self, labels=None, which_indices=None,
                 resolution=50, ax=None, marker='o', s=40,
                 fignum=None, legend=True,
                 plot_limits=None,
-                aspect='auto', updates=False, **kwargs):
+                aspect='auto', updates=False, plot_inducing=True, kern=None, **kwargs):
         import sys
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ..plotting.matplot_dep import dim_reduction_plots
 
         return dim_reduction_plots.plot_magnification(self, labels, which_indices,
                                 resolution, ax, marker, s,
-                                fignum, False, legend,
+                                fignum, plot_inducing, legend,
                                 plot_limits, aspect, updates, **kwargs)
 
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 5ab302db..ac5fb62b 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -181,18 +181,3 @@ class SparseGP(GP):
                 var[i] = np.diag(var_)+p0-t2
 
         return mu, var
-
-    def plot_magnification(self, labels=None, which_indices=None,
-                resolution=50, ax=None, marker='o', s=40,
-                fignum=None, legend=True,
-                plot_limits=None,
-                aspect='auto', updates=False, plot_inducing=True, **kwargs):
-
-        import sys
-        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
-        from ..plotting.matplot_dep import dim_reduction_plots
-
-        return dim_reduction_plots.plot_magnification(self, labels, which_indices,
-                                resolution, ax, marker, s,
-                                fignum, plot_inducing, legend,
-                                plot_limits, aspect, updates, **kwargs)
\ No newline at end of file
diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
index 696a8b04..132c53d2 100644
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@@ -14,7 +14,7 @@ class Add(CombinationKernel):
 
     This kernel will take over the active dims of its subkernels passed in.
     """
-    def __init__(self, subkerns, name='add'):
+    def __init__(self, subkerns, name='sum'):
         for i, kern in enumerate(subkerns[:]):
             if isinstance(kern, Add):
                 del subkerns[i]
@@ -71,11 +71,21 @@ class Add(CombinationKernel):
         target = np.zeros(X.shape)
         [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
         return target
-
+
+    def gradients_XX(self, dL_dK, X, X2):
+        target = 0.
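+        # accumulate the second derivatives contributed by every summand kernel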
+ [target.__iadd__(p.gradients_XX(dL_dK, X, X2)) for p in self.parts] + return target + + def gradients_XX_diag(self, dL_dKdiag, X): + target = np.zeros(X.shape) + [target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X)) for p in self.parts] + return target + @Cache_this(limit=2, force_kwargs=['which_parts']) def psi0(self, Z, variational_posterior): return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts)) - + @Cache_this(limit=2, force_kwargs=['which_parts']) def psi1(self, Z, variational_posterior): return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts)) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index aae4460a..52745ddf 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -102,7 +102,7 @@ class Kern(Parameterized): raise NotImplementedError def gradients_XX(self, dL_dK, X, X2): raise(NotImplementedError, "This is the second derivative of K wrt X and X2, and not implemented for this kernel") - def gradients_XX_diag(self, dL_dK, X, X2): + def gradients_XX_diag(self, dL_dKdiag, X): raise(NotImplementedError, "This is the diagonal of the second derivative of K wrt X and X2, and not implemented for this kernel") def gradients_X_diag(self, dL_dKdiag, X): raise NotImplementedError diff --git a/GPy/kern/_src/static.py b/GPy/kern/_src/static.py index 64d14018..b32077e9 100644 --- a/GPy/kern/_src/static.py +++ b/GPy/kern/_src/static.py @@ -24,6 +24,11 @@ class Static(Kern): def gradients_X_diag(self, dL_dKdiag, X): return np.zeros(X.shape) + def gradients_XX(self, dL_dK, X, X2): + return np.zeros((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64) + def gradients_XX_diag(self, dL_dKdiag, X): + return np.zeros(X.shape) + def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): return np.zeros(Z.shape) diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index ff0d6855..3ac703fe 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -137,20 +137,6 @@ class BayesianGPLVM(SparseGP_MPI): fignum, plot_inducing, legend, plot_limits, aspect, updates, predict_kwargs, imshow_kwargs) - def plot_magnification(self, labels=None, which_indices=None, - resolution=50, ax=None, marker='o', s=40, - fignum=None, legend=True, - plot_limits=None, - aspect='auto', updates=False, **kwargs): - import sys - assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
- from ..plotting.matplot_dep import dim_reduction_plots - - return dim_reduction_plots.plot_magnification(self, labels, which_indices, - resolution, ax, marker, s, - fignum, False, legend, - plot_limits, aspect, updates, **kwargs) - def do_test_latents(self, Y): """ Compute the latent representation for a set of new points Y diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py index 6db46a81..a36f168d 100644 --- a/GPy/plotting/matplot_dep/dim_reduction_plots.py +++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py @@ -9,7 +9,8 @@ import itertools try: import Tango from matplotlib.cm import get_cmap - import pylab as pb + from matplotlib import pyplot as pb + from matplotlib import cm except: pass @@ -137,7 +138,7 @@ def plot_latent(model, labels=None, which_indices=None, view = ImshowController(ax, plot_function, (xmin, ymin, xmax, ymax), resolution, aspect=aspect, interpolation='bilinear', - cmap=pb.cm.binary, **imshow_kwargs) + cmap=cm.binary, **imshow_kwargs) # make sure labels are in order of input: labels = np.asarray(labels) @@ -192,18 +193,18 @@ def plot_latent(model, labels=None, which_indices=None, if updates: try: - ax.figure.canvas.show() + fig.canvas.show() except Exception as e: print("Could not invoke show: {}".format(e)) - raw_input('Enter to continue') - view.deactivate() + #raw_input('Enter to continue') + return view return ax def plot_magnification(model, labels=None, which_indices=None, resolution=60, ax=None, marker='o', s=40, fignum=None, plot_inducing=False, legend=True, plot_limits=None, - aspect='auto', updates=False): + aspect='auto', updates=False, mean=True, covariance=True, kern=None): """ :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc) :param resolution: the resolution of the grid on which to evaluate the predictive variance @@ -211,6 +212,8 @@ def plot_magnification(model, labels=None, which_indices=None, if ax is None: fig = pb.figure(num=fignum) ax = fig.add_subplot(111) + else: + fig = ax.figure Tango.reset() if labels is None: @@ -295,13 +298,13 @@ def plot_magnification(model, labels=None, which_indices=None, def plot_function(x): Xtest_full = np.zeros((x.shape[0], X.shape[1])) Xtest_full[:, [input_1, input_2]] = x - mf = model.predict_magnification(Xtest_full) + mf = model.predict_magnification(Xtest_full, kern=kern, mean=mean, covariance=covariance) return mf view = ImshowController(ax, plot_function, (xmin, ymin, xmax, ymax), resolution, aspect=aspect, interpolation='bilinear', - cmap=pb.cm.gray) + cmap=cm.gray) # make sure labels are in order of input: ulabels = [] @@ -317,7 +320,7 @@ def plot_magnification(model, labels=None, which_indices=None, elif type(ul) is np.int64: this_label = 'class %i' % ul else: - this_label = 'class %i' % i + this_label = unicode(ul) m = marker.next() index = np.nonzero(labels == ul)[0] @@ -327,7 +330,7 @@ def plot_magnification(model, labels=None, which_indices=None, else: x = X[index, input_1] y = X[index, input_2] - ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), label=this_label) + ax.scatter(x, y, marker=m, s=s, c=Tango.nextMedium(), label=this_label, linewidth=.2, edgecolor='k', alpha=.9) ax.set_xlabel('latent dimension %i' % input_1) ax.set_ylabel('latent dimension %i' % input_2) @@ -337,18 +340,27 @@ def plot_magnification(model, labels=None, which_indices=None, ax.set_xlim((xmin, xmax)) ax.set_ylim((ymin, ymax)) - ax.grid(b=False) # remove the grid if present, it doesn't look good 
-    ax.set_aspect('auto') # set a nice aspect ratio
-    if plot_inducing:
+    if plot_inducing and hasattr(model, 'Z'):
         Z = model.Z
         ax.scatter(Z[:, input_1], Z[:, input_2], c='w', s=18, marker="^", edgecolor='k', linewidth=.3, alpha=.7)
-    if updates:
-        fig.canvas.show()
-        raw_input('Enter to continue')
+    try:
+        fig.canvas.draw()
+        fig.tight_layout()
+        fig.canvas.draw()
+    except Exception as e:
+        print("Could not invoke tight layout: {}".format(e))
 
-    pb.title('Magnification Factor')
+    if updates:
+        try:
+            fig.canvas.draw()
+            fig.canvas.show()
+        except Exception as e:
+            print("Could not invoke show: {}".format(e))
+    return view
     return ax
 
diff --git a/GPy/plotting/matplot_dep/latent_space_visualizations/controllers/axis_event_controller.py b/GPy/plotting/matplot_dep/latent_space_visualizations/controllers/axis_event_controller.py
index 62b622c5..15fcd098 100644
--- a/GPy/plotting/matplot_dep/latent_space_visualizations/controllers/axis_event_controller.py
+++ b/GPy/plotting/matplot_dep/latent_space_visualizations/controllers/axis_event_controller.py
@@ -9,6 +9,9 @@ class AxisEventController(object):
     def __init__(self, ax):
         self.ax = ax
         self.activate()
+    def __del__(self):
+        self.deactivate()
     def deactivate(self):
         for cb_class in self.ax.callbacks.callbacks.values():
             for cb_num in cb_class.keys():
@@ -81,9 +84,9 @@ class BufferedAxisChangedController(AxisChangedController):
     def __init__(self, ax, plot_function, plot_limits, resolution=50, update_lim=None, **kwargs):
         """
         Buffered axis changed controller. Controls the buffer and handles update events for when the axes change.
-
+        The first plot is drawn within the given plot limits; subsequent redraws reuse the buffered limits.
-
+        :param plot_function: function to use for creating the image for plotting (returns ndarray-like);
             plot_function gets called with (2D!)
Xtest grid if replotting required From f6d07ff76acaab59950ff75630a3a5183bca38fe Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 3 Sep 2015 10:19:57 +0100 Subject: [PATCH 16/18] [magnification] added static kernel support and faster derivative computations --- GPy/core/gp.py | 9 ++++++--- GPy/kern/_src/add.py | 5 ++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 60038263..c278d509 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -352,13 +352,16 @@ class GP(Model): for i in range(self._predictive_variable.shape[0]): dK_dXnew_full[i] = kern.gradients_X([[1.]], Xnew, self._predictive_variable[[i]]) + if full_cov: + dK2_dXdX = kern.gradients_XX([[1.]], Xnew) + else: + dK2_dXdX = kern.gradients_XX_diag([[1.]], Xnew) + def compute_cov_inner(wi): if full_cov: # full covariance gradients: - dK2_dXdX = kern.gradients_XX([[1.]], Xnew) var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full) else: - dK2_dXdX = kern.gradients_XX_diag([[1.]], Xnew) var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full) return var_jac @@ -568,7 +571,7 @@ class GP(Model): which_data_ycols, fixed_inputs, levels, samples, fignum, ax, resolution, plot_raw=plot_raw, Y_metadata=Y_metadata, - data_symbol=data_symbol, predict_kw=predict_kw, + data_symbol=data_symbol, predict_kw=predict_kw, plot_training_data=plot_training_data, **kw) diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 132c53d2..3c97a08a 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -73,7 +73,10 @@ class Add(CombinationKernel): return target def gradients_XX(self, dL_dK, X, X2): - target = 0. + if X2 is None: + target = np.zeros((X.shape[0], X.shape[0], X.shape[1])) + else: + target = np.zeros((X.shape[0], X2.shape[0], X.shape[1])) [target.__iadd__(p.gradients_XX(dL_dK, X, X2)) for p in self.parts] return target From 7376627895996d33ba32e2ea3a38151f2227f06f Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 3 Sep 2015 10:23:09 +0100 Subject: [PATCH 17/18] [magnification] static corrections --- GPy/kern/_src/static.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPy/kern/_src/static.py b/GPy/kern/_src/static.py index b32077e9..e7bee6c2 100644 --- a/GPy/kern/_src/static.py +++ b/GPy/kern/_src/static.py @@ -25,6 +25,8 @@ class Static(Kern): return np.zeros(X.shape) def gradients_XX(self, dL_dK, X, X2): + if X2 is None: + X2 = X return np.zeros((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64) def gradients_XX_diag(self, dL_dKdiag, X): return np.zeros(X.shape) From 3dbc32f4b19c6487018ea567d74bf0922fb2e32d Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 3 Sep 2015 12:59:51 +0100 Subject: [PATCH 18/18] [add] renamed>sum --- GPy/kern/_src/kern.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 52745ddf..d5482b80 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -180,7 +180,7 @@ class Kern(Parameterized): def __iadd__(self, other): return self.add(other) - def add(self, other, name='add'): + def add(self, other, name='sum'): """ Add another kernel to this one.
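
A minimal usage sketch of the new entry points in this series (untested; the toy
data, the kernel choice and the model m below are illustrative assumptions, not
part of the patches):

    import numpy as np
    import GPy

    X = np.random.rand(50, 1)
    Y = np.sin(6 * X) + 0.05 * np.random.randn(50, 1)

    # Add kernels are now constructed with name='sum' (patch 18)
    k = GPy.kern.RBF(1) + GPy.kern.White(1)

    m = GPy.models.GPRegression(X, Y, k)
    m.optimize()

    m.plot(plot_training_data=False)  # new switch from patch 14
    m.plot_data()                     # training points only (patch 14)
    m.plot_fit_errorbars()            # posterior error bars at the training inputs (patch 14)

The magnification utilities from patches 15-17 follow the same pattern for latent
variable models, e.g. model.plot_magnification(plot_inducing=True) on a Bayesian
GPLVM; the per-class overrides have been removed, so every model class now routes
through the single implementation in dim_reduction_plots.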