diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index ddef0647..aba065f9 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -106,6 +106,13 @@ class GP(Model):
         self.link_parameter(self.likelihood)
         self.posterior = None
 
+        # The predictive variable used for prediction through the posterior object's
+        # woodbury_vector and woodbury_inv is stored as self._predictive_variable.
+        # This is usually just a link to self.X (full GP) or self.Z (sparse GP).
+        # Make sure to set this variable and the predict functions will "just work",
+        # as long as the posterior has the right woodbury entries.
+        self._predictive_variable = self.X
+
     def set_XY(self, X=None, Y=None, trigger_update=True):
         """
@@ -207,6 +214,7 @@ class GP(Model):
                 var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
             elif self.posterior.woodbury_inv.ndim == 3:
                 var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
+                from ..util.linalg import mdot
                 for i in range(var.shape[2]):
                     var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
             var = var
@@ -302,6 +310,103 @@ class GP(Model):
 
         return dmu_dX, dv_dX
 
+    def predict_jacobian(self, Xnew, kern=None, full_cov=True):
+        """
+        Compute the derivatives of the posterior of the GP.
+
+        Given a set of points at which to predict X* (size [N*,Q]), compute the
+        mean and variance of the derivative. Resulting arrays are sized:
+
+         dmu_dX* -- [N*, Q, D], where D is the number of outputs of this GP (usually one).
+           Note that this is the mean and variance of the derivative,
+           not the derivative of the mean and variance! (See predictive_gradients for that.)
+
+         dv_dX*  -- [N*, Q], since all outputs have the same variance.
+           Not yet implemented for missing data; once it is, there will be
+           one output variance per output dimension.
+
+        :param Xnew: The points at which to get the predictive gradients.
+        :type Xnew: np.ndarray (N* x self.input_dim)
+        :param kern: The kernel to compute the Jacobian for.
+        :param boolean full_cov: whether to return the full covariance of the Jacobian.
+
+        :returns: dmu_dX, dv_dX
+        :rtype: [np.ndarray (N*, Q, D), np.ndarray (N*, Q, (D))]
+
+        Note: we always sum over the input_dim gradients, as the off-diagonal
+        terms in input_dim are not needed for further calculations.
+        This is a compromise made for speed; mathematically the Jacobian
+        would carry another dimension of size Q.
+ """ + if kern is None: + kern = self.kern + + mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim)) + + for i in range(self.output_dim): + mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable) + + dK_dXnew_full = np.empty((self._predictive_variable.shape[0], Xnew.shape[0], Xnew.shape[1])) + for i in range(self._predictive_variable.shape[0]): + dK_dXnew_full[i] = kern.gradients_X([[1.]], Xnew, self._predictive_variable[[i]]) + + def compute_cov_inner(wi): + if full_cov: + # full covariance gradients: + dK2_dXdX = kern.gradients_XX([[1.]], Xnew) + var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full) + else: + dK2_dXdX = kern.gradients_XX_diag([[1.]], Xnew) + var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full) + return var_jac + + if self.posterior.woodbury_inv.ndim == 3: + var_jac = [] + for d in range(self.posterior.woodbury_inv.shape[2]): + var_jac.append(compute_cov_inner(self.posterior.woodbury_inv[:, :, d])) + var_jac = np.concatenate(var_jac) + else: + var_jac = compute_cov_inner(self.posterior.woodbury_inv) + return mean_jac, var_jac + + def predict_wishard_embedding(self, Xnew, kern=None): + """ + Predict the wishard embedding G of the GP. This is the density of the + input of the GP defined by the probabilistic function mapping f. + G = J_mean.T*J_mean + output_dim*J_cov. + + :param array-like Xnew: The points at which to evaluate the magnification. + :param :py:class:`~GPy.kern.Kern` kern: The kernel to use for the magnification. + + Supplying only a part of the learning kernel gives insights into the density + of the specific kernel part of the input function. E.g. one can see how dense the + linear part of a kernel is compared to the non-linear part etc. + """ + if kern is None: + kern = self.kern + + mu_jac, var_jac = self.predict_jacobian(Xnew, kern, full_cov=False) + mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac) + if var_jac.ndim == 3: + Sigma = np.einsum('iqd,ipd->iqp', var_jac, var_jac) + G = mumuT + Sigma + else: + Sigma = np.einsum('iq,ip->iqp', var_jac, var_jac) + G = mumuT + self.output_dim*Sigma + return G + + def predict_magnification(self, Xnew, kern=None): + """ + Predict the magnification factor as + + sqrt(det(G)) + + for each point N in Xnew + """ + from ..util.linalg import jitchol + G = self.predict_wishard_embedding(Xnew, kern) + return np.array([2*np.sqrt(np.exp(np.sum(np.log(np.diag(jitchol(G[n, :, :])))))) for n in range(Xnew.shape[0])]) + def posterior_samples_f(self,X,size=10, full_cov=True): """ Samples the posterior GP at the points X. diff --git a/GPy/core/mapping.py b/GPy/core/mapping.py index 30614384..9853ea8a 100644 --- a/GPy/core/mapping.py +++ b/GPy/core/mapping.py @@ -32,7 +32,7 @@ class Bijective_mapping(Mapping): also back from f to X. The inverse mapping is called g(). 
""" def __init__(self, input_dim, output_dim, name='bijective_mapping'): - super(Bijective_apping, self).__init__(name=name) + super(Bijective_mapping, self).__init__(name=name) def g(self, f): """Inverse mapping from output domain of the function to the inputs.""" diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index c7c99be6..ac5fb62b 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -59,6 +59,8 @@ class SparseGP(GP): logger.info("Adding Z as parameter") self.link_parameter(self.Z, index=0) self.posterior = None + self._predictive_variable = self.Z + def has_uncertain_inputs(self): return isinstance(self.X, VariationalPosterior) @@ -114,10 +116,10 @@ class SparseGP(GP): Make a prediction for the latent function values. For certain inputs we give back a full_cov of shape NxN, - if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is of, + if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is of, we take only the diagonal elements across N. - - For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we + + For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we return a NxDxD matrix and in the not full_cov case, we return the diagonal elements across D (NxD). This is for both with and without missing data. See for missing data SparseGP implementation py:class:'~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch'. """ @@ -125,7 +127,7 @@ class SparseGP(GP): if kern is None: kern = self.kern if not isinstance(Xnew, VariationalPosterior): - Kx = kern.K(self.Z, Xnew) + Kx = kern.K(self._predictive_variable, Xnew) mu = np.dot(Kx.T, self.posterior.woodbury_vector) if full_cov: Kxx = kern.K(Xnew) @@ -149,28 +151,28 @@ class SparseGP(GP): if self.mean_function is not None: mu += self.mean_function.f(Xnew) else: - psi0_star = kern.psi0(self.Z, Xnew) - psi1_star = kern.psi1(self.Z, Xnew) + psi0_star = kern.psi0(self._predictive_variable, Xnew) + psi1_star = kern.psi1(self._predictive_variable, Xnew) #psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code. la = self.posterior.woodbury_vector mu = np.dot(psi1_star, la) # TODO: dimensions? 
-
-            if full_cov:
+
+            if full_cov:
                 var = np.empty((Xnew.shape[0], la.shape[1], la.shape[1]))
                 di = np.diag_indices(la.shape[1])
-            else:
+            else:
                 var = np.empty((Xnew.shape[0], la.shape[1]))
-
+
             for i in range(Xnew.shape[0]):
                 _mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
-                psi2_star = kern.psi2(self.Z, NormalPosterior(_mu, _var))
+                psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var))
                 tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
                 var_ = mdot(la.T, tmp, la)
                 p0 = psi0_star[i]
                 t = np.atleast_3d(self.posterior.woodbury_inv)
                 t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2)
-
+
                 if full_cov:
                     var_[di] += p0
                     var_[di] += -t2
diff --git a/GPy/core/verbose_optimization.py b/GPy/core/verbose_optimization.py
index 08c5e2dd..57f0c9e6 100644
--- a/GPy/core/verbose_optimization.py
+++ b/GPy/core/verbose_optimization.py
@@ -146,7 +146,7 @@ class VerboseOptimization(object):
         seconds = time.time()-self.start
         #sys.stdout.write(" "*len(self.message))
         self.deltat += seconds
-        if self.deltat > .2:
+        if self.deltat > .3 or seconds < .3:
             self.print_out(seconds)
             self.deltat = 0
diff --git a/GPy/kern/_src/basis_funcs.py b/GPy/kern/_src/basis_funcs.py
index a6c1f36c..dc21a687 100644
--- a/GPy/kern/_src/basis_funcs.py
+++ b/GPy/kern/_src/basis_funcs.py
@@ -11,7 +11,7 @@ class BasisFuncKernel(Kern):
     def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'):
         """
         Abstract superclass for kernels with explicit basis functions for use in GPy.
-        
+
         This class does NOT automatically add an offset to the design matrix phi!
         """
         super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
@@ -23,24 +23,24 @@ class BasisFuncKernel(Kern):
             variance = np.array(variance)
             self.variance = Param('variance', variance, Logexp())
             self.link_parameter(self.variance)
-    
+
     def parameters_changed(self):
         self.alpha = np.sqrt(self.variance)
         self.beta = 1./self.variance
-    
+
     @Cache_this(limit=3, ignore_args=())
     def phi(self, X):
         return self._phi(X)
 
     def _phi(self, X):
         raise NotImplementedError('Overwrite this _phi function, which maps the input X into the higher dimensional space and returns the design matrix Phi')
-    
+
     def K(self, X, X2=None):
         return self._K(X, X2)
 
     def Kdiag(self, X, X2=None):
         return np.diag(self._K(X, X2))
-    
+
     def update_gradients_full(self, dL_dK, X, X2=None):
         if self.ARD:
             phi1 = self.phi(X)
@@ -51,22 +51,22 @@ class BasisFuncKernel(Kern):
             self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi2)
         else:
             self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2)) * self.beta
-    
+
     def update_gradients_diag(self, dL_dKdiag, X):
         if self.ARD:
             phi1 = self.phi(X)
             self.variance.gradient = np.einsum('i,iq,iq->q', dL_dKdiag, phi1, phi1)
         else:
             self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X)) * self.beta
-    
+
     def concatenate_offset(self, X):
         return np.c_[np.ones((X.shape[0], 1)), X]
-    
+
     def posterior_inf(self, X=None, posterior=None):
         """
-        Do the posterior inference on the parameters given this kernels functions 
-        and the model posterior, which has to be a GPy posterior, usually found at m.posterior, if m is a GPy model. 
+        Do the posterior inference on the parameters given this kernel's functions
+        and the model posterior, which has to be a GPy posterior, usually found at m.posterior, if m is a GPy model.
+        If not given, we search for the highest parent that is a model, containing the posterior, and for X accordingly.
         """
         if X is None:
             try:
@@ -80,7 +80,7 @@ class BasisFuncKernel(Kern):
                 raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference")
         phi_alpha = self.phi(X) * self.variance
         return (phi_alpha).T.dot(posterior.woodbury_vector), (np.eye(phi_alpha.shape[1])*self.variance - mdot(phi_alpha.T, posterior.woodbury_inv, phi_alpha))
-    
+
     @Cache_this(limit=3, ignore_args=())
     def _K(self, X, X2):
         if X2 is None or X is X2:
@@ -95,35 +95,35 @@ class BasisFuncKernel(Kern):
             phi1 = phi1[:, None]
             phi2 = phi2[:, None]
         return phi1.dot(phi2.T)
-    
-    
+
+
 class LinearSlopeBasisFuncKernel(BasisFuncKernel):
     def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='linear_segment'):
         """
         A linear segment transformation. The segments start at start, \
-        are then linear to stop and constant again. The segments are 
-        normalized, so that they have exactly as much mass above 
-        as below the origin. 
-        
-        Start and stop can be tuples or lists of starts and stops. 
+        are then linear to stop and constant again. The segments are
+        normalized, so that they have exactly as much mass above
+        as below the origin.
+
+        Start and stop can be tuples or lists of starts and stops.
         Behaviour of start stop is as np.where(X<start) would do.
         """
         self.start = np.array(start)
         self.stop = np.array(stop)
         super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
 
     @Cache_this(limit=3, ignore_args=())
     def _phi(self, X):
         phi = np.where(X < self.start, self.start, X)
         phi = np.where(X > self.stop, self.stop, phi)
         return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1.
-    
+
 class ChangePointBasisFuncKernel(BasisFuncKernel):
     def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'):
         self.changepoint = np.array(changepoint)
         super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
-    
+
     @Cache_this(limit=3, ignore_args=())
     def _phi(self, X):
         return np.where((X < self.changepoint), -1, 1)
@@ -131,7 +131,7 @@ class ChangePointBasisFuncKernel(BasisFuncKernel):
 class DomainKernel(LinearSlopeBasisFuncKernel):
     def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'):
         super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, ARD, name)
-    
+
     @Cache_this(limit=3, ignore_args=())
     def _phi(self, X):
         phi = np.where((X>self.start)*(X<self.stop), 1, 0)
         return phi
diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py
--- a/GPy/kern/_src/linear.py
+++ b/GPy/kern/_src/linear.py
@@ ... @@ class Linear(Kern):
             return np.einsum('jq,q,ij->iq', X2, self.variances, dL_dK)
 
+    def gradients_XX(self, dL_dK, X, X2=None):
+        if X2 is None:
+            return 2*np.ones(X.shape)*self.variances
+        else:
+            return np.ones(X.shape)*self.variances
+
     def gradients_X_diag(self, dL_dKdiag, X):
         return 2.*self.variances*dL_dKdiag[:,None]*X
diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py
index 73f2d0a4..ba7843c6 100644
--- a/GPy/kern/_src/rbf.py
+++ b/GPy/kern/_src/rbf.py
@@ -31,6 +31,9 @@ class RBF(Stationary):
     def dK_dr(self, r):
         return -r*self.K_of_r(r)
 
+    def dK2_drdr(self, r):
+        return (r**2-1)*self.K_of_r(r)
+
     def __getstate__(self):
         dc = super(RBF, self).__getstate__()
         if self.useGPU:
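A quick sanity check of the new RBF second derivative (a standalone sketch, not part of the patch, assuming unit variance so that K(r) = exp(-r^2/2) and d^2K/dr^2 = (r^2 - 1)K(r)):

    # Finite-difference check of the closed form added in RBF.dK2_drdr.
    import numpy as np

    def K_of_r(r):                    # RBF with unit variance
        return np.exp(-0.5*r**2)

    def dK2_drdr(r):                  # closed form from the patch
        return (r**2 - 1)*K_of_r(r)

    r, h = np.linspace(0.1, 3., 20), 1e-4
    fd = (K_of_r(r+h) - 2*K_of_r(r) + K_of_r(r-h))/h**2
    assert np.allclose(fd, dK2_drdr(r), atol=1e-6)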
diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py
index 9c4f1436..ab1ec282 100644
--- a/GPy/kern/_src/stationary.py
+++ b/GPy/kern/_src/stationary.py
@@ -15,7 +15,7 @@ from ...util.caching import Cache_this
 try:
     from . import stationary_cython
 except ImportError:
-    print('warning in sationary: failed to import cython module: falling back to numpy')
+    print('warning in stationary: failed to import cython module: falling back to numpy')
     config.set('cython', 'working', 'false')
 
@@ -78,6 +78,10 @@ class Stationary(Kern):
         raise NotImplementedError("implement derivative of the covariance function wrt r to use this class")
 
     @Cache_this(limit=20, ignore_args=())
+    def dK2_drdr(self, r):
+        raise NotImplementedError("implement second derivative of covariance wrt r to use this method")
+
+    @Cache_this(limit=5, ignore_args=())
     def K(self, X, X2=None):
         """
         Kernel function applied on inputs X and X2.
@@ -94,6 +98,11 @@ class Stationary(Kern):
         #a convenience function, so we can cache dK_dr
         return self.dK_dr(self._scaled_dist(X, X2))
 
+    @Cache_this(limit=3, ignore_args=())
+    def dK2_drdr_via_X(self, X, X2):
+        #a convenience function, so we can cache dK2_drdr
+        return self.dK2_drdr(self._scaled_dist(X, X2))
+
     def _unscaled_dist(self, X, X2=None):
         """
         Compute the Euclidean distance between each row of X and X2, or between
@@ -201,6 +210,59 @@ class Stationary(Kern):
         else:
             return self._gradients_X_pure(dL_dK, X, X2)
 
+    def gradients_XX(self, dL_dK, X, X2=None):
+        """
+        Given the derivative of the objective wrt K (dL_dK), compute the second derivative of K wrt X and X2:
+
+        .. math::
+            \frac{\partial^2 K}{\partial X \partial X2}
+
+        :returns:
+            dL2_dXdX2: NxMxQ, for X [NxQ] and X2 [MxQ] (X2 is X, if X2 is None).
+            Thus, we return the second derivative in X2.
+        """
+        # The off-diagonals in Q are always zero; this should also hold for the Linear kernel.
+        # By the multivariate chain rule, we can chain the second derivative through r:
+        # d2K_dXdX2 = dK_dr * d2r_dXdX2 + d2K_drdr * dr_dX * dr_dX2
+        invdist = self._inv_dist(X, X2)
+        invdist2 = invdist**2
+
+        dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
+        tmp1 = dL_dr * invdist
+
+        dL_drdr = self.dK2_drdr_via_X(X, X2) * dL_dK
+        tmp2 = dL_drdr * invdist2
+
+        l2 = np.ones(X.shape[1]) * self.lengthscale**2
+
+        if X2 is None:
+            X2 = X
+            tmp1 -= np.eye(X.shape[0])*self.variance
+        else:
+            tmp1[X==X2.T] -= self.variance
+
+        grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
+        for q in range(self.input_dim):
+            tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2
+            grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q]
+        return grad
+
+    def gradients_XX_diag(self, dL_dKdiag, X):
+        """
+        Given the derivative of the objective wrt Kdiag (dL_dKdiag), compute the second derivative of Kdiag wrt X:
+
+        .. math::
+            \frac{\partial^2 Kdiag}{\partial X \partial X}
+
+        :returns:
+            dL2_dXdX: NxQ, for X [NxQ]
+        """
+        return np.ones(X.shape) * self.variance/self.lengthscale**2
+
     def _gradients_X_pure(self, dL_dK, X, X2=None):
         invdist = self._inv_dist(X, X2)
         dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
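The chained second derivative above can be checked numerically against the mixed partial of the kernel; a sketch assuming this patch is applied (the hyperparameters and test points are arbitrary):

    # Mixed-partial finite-difference check of Stationary.gradients_XX for RBF.
    import numpy as np
    import GPy

    k = GPy.kern.RBF(1, variance=2., lengthscale=0.7)
    X, X2 = np.array([[0.3]]), np.array([[1.1]])
    g = k.gradients_XX(np.ones((1, 1)), X, X2)[0, 0, 0]

    def kval(a, b):
        return k.K(np.array([[a]]), np.array([[b]]))[0, 0]

    h = 1e-4  # central difference approximating d2k/(dx dx2)
    fd = (kval(.3+h, 1.1+h) - kval(.3+h, 1.1-h)
          - kval(.3-h, 1.1+h) + kval(.3-h, 1.1-h))/(4*h*h)
    assert np.allclose(g, fd, atol=1e-5)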
diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py
index 3157bd5b..20fe43d7 100644
--- a/GPy/likelihoods/__init__.py
+++ b/GPy/likelihoods/__init__.py
@@ -1,6 +1,6 @@
 from .bernoulli import Bernoulli
 from .exponential import Exponential
-from .gaussian import Gaussian
+from .gaussian import Gaussian, Heteroscedastic_Gaussian
 from .gamma import Gamma
 from .poisson import Poisson
 from .student_t import StudentT
diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py
index 3ac703fe..ff0d6855 100644
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@@ -137,6 +137,20 @@ class BayesianGPLVM(SparseGP_MPI):
                                         fignum, plot_inducing, legend, plot_limits, aspect, updates, predict_kwargs, imshow_kwargs)
 
+    def plot_magnification(self, labels=None, which_indices=None,
+                resolution=50, ax=None, marker='o', s=40,
+                fignum=None, legend=True,
+                plot_limits=None,
+                aspect='auto', updates=False, **kwargs):
+        import sys
+        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
+        from ..plotting.matplot_dep import dim_reduction_plots
+
+        return dim_reduction_plots.plot_magnification(self, labels, which_indices,
+                                                      resolution, ax, marker, s,
+                                                      fignum, False, legend,
+                                                      plot_limits, aspect, updates, **kwargs)
+
     def do_test_latents(self, Y):
         """
         Compute the latent representation for a set of new points Y
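A hypothetical plotting sketch for the new wrapper (not part of the patch; the headless backend, toy model size, and iteration count are assumptions):

    # Illustrative use of the new plot_magnification wrapper (requires matplotlib).
    import matplotlib
    matplotlib.use('Agg')               # headless backend, illustrative choice
    import matplotlib.pyplot as plt
    import numpy as np
    import GPy

    Y = np.random.randn(40, 5)          # toy data with 5 observed dimensions
    m = GPy.models.BayesianGPLVM(Y, input_dim=2)
    m.optimize(max_iters=100)
    m.plot_magnification(resolution=30) # shades the latent space by sqrt(det(G))
    plt.savefig('magnification.png')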
diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py
index d6f29907..c95c9fa0 100644
--- a/GPy/models/gplvm.py
+++ b/GPy/models/gplvm.py
@@ -43,19 +43,19 @@ class GPLVM(GP):
         super(GPLVM, self).parameters_changed()
         self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
 
-    def jacobian(self,X):
-        J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
-        for i in range(self.output_dim):
-            J[:,:,i] = self.kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1], X, self.X)
-        return J
+    #def jacobian(self,X):
+    #    J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
+    #    for i in range(self.output_dim):
+    #        J[:,:,i] = self.kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1], X, self.X)
+    #    return J
 
-    def magnification(self,X):
-        target=np.zeros(X.shape[0])
-        #J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
-        J = self.jacobian(X)
-        for i in range(X.shape[0]):
-            target[i]=np.sqrt(np.linalg.det(np.dot(J[i,:,:],np.transpose(J[i,:,:]))))
-        return target
+    #def magnification(self,X):
+    #    target=np.zeros(X.shape[0])
+    #    #J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
+    #    J = self.jacobian(X)
+    #    for i in range(X.shape[0]):
+    #        target[i]=np.sqrt(np.linalg.det(np.dot(J[i,:,:],np.transpose(J[i,:,:]))))
+    #    return target
 
     def plot(self):
         assert self.Y.shape[1] == 2, "too high dimensional to plot. Try plot_latent"
@@ -82,5 +82,17 @@ class GPLVM(GP):
                                         fignum, False, legend,
                                         plot_limits, aspect, updates, **kwargs)
 
-    def plot_magnification(self, *args, **kwargs):
-        return util.plot_latent.plot_magnification(self, *args, **kwargs)
+    def plot_magnification(self, labels=None, which_indices=None,
+                resolution=50, ax=None, marker='o', s=40,
+                fignum=None, legend=True,
+                plot_limits=None,
+                aspect='auto', updates=False, **kwargs):
+
+        import sys
+        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
+        from ..plotting.matplot_dep import dim_reduction_plots
+
+        return dim_reduction_plots.plot_magnification(self, labels, which_indices,
+                                                      resolution, ax, marker, s,
+                                                      fignum, False, legend,
+                                                      plot_limits, aspect, updates, **kwargs)
diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py
index 2c243e13..7d13ef04 100644
--- a/GPy/plotting/matplot_dep/dim_reduction_plots.py
+++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py
@@ -114,7 +114,7 @@ def plot_latent(model, labels=None, which_indices=None,
 
     # create a function which computes the shading of latent space according to the output variance
     def plot_function(x):
-        Xtest_full = np.zeros((x.shape[0], model.X.shape[1]))
+        Xtest_full = np.zeros((x.shape[0], X.shape[1]))
         Xtest_full[:, [input_1, input_2]] = x
         _, var = model.predict(Xtest_full, **predict_kwargs)
         var = var[:, :1]
@@ -202,6 +202,7 @@ def plot_latent(model, labels=None, which_indices=None,
 def plot_magnification(model, labels=None, which_indices=None,
                 resolution=60, ax=None, marker='o', s=40,
                 fignum=None, plot_inducing=False, legend=True,
+                plot_limits=None,
                 aspect='auto', updates=False):
     """
     :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc)
@@ -217,17 +218,88 @@ def plot_magnification(model, labels=None, which_indices=None,
 
     input_1, input_2 = most_significant_input_dimensions(model, which_indices)
 
-    # first, plot the output variance as a function of the latent space
-    Xtest, xx, yy, xmin, xmax = x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution)
-    Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1]))
+    # fetch the data points X that we'd like to plot
+    X = model.X
+    if isinstance(X, VariationalPosterior):
+        X = X.mean
+
+    if X.shape[0] > 1000:
+        print("Warning: subsampling X, as it has more samples than 1000. X.shape={!s}".format(X.shape))
+        subsample = np.random.choice(X.shape[0], size=1000, replace=False)
+        X = X[subsample]
+        labels = labels[subsample]
+    #=======================================================================
+    # <<>>
+    # <<>>
+    # plt.close('all')
+    # fig, ax = plt.subplots(1,1)
+    # from GPy.plotting.matplot_dep.dim_reduction_plots import most_significant_input_dimensions
+    # import matplotlib.patches as mpatches
+    # i1, i2 = most_significant_input_dimensions(m, None)
+    # xmin, xmax = 100, -100
+    # ymin, ymax = 100, -100
+    # legend_handles = []
+    #
+    # X = m.X.mean[:, [i1, i2]]
+    # X = m.X.variance[:, [i1, i2]]
+    #
+    # xmin = X[:,0].min(); xmax = X[:,0].max()
+    # ymin = X[:,1].min(); ymax = X[:,1].max()
+    # range_ = [[xmin, xmax], [ymin, ymax]]
+    # ul = np.unique(labels)
+    #
+    # for i, l in enumerate(ul):
+    #     #cdict = dict(red =[(0., colors[i][0], colors[i][0]), (1., colors[i][0], colors[i][0])],
+    #     #             green=[(0., colors[i][0], colors[i][1]), (1., colors[i][1], colors[i][1])],
+    #     #             blue =[(0., colors[i][0], colors[i][2]), (1., colors[i][2], colors[i][2])],
+    #     #             alpha=[(0., 0., .0), (.5, .5, .5), (1., .5, .5)])
+    #     #cmap = LinearSegmentedColormap('{}'.format(l), cdict)
+    #     cmap = LinearSegmentedColormap.from_list('cmap_{}'.format(str(l)), [colors[i], colors[i]], 255)
+    #     cmap._init()
+    #     #alphas = .5*(1+scipy.special.erf(np.linspace(-2,2, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3))
+    #     alphas = (scipy.special.erf(np.linspace(0,2.4, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3))
+    #     cmap._lut[:, -1] = alphas
+    #     print l
+    #     x, y = X[labels==l].T
+    #
+    #     heatmap, xedges, yedges = np.histogram2d(x, y, bins=300, range=range_)
+    #     #heatmap, xedges, yedges = np.histogram2d(x, y, bins=100)
+    #
+    #     im = ax.imshow(heatmap, extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], cmap=cmap, aspect='auto', interpolation='nearest', label=str(l))
+    #     legend_handles.append(mpatches.Patch(color=colors[i], label=l))
+    # ax.set_xlim(xmin, xmax)
+    # ax.set_ylim(ymin, ymax)
+    # plt.legend(legend_handles, [l.get_label() for l in legend_handles])
+    # plt.draw()
+    # plt.show()
+    #=======================================================================
+
+    # Create an Imshow controller that can re-plot the latent space shading at a good resolution
+    if plot_limits is None:
+        xmin, ymin = X[:, [input_1, input_2]].min(0)
+        xmax, ymax = X[:, [input_1, input_2]].max(0)
+        x_r, y_r = xmax-xmin, ymax-ymin
+        xmin -= .1*x_r
+        xmax += .1*x_r
+        ymin -= .1*y_r
+        ymax += .1*y_r
+    else:
+        try:
+            xmin, xmax, ymin, ymax = plot_limits
+        except (TypeError, ValueError) as e:
+            raise e.__class__("Wrong plot limits: {} given -> need (xmin, xmax, ymin, ymax)".format(plot_limits))
+
     def plot_function(x):
+        Xtest_full = np.zeros((x.shape[0], X.shape[1]))
         Xtest_full[:, [input_1, input_2]] = x
-        mf=model.magnification(Xtest_full)
+        mf = model.predict_magnification(Xtest_full)
         return mf
 
     view = ImshowController(ax, plot_function,
-                            tuple(model.X.min(0)[:, [input_1, input_2]]) + tuple(model.X.max(0)[:, [input_1, input_2]]),
+                            (xmin, ymin, xmax, ymax),
                             resolution, aspect=aspect, interpolation='bilinear',
                             cmap=pb.cm.gray)
diff --git a/GPy/util/diag.py b/GPy/util/diag.py
index e7c332e2..9a3343f0 100644
--- a/GPy/util/diag.py
+++ b/GPy/util/diag.py
@@ -46,6 +46,8 @@ def offdiag_view(A, offset=0):
     return as_strided(Af[(1+offset):], shape=(A.shape[0]-1, A.shape[1]), strides=(A.strides[0] + A.itemsize, A.strides[1]))
 
 def _diag_ufunc(A,b,offset,func):
+    b = np.squeeze(b)
+    assert b.ndim <= 1, "only implemented for one dimensional arrays"
     dA = view(A, offset); func(dA,b,dA)
     return A
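The squeeze lets the diag ufuncs accept column vectors as well as flat arrays; a small check (not part of the patch), going through GPy.util.diag's add helper, which routes through _diag_ufunc:

    # Illustrative check of the squeeze fix in _diag_ufunc.
    import numpy as np
    from GPy.util import diag

    A = np.zeros((3, 3))
    diag.add(A, np.ones((3, 1)))   # a (3,1) column is now squeezed to 1-d
    assert np.allclose(np.diag(A), 1.)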