Merge branch 'devel' of github.com:SheffieldML/GPy into devel

2026-06-02 14:45:15 +02:00 · 2015-04-23 17:00:25 +01:00 · 2015-04-23 17:00:25 +01:00 · ac4972ff99
commit ac4972ff99
parent 7abeed51de 9b54f831b3
6 changed files with 350 additions and 33 deletions
--- a/GPy/core/parameterization/priors.py
+++ b/GPy/core/parameterization/priors.py
@ -728,6 +728,254 @@ class DGPLVM(Prior):
        return 'DGPLVM_prior_Raq'
 # ******************************************
 from parameterized import Parameterized
 from .. import Param
 class DGPLVM_Lamda(Prior, Parameterized):
    """
    Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
    :param sigma2: constant
    .. Note:: DGPLVM for Classification paper implementation
    """
    domain = _REAL
    # _instances = []
    # def __new__(cls, mu, sigma): # Singleton:
    #     if cls._instances:
    #         cls._instances[:] = [instance for instance in cls._instances if instance()]
    #         for instance in cls._instances:
    #             if instance().mu == mu and instance().sigma == sigma:
    #                 return instance()
    #     o = super(Prior, cls).__new__(cls, mu, sigma)
    #     cls._instances.append(weakref.ref(o))
    #     return cls._instances[-1]()
    def __init__(self, sigma2, lbl, x_shape, lamda, name='DP_prior'):
        super(DGPLVM_Lamda, self).__init__(name=name)
        self.sigma2 = sigma2
        # self.x = x
        self.lbl = lbl
 	self.lamda = lamda
        self.classnum = lbl.shape[1]
        self.datanum = lbl.shape[0]
        self.x_shape = x_shape
        self.dim = x_shape[1]
 	self.lamda = Param('lamda', np.diag(lamda))
        self.link_parameter(self.lamda)
    def get_class_label(self, y):
        for idx, v in enumerate(y):
            if v == 1:
                return idx
        return -1
    # This function assigns each data point to its own class
    # and returns the dictionary which contains the class name and parameters.
    def compute_cls(self, x):
        cls = {}
        # Appending each data point to its proper class
        for j in xrange(self.datanum):
            class_label = self.get_class_label(self.lbl[j])
            if class_label not in cls:
                cls[class_label] = []
            cls[class_label].append(x[j])
        return cls
    # This function computes mean of each class. The mean is calculated through each dimension
    def compute_Mi(self, cls):
        M_i = np.zeros((self.classnum, self.dim))
        for i in cls:
            # Mean of each class
 	    class_i = cls[i]
            M_i[i] = np.mean(class_i, axis=0)
        return M_i
    # Adding data points as tuple to the dictionary so that we can access indices
    def compute_indices(self, x):
        data_idx = {}
        for j in xrange(self.datanum):
            class_label = self.get_class_label(self.lbl[j])
            if class_label not in data_idx:
                data_idx[class_label] = []
            t = (j, x[j])
            data_idx[class_label].append(t)
        return data_idx
    # Adding indices to the list so we can access whole the indices
    def compute_listIndices(self, data_idx):
        lst_idx = []
        lst_idx_all = []
        for i in data_idx:
            if len(lst_idx) == 0:
                pass
                #Do nothing, because it is the first time list is created so is empty
            else:
                lst_idx = []
            # Here we put indices of each class in to the list called lst_idx_all
            for m in xrange(len(data_idx[i])):
                lst_idx.append(data_idx[i][m][0])
            lst_idx_all.append(lst_idx)
        return lst_idx_all
    # This function calculates between classes variances
    def compute_Sb(self, cls, M_i, M_0):
        Sb = np.zeros((self.dim, self.dim))
        for i in cls:
            B = (M_i[i] - M_0).reshape(self.dim, 1)
            B_trans = B.transpose()
            Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
        return Sb
    # This function calculates within classes variances
    def compute_Sw(self, cls, M_i):
        Sw = np.zeros((self.dim, self.dim))
        for i in cls:
            N_i = float(len(cls[i]))
            W_WT = np.zeros((self.dim, self.dim))
            for xk in cls[i]:
                W = (xk - M_i[i])
                W_WT += np.outer(W, W)
            Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
        return Sw
    # Calculating beta and Bi for Sb
    def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
        import pdb
        # pdb.set_trace()
        B_i = np.zeros((self.classnum, self.dim))
        Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
        for i in data_idx:
            # pdb.set_trace()
            # Calculating Bi
            B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
        for k in xrange(self.datanum):
            for i in data_idx:
                N_i = float(len(data_idx[i]))
                if k in lst_idx_all[i]:
                    beta = (float(1) / N_i) - (float(1) / self.datanum)
                    Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
                else:
                    beta = -(float(1) / self.datanum)
                    Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
        Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
        return Sig_beta_B_i_all
    # Calculating W_j s separately so we can access all the W_j s anytime
    def compute_wj(self, data_idx, M_i):
        W_i = np.zeros((self.datanum, self.dim))
        for i in data_idx:
            N_i = float(len(data_idx[i]))
            for tpl in data_idx[i]:
                xj = tpl[1]
                j = tpl[0]
                W_i[j] = (xj - M_i[i])
        return W_i
    # Calculating alpha and Wj for Sw
    def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
        Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
        for i in data_idx:
            N_i = float(len(data_idx[i]))
            for tpl in data_idx[i]:
                k = tpl[0]
                for j in lst_idx_all[i]:
                    if k == j:
                        alpha = 1 - (float(1) / N_i)
                        Sig_alpha_W_i[k] += (alpha * W_i[j])
                    else:
                        alpha = 0 - (float(1) / N_i)
                        Sig_alpha_W_i[k] += (alpha * W_i[j])
        Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
        return Sig_alpha_W_i
    # This function calculates log of our prior
    def lnpdf(self, x):
        x = x.reshape(self.x_shape)
 	#!!!!!!!!!!!!!!!!!!!!!!!!!!!
 	#self.lamda.values[:] = self.lamda.values/self.lamda.values.sum()	
 	xprime = x.dot(np.diagflat(self.lamda))
 	x = xprime
 	# print x
        cls = self.compute_cls(x)
        M_0 = np.mean(x, axis=0)
        M_i = self.compute_Mi(cls)
        Sb = self.compute_Sb(cls, M_i, M_0)
        Sw = self.compute_Sw(cls, M_i)
        # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
        #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
 	Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
        return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
    # This function calculates derivative of the log of prior function
    def lnpdf_grad(self, x):
        x = x.reshape(self.x_shape)
 	xprime = x.dot(np.diagflat(self.lamda))
 	x = xprime
 	# print x
        cls = self.compute_cls(x)
        M_0 = np.mean(x, axis=0)
        M_i = self.compute_Mi(cls)
        Sb = self.compute_Sb(cls, M_i, M_0)
        Sw = self.compute_Sw(cls, M_i)
        data_idx = self.compute_indices(x)
        lst_idx_all = self.compute_listIndices(data_idx)
        Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
        W_i = self.compute_wj(data_idx, M_i)
        Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
        # Calculating inverse of Sb and its transpose and minus
        # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
        #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
 	Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
        Sb_inv_N_trans = np.transpose(Sb_inv_N)
        Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
        Sw_trans = np.transpose(Sw)
        # Calculating DJ/DXk
        DJ_Dxk = 2 * (
            Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
                Sig_alpha_W_i))
        # Calculating derivative of the log of the prior
        DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
        DPxprim_Dx = np.diagflat(self.lamda).dot(DPx_Dx)
        # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
        DPxprim_Dx = DPxprim_Dx.T
    	DPxprim_Dlamda = DPx_Dx.dot(x)
    	# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
    	DPxprim_Dlamda = DPxprim_Dlamda.T
 	self.lamda.gradient = np.diag(DPxprim_Dlamda)
 	# print DPxprim_Dx
    	return DPxprim_Dx
    # def frb(self, x):
    #     from functools import partial
    #     from GPy.models import GradientChecker
    #     f = partial(self.lnpdf)
    #     df = partial(self.lnpdf_grad)
    #     grad = GradientChecker(f, df, x, 'X')
    #     grad.checkgrad(verbose=1)
    def rvs(self, n):
        return np.random.rand(n)  # A WRONG implementation
    def __str__(self):
        return 'DGPLVM_prior_Raq_Lamda'
 # ******************************************
 class DGPLVM_T(Prior):
    """
@ -780,11 +1028,12 @@ class DGPLVM_T(Prior):
        return cls
    # This function computes mean of each class. The mean is calculated through each dimension
-    def compute_Mi(self, cls, vec):
+    def compute_Mi(self, cls):
        M_i = np.zeros((self.classnum, self.dim))
        for i in cls:
            # Mean of each class
-            class_i = np.multiply(cls[i],vec)
+	    # class_i = np.multiply(cls[i],vec)
            class_i = cls[i]
            M_i[i] = np.mean(class_i, axis=0)
        return M_i
@ -890,9 +1139,12 @@ class DGPLVM_T(Prior):
    # This function calculates log of our prior
    def lnpdf(self, x):
        x = x.reshape(self.x_shape)
 	xprim = x.dot(self.vec)
 	x = xprim
 	# print x  
        cls = self.compute_cls(x)
        M_0 = np.mean(x, axis=0)
-        M_i = self.compute_Mi(cls, self.vec)
+        M_i = self.compute_Mi(cls)
        Sb = self.compute_Sb(cls, M_i, M_0)
        Sw = self.compute_Sw(cls, M_i)
        # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
@ -905,9 +1157,12 @@ class DGPLVM_T(Prior):
    # This function calculates derivative of the log of prior function
    def lnpdf_grad(self, x):
 	x = x.reshape(self.x_shape)
 	xprim = x.dot(self.vec)
 	x = xprim 
 	# print x       
 	cls = self.compute_cls(x)
        M_0 = np.mean(x, axis=0)
-        M_i = self.compute_Mi(cls, self.vec)
+        M_i = self.compute_Mi(cls)
        Sb = self.compute_Sb(cls, M_i, M_0)
        Sw = self.compute_Sw(cls, M_i)
        data_idx = self.compute_indices(x)
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@ -10,6 +10,7 @@ from .parameterization.variational import VariationalPosterior, NormalPosterior
 from ..util.linalg import mdot
 import logging
 import itertools
 logger = logging.getLogger("sparse gp")
 class SparseGP(GP):
@ -135,7 +136,13 @@ class SparseGP(GP):
                var = var
            else:
                Kxx = kern.Kdiag(Xnew)
-                var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
+                if self.posterior.woodbury_inv.ndim == 2:
                    var = Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0)
                elif self.posterior.woodbury_inv.ndim == 3:
                    var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
                    for i in range(var.shape[1]):
                        var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
                var = var
            #add in the mean function
            if self.mean_function is not None:
                mu += self.mean_function.f(Xnew)
--- a/GPy/kern/_src/basis_funcs.py
+++ b/GPy/kern/_src/basis_funcs.py
@ -5,33 +5,59 @@ from ...core.parameterization.param import Param
 from ...core.parameterization.transformations import Logexp
 import numpy as np
 from ...util.caching import Cache_this
-from ...util.linalg import tdot
+from ...util.linalg import tdot, mdot
 class BasisFuncKernel(Kern):
-    def __init__(self, input_dim, variance=1., active_dims=None, name='basis func kernel'):
+    def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'):
        """
        Abstract superclass for kernels with explicit basis functions for use in GPy.
        This class does NOT automatically add an offset to the design matrix phi!
        """
        super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
        self.ARD = ARD
        if self.ARD:
            phi_test = self._phi(np.random.normal(0, 1, (1, self.input_dim)))
            variance = variance * np.ones(phi_test.shape[1])
        else:
            variance = np.array(variance)
        self.variance = Param('variance', variance, Logexp())
        self.link_parameter(self.variance)
    def parameters_changed(self):
        self.alpha = np.sqrt(self.variance)
        self.beta = 1./self.variance
    @Cache_this(limit=3, ignore_args=())
    def phi(self, X):
-        raise NotImplementedError('Overwrite this phi function, which maps the input X into the higher dimensional space and forms the design matrix Phi')
+        return self._phi(X)
    def _phi(self, X):
        raise NotImplementedError('Overwrite this _phi function, which maps the input X into the higher dimensional space and returns the design matrix Phi')
    def K(self, X, X2=None):
-        return self.variance * self._K(X, X2)
+        return self._K(X, X2)
    def Kdiag(self, X, X2=None):
-        return self.variance * np.diag(self._K(X, X2))
+        return np.diag(self._K(X, X2))
    def update_gradients_full(self, dL_dK, X, X2=None):
-        self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2))
+        if self.ARD:
            phi1 = self.phi(X)
            if X2 is None or X is X2:
                self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi1)
            else:
                phi2 = self.phi(X2)
                self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi2)
        else:
            self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2)) * self.beta
    def update_gradients_diag(self, dL_dKdiag, X):
-        self.variance.gradient = np.einsum('i,i', dL_dKdiag, self._K(X))
+        if self.ARD:
            phi1 = self.phi(X)
            self.variance.gradient = np.einsum('i,iq,iq->q', dL_dKdiag, phi1, phi1)
        else:
            self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X)) * self.beta
    def concatenate_offset(self, X):
        return np.c_[np.ones((X.shape[0], 1)), X]
@ -52,19 +78,19 @@ class BasisFuncKernel(Kern):
                posterior = self._highest_parent_.posterior
            except NameError:
                raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference")
-        phi = self.phi(X)
+        phi_alpha = self.phi(X) * self.variance
-        return self.variance * phi.T.dot(posterior.woodbury_vector), self.variance * (1 - self.variance * phi.T.dot(posterior.woodbury_inv.dot(phi)))
+        return (phi_alpha).T.dot(posterior.woodbury_vector), (np.eye(phi_alpha.shape[1])*self.variance - mdot(phi_alpha.T, posterior.woodbury_inv, phi_alpha))
    @Cache_this(limit=3, ignore_args=())
    def _K(self, X, X2):
        if X2 is None or X is X2:
-            phi = self.phi(X)
+            phi = self.phi(X) * self.alpha
            if phi.ndim != 2:
                phi = phi[:, None]
            return tdot(phi)
        else:
-            phi1 = self.phi(X)
+            phi1 = self.phi(X) * self.alpha
-            phi2 = self.phi(X2)
+            phi2 = self.phi(X2) * self.alpha
            if phi1.ndim != 2:
                phi1 = phi1[:, None]
                phi2 = phi2[:, None]
@ -72,30 +98,41 @@ class BasisFuncKernel(Kern):
 class LinearSlopeBasisFuncKernel(BasisFuncKernel):
-    def __init__(self, input_dim, start, stop, variance=1., active_dims=None, name='linear_segment'):
+    def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='linear_segment'):
-        super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, name)
+        """
        A linear segment transformation. The segments start at start, \
        are then linear to stop and constant again. The segments are 
        normalized, so that they have exactly as much mass above 
        as below the origin. 
        Start and stop can be tuples or lists of starts and stops. 
        Behaviour of start stop is as np.where(X<start) would do.
        """
        self.start = np.array(start)
        self.stop = np.array(stop)
        super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
    @Cache_this(limit=3, ignore_args=())
-    def phi(self, X):
+    def _phi(self, X):
        phi = np.where(X < self.start, self.start, X)
        phi = np.where(phi > self.stop, self.stop, phi)
-        return ((phi-self.start)/(self.stop-self.start))-.5
+        return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1.
        return self.concatenate_offset(phi)  # ((phi-self.start)/(self.stop-self.start))-.5
 class ChangePointBasisFuncKernel(BasisFuncKernel):
-    def __init__(self, input_dim, changepoint, variance=1., active_dims=None, name='changepoint'):
+    def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'):
        super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, name)
        self.changepoint = changepoint
        super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
    @Cache_this(limit=3, ignore_args=())
-    def phi(self, X):
+    def _phi(self, X):
-        return self.concatenate_offset(np.where((X < self.changepoint), -1, 1))
+        return np.where((X < self.changepoint), -1, 1)
 class DomainKernel(LinearSlopeBasisFuncKernel):
    def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'):
        super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, ARD, name)
    @Cache_this(limit=3, ignore_args=())
-    def phi(self, X):
+    def _phi(self, X):
-        phi = np.where((X>self.start)*(X<self.stop), 1., 0.)
+        phi = np.where((X>self.start)*(X<self.stop), 1, 0)
        return phi#((phi-self.start)/(self.stop-self.start))-.5
        return self.concatenate_offset(phi)  # ((phi-self.start)/(self.stop-self.start))-.5
--- a/GPy/kern/_src/periodic.py
+++ b/GPy/kern/_src/periodic.py
@ -68,8 +68,6 @@ class Periodic(Kern):
        return np.diag(self.K(X))
 class PeriodicExponential(Periodic):
    """
    Kernel of the periodic subspace (up to a given frequency) of a exponential
--- a/GPy/models/init.py
+++ b/GPy/models/init.py
@ -21,3 +21,4 @@ from .gp_kronecker_gaussian_regression import GPKroneckerGaussianRegression
 from .gp_var_gauss import GPVariationalGaussianApproximation
 from .one_vs_all_classification import OneVsAllClassification
 from .one_vs_all_sparse_classification import OneVsAllSparseClassification
 from .dpgplvm import DPBayesianGPLVM
--- a/GPy/models/dpgplvm.py
+++ b/GPy/models/dpgplvm.py
@ -0,0 +1,19 @@
 # Copyright (c) 2015 the GPy Austhors (see AUTHORS.txt)
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 import numpy as np
 from .. import kern
 from bayesian_gplvm import BayesianGPLVM
 from ..core.parameterization.variational import NormalPosterior, NormalPrior
 class DPBayesianGPLVM(BayesianGPLVM):
    """
    Bayesian Gaussian Process Latent Variable Model with Descriminative prior
    """
    def __init__(self, Y, input_dim, X_prior, X=None, X_variance=None, init='PCA', num_inducing=10,
                 Z=None, kernel=None, inference_method=None, likelihood=None,
                 name='bayesian gplvm', mpi_comm=None, normalizer=None,
                 missing_data=False, stochastic=False, batchsize=1):
        super(DPBayesianGPLVM,self).__init__(Y=Y, input_dim=input_dim, X=X, X_variance=X_variance, init=init, 					num_inducing=num_inducing, Z=Z, kernel=kernel, inference_method=inference_method, likelihood=likelihood, mpi_comm=mpi_comm, normalizer=normalizer, missing_data=missing_data, stochastic=stochastic, batchsize=batchsize, name='dp bayesian gplvm')
 	self.X.mean.set_prior(X_prior)
        self.link_parameter(X_prior)