BGPLVM working with rbf+white

Nicolo Fusi 2013-01-29 14:02:41 +00:00
parent 8b6e244cf1
commit 936d08723e
5 changed files with 105 additions and 32 deletions


@@ -20,26 +20,28 @@ def plot_oil(X, theta, labels, label):
     plt.plot(flow_type[:,0], flow_type[:,1], 'bx')
     plt.title(label)
 
-data = pickle.load(open('../util/datasets/oil_flow_3classes.pickle', 'r'))
+data = pickle.load(open('../../../GPy_assembla/datasets/oil_flow_3classes.pickle', 'r'))
 Y = data['DataTrn']
 N, D = Y.shape
-selected = np.random.permutation(N)[:200]
+selected = np.random.permutation(N)#[:200]
 labels = data['DataTrnLbls'][selected]
 Y = Y[selected]
 N, D = Y.shape
 Y -= Y.mean(axis=0)
-Y /= Y.std(axis=0)
-Q = 2
-m1 = GPy.models.sparse_GPLVM(Y, Q, M = 15)
-m1.constrain_positive('(rbf|bias|noise)')
-m1.constrain_bounded('white', 1e-6, 1.0)
-plot_oil(m1.X, np.array([1,1]), labels, 'PCA initialization')
+#Y /= Y.std(axis=0)
+Q = 7
+k = GPy.kern.rbf_ARD(Q) + GPy.kern.white(Q)
+m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 12)
+m.constrain_positive('(rbf|bias|S|white|noise)')
+# m.constrain_bounded('white', 1e-6, 100.0)
+# m.constrain_bounded('noise', 1e-4, 1000.0)
+plot_oil(m.X, np.array([1,1]), labels, 'PCA initialization')
 # m.optimize(messages = True)
-m1.optimize('bfgs', messages = True)
-plot_oil(m1.X, np.array([1,1]), labels, 'sparse GPLVM')
+m.optimize('tnc', messages = True)
+plot_oil(m.X, m.kern.parts[0].lengthscales, labels, 'B-GPLVM')
 # pb.figure()
 # m.plot()
 # pb.title('PCA initialisation')
@@ -47,7 +49,7 @@ plot_oil(m1.X, np.array([1,1]), labels, 'sparse GPLVM')
 # m.optimize(messages = 1)
 # m.plot()
 # pb.title('After optimisation')
-m = GPy.models.GPLVM(Y, Q)
-m.constrain_positive('(white|rbf|bias|noise)')
-m.optimize()
-plot_oil(m.X, np.array([1,1]), labels, 'GPLVM')
+# m = GPy.models.GPLVM(Y, Q)
+# m.constrain_positive('(white|rbf|bias|noise)')
+# m.optimize()
+# plot_oil(m.X, np.array([1,1]), labels, 'GPLVM')
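
Taken together, the demo now fits the new Bayesian model instead of the sparse GPLVM. A minimal sketch of that flow, assuming the 2013-era GPy API exactly as it appears above (the pickle path is machine-specific and stands in for wherever the oil-flow data lives):

    import pickle
    import GPy

    # Load and centre the 3-class oil-flow data (std-scaling is commented out above)
    data = pickle.load(open('oil_flow_3classes.pickle', 'r'))  # hypothetical local path
    Y = data['DataTrn'] - data['DataTrn'].mean(axis=0)

    Q = 7                                                # latent dimensionality
    k = GPy.kern.rbf_ARD(Q) + GPy.kern.white(Q)          # ARD RBF plus white noise
    m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 12)  # 12 inducing points
    m.constrain_positive('(rbf|bias|S|white|noise)')
    m.optimize('tnc', messages = True)                   # TNC in place of BFGS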


@@ -95,7 +95,7 @@ class rbf(kernpart):
         target += self.variance
 
     def dpsi0_dtheta(self,partial,Z,mu,S,target):
-        target[0] += 1.
+        target[0] += np.sum(partial)
 
     def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         pass
@@ -113,7 +113,9 @@ class rbf(kernpart):
     def dpsi1_dZ(self,partial,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
-        target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscale2/self._psi1_denom,0)
+        denominator = (self.lengthscale2*(self._psi1_denom))
+        dpsi1_dZ = - self._psi1[:,:,None] * ((self._psi1_dist/denominator))
+        target += np.sum(partial.T[:,:,None] * dpsi1_dZ, 0)
 
     def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         self._psi_computations(Z,mu,S)
@@ -132,13 +134,14 @@ class rbf(kernpart):
         d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom)
         d_length = d_length.sum(0)
         target[0] += np.sum(partial*d_var)
-        target[1] += np.sum(d_length*partial)
+        target[1] += np.sum(d_length*partial[:,:,None])
 
     def dpsi2_dZ(self,partial,Z,mu,S,target):
+        """Returns shape N,M,M,Q"""
         self._psi_computations(Z,mu,S)
-        dZ = self._psi2[:,:,:,None]/self.lengthscale2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
-        target += np.sum(partial[None,:,:,None]*dZ,0).sum(1)
+        term1 = 0.5*self._psi2_Zdist/self.lengthscale2 # M, M, Q
+        term2 = self._psi2_mudist/self._psi2_denom/self.lengthscale2 # N, M, M, Q
+        dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
+        target += (partial[None,:,:,None]*dZ).sum(0).sum(0)
 
     def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """Think N,M,M,Q """
@@ -175,4 +178,3 @@ class rbf(kernpart):
         self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M
         self._Z, self._mu, self._S = Z, mu,S
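
For orientation, the psi statistics whose gradients change in this file are the standard kernel expectations of the variational GPLVM bound (Titsias & Lawrence, 2010), taken under q(X) = prod_n N(x_n | mu_n, S_n) and kept in the code's N,M,M layout before summing over n:

    \psi_0 = \sum_n \big\langle k(x_n, x_n) \big\rangle_{q(x_n)}, \qquad
    (\Psi_1)_{nm} = \big\langle k(x_n, z_m) \big\rangle_{q(x_n)}, \qquad
    (\Psi_2)_{nmm'} = \big\langle k(x_n, z_m)\, k(x_n, z_{m'}) \big\rangle_{q(x_n)}

Each dpsi*_d* routine is then one chain-rule factor: it contracts the incoming partial (dL/dpsi) against the analytic derivative of the corresponding expectation.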


@@ -74,7 +74,7 @@ class rbf_ARD(kernpart):
         target += self.variance
 
     def dpsi0_dtheta(self,partial,Z,mu,S,target):
-        target[0] += 1.
+        target[0] += np.sum(partial)
 
     def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         pass
@@ -93,7 +93,9 @@ class rbf_ARD(kernpart):
     def dpsi1_dZ(self,partial,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
         # np.add(target,-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,target)
-        target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,0)
+        denominator = (self.lengthscales2*(self._psi1_denom))
+        dpsi1_dZ = - self._psi1[:,:,None] * ((self._psi1_dist/denominator))
+        target += np.sum(partial.T[:,:,None] * dpsi1_dZ, 0)
 
     def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """return shapes are N,M,Q"""
@@ -118,8 +120,10 @@ class rbf_ARD(kernpart):
     def dpsi2_dZ(self,partial,Z,mu,S,target):
         """Returns shape N,M,M,Q"""
         self._psi_computations(Z,mu,S)
-        dZ = self._psi2[:,:,:,None]/self.lengthscales2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
-        target += np.sum(partial[None,:,:,None]*dZ,0).sum(1)
+        term1 = 0.5*self._psi2_Zdist/self.lengthscales2 # M, M, Q
+        term2 = self._psi2_mudist/self._psi2_denom/self.lengthscales2 # N, M, M, Q
+        dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
+        target += (partial[None,:,:,None]*dZ).sum(0).sum(0)
 
     def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """Think N,M,M,Q """
@@ -247,5 +251,3 @@ if __name__=='__main__':
         return f,df
     print "psi2_theta_test"
     checkgrad(psi2_theta_test,np.random.rand(1+Q),args=(k,))
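
The psi2_theta_test/checkgrad calls above are the module's own numerical check on these gradients. A self-contained sketch of the same idea in plain numpy (nothing here is GPy's API; the quadratic at the bottom is just a known-gradient example):

    import numpy as np

    def fd_checkgrad(f, x, eps=1e-6, rtol=1e-4):
        """Compare an analytic gradient against central finite differences.

        f(x) must return a pair (objective, gradient) for a flat array x,
        mirroring the f,df convention used in the tests above.
        """
        _, grad = f(x)
        fd = np.empty_like(x)
        for i in range(x.size):
            e = np.zeros_like(x)
            e[i] = eps
            fd[i] = (f(x + e)[0] - f(x - e)[0]) / (2.0 * eps)
        return np.allclose(grad, fd, rtol=rtol, atol=1e-6)

    # Sanity check on f(x) = 0.5 * x.x, whose gradient is x itself
    assert fd_checkgrad(lambda x: (0.5 * np.dot(x, x), x), np.random.randn(5))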

GPy/models/BGPLVM.py (new file)

@@ -0,0 +1,68 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+import pylab as pb
+import sys, pdb
+from GPLVM import GPLVM
+from sparse_GP_regression import sparse_GP_regression
+from GPy.util.linalg import pdinv
+
+class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
+    """
+    Bayesian Gaussian Process Latent Variable Model
+
+    :param Y: observed data
+    :type Y: np.ndarray
+    :param Q: latent dimensionality
+    :type Q: int
+    :param init: initialisation method for the latent space
+    :type init: 'PCA'|'random'
+    """
+    def __init__(self, Y, Q, init='PCA', **kwargs):
+        X = self.initialise_latent(init, Q, Y)
+        S = np.ones_like(X) * 1e-2
+        sparse_GP_regression.__init__(self, X, Y, X_uncertainty = S, **kwargs)
+
+    def get_param_names(self):
+        X_names = sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
+        S_names = sum([['S_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
+        return (X_names + S_names + sparse_GP_regression.get_param_names(self))
+
+    def get_param(self):
+        """
+        Horizontally stacks the parameters in order to present them to the optimizer.
+        The resulting 1-D array has this structure:
+
+        ===============================================================
+        |      mu       |       S       |     Z     | beta |  theta  |
+        ===============================================================
+        """
+        return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP_regression.get_param(self)))
+
+    def set_param(self,x):
+        N, Q = self.N, self.Q
+        self.X = x[:self.X.size].reshape(N,Q).copy()
+        self.X_uncertainty = x[(N*Q):(2*N*Q)].reshape(N,Q).copy()
+        sparse_GP_regression.set_param(self, x[(2*N*Q):])
+
+    def dL_dmuS(self):
+        dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_uncertainty)
+        dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0,self.Z,self.X,self.X_uncertainty)
+        dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2,self.Z,self.X,self.X_uncertainty)
+        dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
+        dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
+        return np.hstack((dL_dmu.flatten(), dL_dS.flatten()))
+
+    def log_likelihood_gradients(self):
+        return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression.log_likelihood_gradients(self)))
+
+    def plot(self):
+        GPLVM.plot(self)
+        # passing Z without a small amount of jitter will induce the white kernel where we don't want it!
+        mu, var = sparse_GP_regression.predict(self, self.Z+np.random.randn(*self.Z.shape)*0.0001)
+        pb.plot(mu[:, 0], mu[:, 1], 'ko')
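
Because get_param/set_param splice the variational means and variances in front of the sparse-GP parameters, the optimizer sees a single flat vector laid out as | mu | S | Z | beta | theta |. A standalone numpy round-trip of that reshaping logic (toy sizes; rest is a stand-in for the Z/beta/theta tail):

    import numpy as np

    N, Q = 4, 2                        # toy latent sizes
    X = np.random.randn(N, Q)          # variational means mu
    S = np.ones((N, Q)) * 1e-2         # variational variances, as in __init__
    rest = np.random.randn(7)          # stand-in for Z, beta and theta

    x = np.hstack((X.flatten(), S.flatten(), rest))   # get_param
    X2 = x[:N * Q].reshape(N, Q)                      # set_param: mu block
    S2 = x[N * Q:2 * N * Q].reshape(N, Q)             # set_param: S block
    assert np.allclose(X, X2) and np.allclose(S, S2)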


@@ -37,7 +37,7 @@ class sparse_GP_regression(GP_regression):
     """
     def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False):
-        self.scale_factor = 10.0
+        self.scale_factor = 1000.0
         self.beta = beta
         if Z is None:
             self.Z = np.random.permutation(X.copy())[:M]
@@ -70,7 +70,6 @@ class sparse_GP_regression(GP_regression):
             self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
             self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
             self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
-            # raise NotImplementedError, "scale psi2 (in kern?)"
             self.psi2_beta_scaled = self.psi2*(self.beta/self.scale_factor**2)
         else:
             self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum()
@@ -163,10 +162,10 @@ class sparse_GP_regression(GP_regression):
         """
        The derivative of the bound wrt the inducing inputs Z
         """
-        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z,) # factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
+        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z) # factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
         if self.has_uncertain_inputs:
-            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
-            dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty)
+            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
+            dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
         else:
             #re-cast computations in psi2 back to psi1:
             dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
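
The recurring "factor of two because of stripes" comments are the usual symmetric-kernel identity: each z_m enters Kmm (and, with uncertain inputs, psi2) through both its row and its column, so for a symmetric multiplier A = A^T,

    \frac{\partial}{\partial z_m} \sum_{i,j} A_{ij}\, k(z_i, z_j)
        = 2 \sum_j A_{mj}\, \frac{\partial k(z_m, z_j)}{\partial z_m},

and the code computes only the "row" half of each derivative and doubles it; dL_dKmm and dL_dpsi2 are symmetric here, so the shortcut is exact.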