diff --git a/GPy/models/FITC.py b/GPy/models/FITC.py index 17df03a7..521367c4 100644 --- a/GPy/models/FITC.py +++ b/GPy/models/FITC.py @@ -24,11 +24,18 @@ class FITC(sparse_GP): #ERASEME - #N = likelihood.Y.size - #self.beta_star = np.random.rand(N,1) - #self.Kmm_ = kernel.K(self.Z).copy() - #self.Kmmi_,a,b,c = pdinv(self.Kmm_) - #self.psi1_ = kernel.K(self.Z,X).copy() + N = likelihood.Y.size + self.beta_star = np.random.rand(N,1) + self.Kmm_ = kernel.K(self.Z).copy() + self.Kmmi_,a,b,c = pdinv(self.Kmm_) + self.psi1_ = kernel.K(self.Z,X).copy() + + Haux = np.random.rand(self.M,self.M) + self.matA = np.dot(Haux,Haux.T) + np.eye(self.M)*100. + self.matV = np.random.rand(N,1) + self.H_ = np.dot(Haux,Haux.T) + np.eye(self.M)*3. + self.Hi_, l1,l2,l3 = pdinv(self.H_) + #self.V_star_ = np.random.rand(N,1) sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False) @@ -63,28 +70,28 @@ class FITC(sparse_GP): self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1) Lmipsi1 = np.dot(self.Lmi,self.psi1) - self.Qnn = np.dot(Lmipsi1.T,Lmipsi1) + self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy() self.Diag0 = self.psi0 - np.diag(self.Qnn) #TODO eraseme - """ self.psi1 = self.psi1_ self.Lm = jitchol(self.Kmm_) self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1) Lmipsi1 = np.dot(self.Lmi,self.psi1) - #self.true_psi1 = self.kern.K(self.Z,self.X) + self.true_psi1 = self.kern.K(self.Z,self.X) #self.Qnn = mdot(self.true_psi1.T,self.Lmi.T,self.Lmi,self.true_psi1) - self.Kmmi, a,b,c = pdinv(self.Kmm) - self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1) + #self.Kmmi, a,b,c = pdinv(self.Kmm) + #self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1) #self.Diag0 = self.psi0 #- np.diag(self.Qnn) - self.Diag0 = - np.diag(self.Qnn) + #self.Diag0 = - np.diag(self.Qnn) #Kmmi,Lm,Lmi,logdetK = pdinv(self.Kmm) #self.Lambda = self.Kmmi_ + mdot(self.Kmmi_,self.psi1_,self.beta_star*self.psi1_.T,self.Kmmi_) + np.eye(self.M)*100 #self.Lambdai, LLm, LLmi, self.logdetLambda = pdinv(self.Lambda) - """ #TODO uncomment - self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #Includes Diag0 in the precision + #self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #Includes Diag0 in the precision + self.true_beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #Includes Diag0 in the precision + self.true_V_star = self.true_beta_star * self.likelihood.Y self.V_star = self.beta_star * self.likelihood.Y # The rather complex computations of self.A @@ -138,7 +145,6 @@ class FITC(sparse_GP): self.dyby_dtheta = dyby_dtheta # dlogB_dtheta : C - #C_B dC_B = -.5*Kmmi C_B = self.kern.dK_dtheta(dC_B,self.Z) #check @@ -176,6 +182,77 @@ class FITC(sparse_GP): self.dlogB_dtheta = C_B + C_AA + C_ABA + C_ABB + C_ABCA + C_ABCB + C_ABCC + # dD_dtheta + + #FIXME + H = self.Kmm + mdot(self.psi1,self.beta_star*self.psi1.T) + H = self.Kmm_ + mdot(self.psi1,self.beta_star*self.psi1.T) + H = self.Kmm_ + mdot(self.psi1,self.true_beta_star*self.psi1.T) + Hi, LH, LHi, logdetH = pdinv(H) + + + self.q1 = .5* mdot(self.V_star.T,self.true_psi1.T,Hi,self.true_psi1,self.V_star) + self.q1 = .5* mdot(self.true_V_star.T,self.psi1.T,Hi,self.psi1,self.true_V_star) + #self.q1 = .5* mdot(self.true_V_star.T,self.true_psi1.T,Hi,self.true_psi1,self.true_V_star) + #self.q2 = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) + + # D_B + gamma_1 = mdot(VVT,self.psi1.T,Hi) #TODO restore + #gamma_1 = mdot(VVT,self.true_psi1.T,Hi) + dD_B = gamma_1 + D_B = self.kern.dK_dtheta(dD_B,self.X,self.Z) #check + + # D_C + #dD_CA = -.5 * mdot(Hi,self.psi1,gamma_1) #TODO restore + dD_CA = -.5 * mdot(Hi,self.true_psi1,gamma_1) + D_CA = self.kern.dK_dtheta(dD_CA,self.Z) #check + + # D_CB + dD_CBA = - mdot(psi1beta.T,Hi,self.psi1,gamma_1) + D_CBA = self.kern.dK_dtheta(dD_CBA,self.X,self.Z) + # D_CBB + pHip = mdot(self.psi1.T,Hi,self.psi1) + gamma_2 = mdot(self.beta_star*pHip,self.V_star) + D_CBBA = .5 * self.kern.dKdiag_dtheta(gamma_2**2,self.X) + + D_CBBB = 0 + for psi1_n,gamma_n,X_n in zip(self.psi1.T,gamma_2,self.X): + dD_CBBB = - gamma_n**2 * np.dot(psi1_n[None,:],Kmmi) + D_CBBB += self.kern.dK_dtheta(dD_CBBB,X_n[:,None],self.Z) + + D_CBBC = 0 + for psi1_n,gamma_n,X_n in zip(self.psi1.T,gamma_2,self.X): + psin_K = np.dot(psi1_n[None,:],Kmmi) + tmp = np.dot(psin_K.T,psin_K) + dD_CBBC = .5*gamma_n**2 * tmp + D_CBBC += self.kern.dK_dtheta(dD_CBBC,self.Z) + + # D_A + pHip = mdot(self.psi1.T,Hi,self.psi1) #TODO remove defined above + gamma_3 = self.true_V_star * mdot(self.true_V_star.T,pHip*self.true_beta_star).T + #gamma_3 = self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T + dD_AA = - gamma_3 + D_AA = self.kern.dKdiag_dtheta(dD_AA,self.X) #check + + D_AB = 0 + #for psi1_n,gamma_n,X_n in zip(self.psi1.T,gamma_3,self.X): + for psi1_n,gamma_n,X_n in zip(self.true_psi1.T,gamma_3,self.X): + dD_AB = 2. * gamma_n * np.dot(psi1_n[None,:],Kmmi) + D_AB += self.kern.dK_dtheta(dD_AB,X_n[:,None],self.Z) #check + + D_AC = 0 + #for psi1_n,gamma_n,X_n in zip(self.psi1.T,gamma_3,self.X): + for psi1_n,gamma_n,X_n in zip(self.true_psi1.T,gamma_3,self.X): + psin_K = np.dot(psi1_n[None,:],Kmmi) + tmp = np.dot(psin_K.T,psin_K) + dD_AC = - gamma_n * tmp + D_AC += self.kern.dK_dtheta(dD_AC,self.Z) #check + + #self.dD_dtheta = D_AA + D_AB + D_AC + D_B + D_CA + D_CBA + D_CBBA + D_CBBB + D_CBBC + self.dD_dtheta = D_AA + D_AB + D_AC + #self.q1 = .5* mdot(self.V_star_.T,self.true_psi1.T,self.Hi_,self.true_psi1,self.V_star_) + + # the partial derivative vector for the likelihood if self.likelihood.Nparams == 0: @@ -194,8 +271,10 @@ class FITC(sparse_GP): def log_likelihood(self): """ Compute the (lower bound on the) log marginal likelihood """ - A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y) - C = -self.D * (np.sum(np.log(np.diag(self.LB)))) + #A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y) + #C = -self.D * (np.sum(np.log(np.diag(self.LB)))) + #D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) + D = self.q1 """ A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y) #B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) @@ -203,7 +282,8 @@ class FITC(sparse_GP): D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) return A + C + D # +B """ - return A+C + #return A+C + return D def _log_likelihood_gradients(self): @@ -214,8 +294,9 @@ class FITC(sparse_GP): #dL_dtheta = self.dlogB_dtheta #dL_dtheta = self.dyby_dtheta #dL_dtheta = self.dlogbeta_dtheta + self.dyby_dtheta - dL_dtheta = self.dlogB_dtheta - dL_dtheta = self.dlogbeta_dtheta + self.dyby_dtheta + self.dlogB_dtheta + #dL_dtheta = self.dlogbeta_dtheta + self.dyby_dtheta + self.dlogB_dtheta + dL_dtheta = self.dD_dtheta + """ dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z) if self.has_uncertain_inputs: