From 1b5eed890a88959fff40bd300644357817df1dc0 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 28 Nov 2013 10:54:13 +0000 Subject: [PATCH 01/10] documenting --- GPy/models.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/GPy/models.py b/GPy/models.py index 3b2683ea..76d14819 100644 --- a/GPy/models.py +++ b/GPy/models.py @@ -1,13 +1,15 @@ ''' -GPy Models -========== +.. module:: GPy.models Implementations for common models used in GP regression and classification. The different models can be viewed in :mod:`GPy.models_modules`, which holds detailed explanations for the different models. -:warning: This module is a convienince module for endusers to use. For developers -see :mod:`GPy.models_modules`, which holds the implementions for each model. +.. note:: + This module is a convienince module for endusers to use. For developers + see :mod:`GPy.models_modules`, which holds the implementions for each model.: + +.. moduleauthor:: Max Zwiessele ''' __updated__ = '2013-11-28' From c793e5d916eb6bca254099a1ae8eeb7f0103d3b7 Mon Sep 17 00:00:00 2001 From: mu Date: Wed, 11 Dec 2013 16:24:35 +0000 Subject: [PATCH 02/10] UY dkdtheta --- GPy/kern/parts/ODE_UY.py | 60 +++++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 13 deletions(-) diff --git a/GPy/kern/parts/ODE_UY.py b/GPy/kern/parts/ODE_UY.py index bb736cc5..3ddf174b 100644 --- a/GPy/kern/parts/ODE_UY.py +++ b/GPy/kern/parts/ODE_UY.py @@ -186,20 +186,29 @@ class ODE_UY(Kernpart): def dK_dtheta(self, dL_dK, X, X2, target): """derivative of the covariance matrix with respect to the parameters.""" - if X2 is None: X2 = X - dist = np.abs(X - X2.T) - + X,slices = X[:,:-1],index_to_slices(X[:,-1]) if X2 is None: X2,slices2 = X,slices else: X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) - - + #rdist = X[:,0][:,None] - X2[:,0][:,None].T + rdist = X - X2.T ly=1/self.lengthscaleY lu=np.sqrt(3)/self.lengthscaleU - #ly=self.lengthscaleY - #lu=self.lengthscaleU + + rd=rdist.shape[0] + dktheta1 = np.zeros([rd,rd]) + dktheta2 = np.zeros([rd,rd]) + dkdvar = np.zeros([rd,rd]) + + # dk dtheta for UU + UUdtheta1 = lambda dist: np.exp(-lu* dist)*dist + (-dist)*np.exp(-lu* dist)*(1+lu*dist) + UUdtheta2 = lambda dist: 0 + UUdvar = lambda dist: (1 + lu *dist) * np.exp(-lu* dist) + + + # dk dtheta for YY dk1theta1 = lambda dist: np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3 #c=np.sqrt(3) @@ -216,7 +225,7 @@ class ODE_UY(Kernpart): dk3theta1 = lambda dist: np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist) - dktheta1 = lambda dist: self.varianceU*self.varianceY*(dk1theta1+dk2theta1+dk3theta1) + #dktheta1 = lambda dist: self.varianceU*self.varianceY*(dk1theta1+dk2theta1+dk3theta1) @@ -230,14 +239,20 @@ class ODE_UY(Kernpart): dk3theta2 = lambda dist: np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3 - dktheta2 = lambda dist: self.varianceU*self.varianceY*(dk1theta2 + dk2theta2 +dk3theta2) + #dktheta2 = lambda dist: self.varianceU*self.varianceY*(dk1theta2 + dk2theta2 +dk3theta2) k1 = lambda dist: np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2 k2 = lambda dist: (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2 k3 = lambda dist: np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 ) - dkdvar = k1+k2+k3 + #dkdvar = k1+k2+k3 + + + # dk dtheta for UY + + + for i, s1 in enumerate(slices): @@ -246,16 +261,35 @@ class ODE_UY(Kernpart): for ss2 in s2: if i==0 and j==0: #target[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2])) + #dktheta1[ss1,ss2] = + #dktheta2[ss1,ss2] = + #dkdvar[ss1,ss2] = + dktheta1[ss1,ss2] = self.varianceU*self.varianceY*UUdtheta1(rdist[ss1,ss2]) + dktheta2[ss1,ss2] = 0 + dkdvar[ss1,ss2] = self.varianceY*UUdvar(rdist[ss1,ss2]) elif i==0 and j==1: #target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) ) + #dktheta1[ss1,ss2] = + #dktheta2[ss1,ss2] = + #dkdvar[ss1,ss2] = + dktheta1[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))+dk3theta1(np.abs(rdist[ss1,ss2]))) + dktheta2[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta2(np.abs(rdist[ss1,ss2])) + dk2theta2(np.abs(rdist[ss1,ss2])) +dk3theta2(np.abs(rdist[ss1,ss2]))) + dkdvar[ss1,ss2] = k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) elif i==1 and j==1: #target[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2])) + dktheta1[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))+dk3theta1(np.abs(rdist[ss1,ss2]))) + dktheta2[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta2(np.abs(rdist[ss1,ss2])) + dk2theta2(np.abs(rdist[ss1,ss2])) +dk3theta2(np.abs(rdist[ss1,ss2]))) + dkdvar[ss1,ss2] = k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) else: #target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[s1[0],s2[0]]) ) ) + #dktheta1[ss1,ss2] = + #dktheta2[ss1,ss2] = + #dkdvar[ss1,ss2] = + dktheta1[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))+dk3theta1(np.abs(rdist[ss1,ss2]))) + dktheta2[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta2(np.abs(rdist[ss1,ss2])) + dk2theta2(np.abs(rdist[ss1,ss2])) +dk3theta2(np.abs(rdist[ss1,ss2]))) + dkdvar[ss1,ss2] = k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) - - - + #stop target[0] += np.sum(self.varianceY*dkdvar * dL_dK) From 054b98d55b381281faeaf1631f231c43a3776059 Mon Sep 17 00:00:00 2001 From: mu Date: Fri, 13 Dec 2013 13:39:28 +0000 Subject: [PATCH 03/10] UY dkdtheta --- GPy/kern/parts/ODE_UY.py | 68 +++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/GPy/kern/parts/ODE_UY.py b/GPy/kern/parts/ODE_UY.py index 3ddf174b..66f36e2f 100644 --- a/GPy/kern/parts/ODE_UY.py +++ b/GPy/kern/parts/ODE_UY.py @@ -114,20 +114,28 @@ class ODE_UY(Kernpart): Vu=self.varianceU Vy=self.varianceY + # kernel for kuu matern3/2 kuu = lambda dist:Vu * (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist)) + # kernel for kyy k1 = lambda dist:np.exp(-ly*np.abs(dist))*(2*lu+ly)/(lu+ly)**2 k2 = lambda dist:(np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2 k3 = lambda dist:np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 ) kyy = lambda dist:Vu*Vy*(k1(dist) + k2(dist) + k3(dist)) + + # cross covariance function kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly))) + + # cross covariance kyu kyup = lambda dist:Vu*Vy*(k1(dist)+k2(dist)) #t>0 kyu kyun = lambda dist:Vu*Vy*(kyu3(dist)) #t<0 kyu + # cross covariance kuy kuyp = lambda dist:Vu*Vy*(kyu3(dist)) #t>0 kuy kuyn = lambda dist:Vu*Vy*(k1(dist)+k2(dist)) #t<0 kuy + for i, s1 in enumerate(slices): for j, s2 in enumerate(slices2): for ss1 in s1: @@ -135,12 +143,13 @@ class ODE_UY(Kernpart): if i==0 and j==0: target[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2])) elif i==0 and j==1: - target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) ) + #target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) ) + target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[ss1,ss2]) ) ) elif i==1 and j==1: target[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2])) else: - target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[s1[0],s2[0]]) ) ) - + #target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[s1[0],s2[0]]) ) ) + target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[ss1,ss2]) ) ) #KUU = kuu(np.abs(rdist[:iu,:iu])) @@ -205,8 +214,8 @@ class ODE_UY(Kernpart): # dk dtheta for UU UUdtheta1 = lambda dist: np.exp(-lu* dist)*dist + (-dist)*np.exp(-lu* dist)*(1+lu*dist) UUdtheta2 = lambda dist: 0 - UUdvar = lambda dist: (1 + lu *dist) * np.exp(-lu* dist) - + #UUdvar = lambda dist: (1 + lu*dist)*np.exp(-lu*dist) + UUdvar = lambda dist: (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist)) # dk dtheta for YY @@ -241,18 +250,33 @@ class ODE_UY(Kernpart): #dktheta2 = lambda dist: self.varianceU*self.varianceY*(dk1theta2 + dk2theta2 +dk3theta2) - - + # kyy kernel + #k1 = lambda dist: np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2 + #k2 = lambda dist: (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2 + #k3 = lambda dist: np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 ) k1 = lambda dist: np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2 k2 = lambda dist: (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2 k3 = lambda dist: np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 ) #dkdvar = k1+k2+k3 + #cross covariance kernel + kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly))) # dk dtheta for UY + dkcrtheta2 = lambda dist: np.exp(-lu*dist) * ( (-1)*(lu+ly)**(-2)*(1+lu*dist+lu*(lu+ly)**(-1)) + (lu+ly)**(-1)*(-lu)*(lu+ly)**(-2) ) + dkcrtheta1 = lambda dist: np.exp(-lu*dist)*(lu+ly)**(-1)* ( (-dist)*(1+dist*lu+lu*(lu+ly)**(-1)) - (lu+ly)**(-1)*(1+dist*lu+lu*(lu+ly)**(-1)) +dist+(lu+ly)**(-1)-lu*(lu+ly)**(-2) ) + #dkuyp dtheta + #dkuyp dtheta1 = self.varianceU*self.varianceY* (dk1theta1() + dk2theta1()) + #dkuyp dtheta2 = self.varianceU*self.varianceY* (dk1theta2() + dk2theta2()) + #dkuyp dVar = k1() + k2() - + #dkyup dtheta + #dkyun dtheta1 = self.varianceU*self.varianceY* (dk1theta1() + dk2theta1()) + #dkyun dtheta2 = self.varianceU*self.varianceY* (dk1theta2() + dk2theta2()) + #dkyup dVar = k1() + k2() # + + for i, s1 in enumerate(slices): @@ -261,34 +285,28 @@ class ODE_UY(Kernpart): for ss2 in s2: if i==0 and j==0: #target[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2])) - #dktheta1[ss1,ss2] = - #dktheta2[ss1,ss2] = - #dkdvar[ss1,ss2] = - dktheta1[ss1,ss2] = self.varianceU*self.varianceY*UUdtheta1(rdist[ss1,ss2]) + dktheta1[ss1,ss2] = self.varianceU*self.varianceY*UUdtheta1(np.abs(rdist[ss1,ss2])) dktheta2[ss1,ss2] = 0 - dkdvar[ss1,ss2] = self.varianceY*UUdvar(rdist[ss1,ss2]) + dkdvar[ss1,ss2] = UUdvar(np.abs(rdist[ss1,ss2])) elif i==0 and j==1: #target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) ) #dktheta1[ss1,ss2] = #dktheta2[ss1,ss2] = - #dkdvar[ss1,ss2] = - dktheta1[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))+dk3theta1(np.abs(rdist[ss1,ss2]))) - dktheta2[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta2(np.abs(rdist[ss1,ss2])) + dk2theta2(np.abs(rdist[ss1,ss2])) +dk3theta2(np.abs(rdist[ss1,ss2]))) - dkdvar[ss1,ss2] = k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) + #dkdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) ) + dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , dkcrtheta1(np.abs(rdist[ss1,ss2])) ,self.varianceU*self.varianceY*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) ) + dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , dkcrtheta2(np.abs(rdist[ss1,ss2])) ,self.varianceU*self.varianceY*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) ) + dkdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyu3(np.abs(rdist[ss1,ss2])) ,k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2])) ) + #stop elif i==1 and j==1: #target[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2])) dktheta1[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))+dk3theta1(np.abs(rdist[ss1,ss2]))) dktheta2[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta2(np.abs(rdist[ss1,ss2])) + dk2theta2(np.abs(rdist[ss1,ss2])) +dk3theta2(np.abs(rdist[ss1,ss2]))) - dkdvar[ss1,ss2] = k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) + dkdvar[ss1,ss2] = (k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) ) else: #target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[s1[0],s2[0]]) ) ) - #dktheta1[ss1,ss2] = - #dktheta2[ss1,ss2] = - #dkdvar[ss1,ss2] = - dktheta1[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))+dk3theta1(np.abs(rdist[ss1,ss2]))) - dktheta2[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta2(np.abs(rdist[ss1,ss2])) + dk2theta2(np.abs(rdist[ss1,ss2])) +dk3theta2(np.abs(rdist[ss1,ss2]))) - dkdvar[ss1,ss2] = k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) - + dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.varianceU*self.varianceY*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) , dkcrtheta1(np.abs(rdist[ss1,ss2])) ) + dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.varianceU*self.varianceY*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) , dkcrtheta2(np.abs(rdist[ss1,ss2])) ) + dkdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2])), kyu3(np.abs(rdist[ss1,ss2])) ) #stop From 4d82f303676bf9698a81d50eae8bcc51d4f4fb3b Mon Sep 17 00:00:00 2001 From: Andreas Date: Fri, 13 Dec 2013 14:01:01 +0000 Subject: [PATCH 04/10] Small changes in svigp --- GPy/core/svigp.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/GPy/core/svigp.py b/GPy/core/svigp.py index c5ea9c6b..94edad93 100644 --- a/GPy/core/svigp.py +++ b/GPy/core/svigp.py @@ -52,7 +52,6 @@ class SVIGP(GPBase): self.Y = self.likelihood.Y.copy() self.Z = Z self.num_inducing = Z.shape[0] - self.batchcounter = 0 self.epochs = 0 self.iterations = 0 @@ -318,12 +317,12 @@ class SVIGP(GPBase): #Iterate! for i in range(iterations): - + #store the current configuration for plotting later self._param_trace.append(self._get_params()) self._ll_trace.append(self.log_likelihood() + self.log_prior()) - #load a batch + #load a batch and do the appropriate computations (kernel matrices, etc) self.load_batch() #compute the (stochastic) gradient @@ -333,7 +332,8 @@ class SVIGP(GPBase): #compute the steps in all parameters vb_step = self.vb_steplength*natgrads[0] - if (self.epochs>=1):#only move the parameters after the first epoch + #only move the parameters after the first epoch and only if the steplength is nonzero + if (self.epochs>=1) and (self.param_steplength > 0): param_step = self.momentum*param_step + self.param_steplength*grads else: param_step = 0. @@ -355,6 +355,8 @@ class SVIGP(GPBase): if self.epochs > 10: self._adapt_steplength() + self._vb_steplength_trace.append(self.vb_steplength) + self._param_steplength_trace.append(self.param_steplength) self.iterations += 1 @@ -363,17 +365,20 @@ class SVIGP(GPBase): if self.adapt_vb_steplength: # self._adaptive_vb_steplength() self._adaptive_vb_steplength_KL() - self._vb_steplength_trace.append(self.vb_steplength) - assert self.vb_steplength > 0 + #self._vb_steplength_trace.append(self.vb_steplength) + assert self.vb_steplength >= 0 if self.adapt_param_steplength: self._adaptive_param_steplength() # self._adaptive_param_steplength_log() # self._adaptive_param_steplength_from_vb() - self._param_steplength_trace.append(self.param_steplength) + #self._param_steplength_trace.append(self.param_steplength) def _adaptive_param_steplength(self): - decr_factor = 0.02 + if hasattr(self, 'adapt_param_steplength_decr'): + decr_factor = self.adapt_param_steplength_decr + else: + decr_factor = 0.02 g_tp = self._transform_gradients(self._log_likelihood_gradients()) self.gbar_tp = (1-1/self.tau_tp)*self.gbar_tp + 1/self.tau_tp * g_tp self.hbar_tp = (1-1/self.tau_tp)*self.hbar_tp + 1/self.tau_tp * np.dot(g_tp.T, g_tp) From fa08d20f583562e29ae1d1c5409a406f2da6d17c Mon Sep 17 00:00:00 2001 From: mu Date: Fri, 13 Dec 2013 14:10:03 +0000 Subject: [PATCH 05/10] ODE UY dkdtheta --- GPy/kern/parts/ODE_UY.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/GPy/kern/parts/ODE_UY.py b/GPy/kern/parts/ODE_UY.py index 66f36e2f..f6bfd37d 100644 --- a/GPy/kern/parts/ODE_UY.py +++ b/GPy/kern/parts/ODE_UY.py @@ -209,7 +209,8 @@ class ODE_UY(Kernpart): rd=rdist.shape[0] dktheta1 = np.zeros([rd,rd]) dktheta2 = np.zeros([rd,rd]) - dkdvar = np.zeros([rd,rd]) + dkUdvar = np.zeros([rd,rd]) + dkYdvar = np.zeros([rd,rd]) # dk dtheta for UU UUdtheta1 = lambda dist: np.exp(-lu* dist)*dist + (-dist)*np.exp(-lu* dist)*(1+lu*dist) @@ -287,7 +288,8 @@ class ODE_UY(Kernpart): #target[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2])) dktheta1[ss1,ss2] = self.varianceU*self.varianceY*UUdtheta1(np.abs(rdist[ss1,ss2])) dktheta2[ss1,ss2] = 0 - dkdvar[ss1,ss2] = UUdvar(np.abs(rdist[ss1,ss2])) + dkUdvar[ss1,ss2] = UUdvar(np.abs(rdist[ss1,ss2])) + dkYdvar[ss1,ss2] = 0 elif i==0 and j==1: #target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) ) #dktheta1[ss1,ss2] = @@ -295,23 +297,24 @@ class ODE_UY(Kernpart): #dkdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) ) dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , dkcrtheta1(np.abs(rdist[ss1,ss2])) ,self.varianceU*self.varianceY*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) ) dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , dkcrtheta2(np.abs(rdist[ss1,ss2])) ,self.varianceU*self.varianceY*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) ) - dkdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyu3(np.abs(rdist[ss1,ss2])) ,k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2])) ) - #stop + dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyu3(np.abs(rdist[ss1,ss2])) ,k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2])) ) + dkYdvar[ss1,ss2] = dkUdvar[ss1,ss2] elif i==1 and j==1: #target[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2])) dktheta1[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))+dk3theta1(np.abs(rdist[ss1,ss2]))) dktheta2[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta2(np.abs(rdist[ss1,ss2])) + dk2theta2(np.abs(rdist[ss1,ss2])) +dk3theta2(np.abs(rdist[ss1,ss2]))) - dkdvar[ss1,ss2] = (k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) ) + dkUdvar[ss1,ss2] = (k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) ) + dkYdvar[ss1,ss2] = dkUdvar[ss1,ss2] else: #target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[s1[0],s2[0]]) ) ) dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.varianceU*self.varianceY*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) , dkcrtheta1(np.abs(rdist[ss1,ss2])) ) dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.varianceU*self.varianceY*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) , dkcrtheta2(np.abs(rdist[ss1,ss2])) ) - dkdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2])), kyu3(np.abs(rdist[ss1,ss2])) ) - #stop + dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2])), kyu3(np.abs(rdist[ss1,ss2])) ) + dkYdvar[ss1,ss2] = dkUdvar[ss1,ss2] - target[0] += np.sum(self.varianceY*dkdvar * dL_dK) - target[1] += np.sum(self.varianceU*dkdvar * dL_dK) + target[0] += np.sum(self.varianceY*dkUdvar * dL_dK) + target[1] += np.sum(self.varianceU*dkYdvar * dL_dK) target[2] += np.sum(dktheta1*(-np.sqrt(3)*self.lengthscaleU**(-2)) * dL_dK) target[3] += np.sum(dktheta2*(-self.lengthscaleY**(-2)) * dL_dK) From 415e3256c0767a938ecab3e855a3d2d2c85d2adf Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 16 Dec 2013 11:35:47 +0000 Subject: [PATCH 06/10] subarray indexing --- GPy/util/subarray_and_sorting.py | 56 ++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 GPy/util/subarray_and_sorting.py diff --git a/GPy/util/subarray_and_sorting.py b/GPy/util/subarray_and_sorting.py new file mode 100644 index 00000000..49385771 --- /dev/null +++ b/GPy/util/subarray_and_sorting.py @@ -0,0 +1,56 @@ +''' +.. module:: GPy.util.subarray_and_sorting + +.. moduleauthor:: Max Zwiessele + +''' +__updated__ = '2013-12-02' + +import numpy as np + +def common_subarrays(X, axis=0): + """ + Find common subarrays of 2 dimensional X, where axis is the axis to apply the search over. + Common subarrays are returned as a dictionary of pairs, where + the subarray is a tuple representing the subarray and the index is the index + for the subarray in X, where index is the index to the remaining axis. + + :param :class:`np.ndarray` X: 2d array to check for common subarrays in + :param int axis: axis to apply subarray detection over. + When the index is 0, compare rows, columns, otherwise. + + Examples: + ========= + + In a 2d array: + >>> import numpy as np + >>> X = np.zeros((3,6), dtype=bool) + >>> X[[1,1,1],[0,4,5]] = 1; X[1:,[2,3]] = 1 + >>> X + array([[False, False, False, False, False, False], + [ True, False, True, True, True, True], + [False, False, True, True, False, False]], dtype=bool) + >>> d = common_subarrays(X,axis=1) + >>> len(d) + 3 + >>> X[:, d[tuple(X[:,0])]] + array([[False, False, False], + [ True, True, True], + [False, False, False]], dtype=bool) + >>> d[tuple(X[:,4])] == d[tuple(X[:,0])] == [0, 4, 5] + True + >>> d[tuple(X[:,1])] + [1] + """ + from collections import defaultdict + from itertools import count + from operator import iadd + assert X.ndim == 2 and axis in (0,1), "Only implemented for 2D arrays" + subarrays = defaultdict(list) + cnt = count() + np.apply_along_axis(lambda x: iadd(subarrays[tuple(x)], [cnt.next()]), 1-axis, X) + return subarrays + +if __name__ == '__main__': + import doctest + doctest.testmod() \ No newline at end of file From 60b299bd5d12c5453ef94e989427bcf76b8302f3 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 16 Dec 2013 11:36:01 +0000 Subject: [PATCH 07/10] diagonal operations --- GPy/util/diag.py | 114 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 GPy/util/diag.py diff --git a/GPy/util/diag.py b/GPy/util/diag.py new file mode 100644 index 00000000..3d6b4dc9 --- /dev/null +++ b/GPy/util/diag.py @@ -0,0 +1,114 @@ +''' +.. module:: GPy.util.diag + +.. moduleauthor:: Max Zwiessele + +''' +__updated__ = '2013-12-03' + +import numpy as np + +def view(A, offset=0): + """ + Get a view on the diagonal elements of a 2D array. + + This is actually a view (!) on the diagonal of the array, so you can + in-place adjust the view. + + :param :class:`ndarray` A: 2 dimensional numpy array + :param int offset: view offset to give back (negative entries allowed) + :rtype: :class:`ndarray` view of diag(A) + + >>> import numpy as np + >>> X = np.arange(9).reshape(3,3) + >>> view(X) + array([0, 4, 8]) + >>> d = view(X) + >>> d += 2 + >>> view(X) + array([ 2, 6, 10]) + >>> view(X, offset=-1) + array([3, 7]) + >>> subtract(X, 3, offset=-1) + array([[ 2, 1, 2], + [ 0, 6, 5], + [ 6, 4, 10]]) + """ + from numpy.lib.stride_tricks import as_strided + assert A.ndim == 2, "only implemented for 2 dimensions" + assert A.shape[0] == A.shape[1], "attempting to get the view of non-square matrix?!" + if offset > 0: + return as_strided(A[0, offset:], shape=(A.shape[0] - offset, ), strides=((A.shape[0]+1)*A.itemsize, )) + elif offset < 0: + return as_strided(A[-offset:, 0], shape=(A.shape[0] + offset, ), strides=((A.shape[0]+1)*A.itemsize, )) + else: + return as_strided(A, shape=(A.shape[0], ), strides=((A.shape[0]+1)*A.itemsize, )) + +def _diag_ufunc(A,b,offset,func): + dA = view(A, offset); func(dA,b,dA) + return A + +def times(A, b, offset=0): + """ + Times the view of A with b in place (!). + Returns modified A + Broadcasting is allowed, thus b can be scalar. + + if offset is not zero, make sure b is of right shape! + + :param ndarray A: 2 dimensional array + :param ndarray-like b: either one dimensional or scalar + :param int offset: same as in view. + :rtype: view of A, which is adjusted inplace + """ + return _diag_ufunc(A, b, offset, np.multiply) +multiply = times + +def divide(A, b, offset=0): + """ + Divide the view of A by b in place (!). + Returns modified A + Broadcasting is allowed, thus b can be scalar. + + if offset is not zero, make sure b is of right shape! + + :param ndarray A: 2 dimensional array + :param ndarray-like b: either one dimensional or scalar + :param int offset: same as in view. + :rtype: view of A, which is adjusted inplace + """ + return _diag_ufunc(A, b, offset, np.divide) + +def add(A, b, offset=0): + """ + Add b to the view of A in place (!). + Returns modified A. + Broadcasting is allowed, thus b can be scalar. + + if offset is not zero, make sure b is of right shape! + + :param ndarray A: 2 dimensional array + :param ndarray-like b: either one dimensional or scalar + :param int offset: same as in view. + :rtype: view of A, which is adjusted inplace + """ + return _diag_ufunc(A, b, offset, np.add) + +def subtract(A, b, offset=0): + """ + Subtract b from the view of A in place (!). + Returns modified A. + Broadcasting is allowed, thus b can be scalar. + + if offset is not zero, make sure b is of right shape! + + :param ndarray A: 2 dimensional array + :param ndarray-like b: either one dimensional or scalar + :param int offset: same as in view. + :rtype: view of A, which is adjusted inplace + """ + return _diag_ufunc(A, b, offset, np.subtract) + +if __name__ == '__main__': + import doctest + doctest.testmod() \ No newline at end of file From f9c9e8e1d5177376a258cd8a937396ac04279654 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 16 Dec 2013 11:36:23 +0000 Subject: [PATCH 08/10] ppca added, ppca missing data not working yet --- GPy/util/linalg.py | 122 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 120 insertions(+), 2 deletions(-) diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index e3e421f6..842178e2 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -217,7 +217,7 @@ def multiple_pdinv(A): return np.dstack(invs), np.array(halflogdets) -def PCA(Y, input_dim): +def pca(Y, input_dim): """ Principal component analysis: maximum likelihood solution by SVD @@ -230,7 +230,7 @@ def PCA(Y, input_dim): """ if not np.allclose(Y.mean(axis=0), 0.0): - print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)" + print "Y is not zero mean, centering it locally (GPy.util.linalg.pca)" # Y -= Y.mean(axis=0) @@ -241,6 +241,124 @@ def PCA(Y, input_dim): W *= v; return X, W.T +def ppca(Y, Q, iterations=100): + """ + EM implementation for probabilistic pca. + + :param array-like Y: Observed Data + :param int Q: Dimensionality for reduced array + :param int iterations: number of iterations for EM + """ + from numpy.ma import dot as madot + N, D = Y.shape + # Initialise W randomly + W = np.random.randn(D, Q) * 1e-3 + Y = np.ma.masked_invalid(Y, copy=0) + mu = Y.mean(0) + Ycentered = Y - mu + try: + for _ in range(iterations): + exp_x = np.asarray_chkfinite(np.linalg.solve(W.T.dot(W), madot(W.T, Ycentered.T))).T + W = np.asarray_chkfinite(np.linalg.solve(exp_x.T.dot(exp_x), madot(exp_x.T, Ycentered))).T + except np.linalg.linalg.LinAlgError: + #"converged" + pass + return np.asarray_chkfinite(exp_x), np.asarray_chkfinite(W) + +def ppca_missing_data_at_random(Y, Q, iters=100): + """ + EM implementation of Probabilistic pca for when there is missing data. + + Taken from + + .. math: + \\mathbf{Y} = \mathbf{XW} + \\epsilon \\text{, where} + \\epsilon = \\mathcal{N}(0, \\sigma^2 \mathbf{I}) + + :returns: X, W, sigma^2 + """ + from numpy.ma import dot as madot + import diag + from GPy.util.subarray_and_sorting import common_subarrays + import time + debug = 1 + # Initialise W randomly + N, D = Y.shape + W = np.random.randn(Q, D) * 1e-3 + Y = np.ma.masked_invalid(Y, copy=1) + nu = 1. + #num_obs_i = 1./Y.count() + Ycentered = Y - Y.mean(0) + + X = np.zeros((N,Q)) + cs = common_subarrays(Y.mask) + cr = common_subarrays(Y.mask, 1) + Sigma = np.zeros((N, Q, Q)) + Sigma2 = np.zeros((N, Q, Q)) + mu = np.zeros(D) + if debug: + import matplotlib.pyplot as pylab + fig = pylab.figure("FIT MISSING DATA"); + ax = fig.gca() + ax.cla() + lines = pylab.plot(np.zeros((N,Q)).dot(W)) + W2 = np.zeros((Q,D)) + + for i in range(iters): +# Sigma = np.linalg.solve(diag.add(madot(W,W.T), nu), diag.times(np.eye(Q),nu)) +# exp_x = madot(madot(Ycentered, W.T),Sigma)/nu +# Ycentered = (Y - exp_x.dot(W).mean(0)) +# #import ipdb;ipdb.set_trace() +# #Ycentered = mu +# W = np.linalg.solve(madot(exp_x.T,exp_x) + Sigma, madot(exp_x.T, Ycentered)) +# nu = (((Ycentered - madot(exp_x, W))**2).sum(0) + madot(W.T,madot(Sigma,W)).sum(0)).sum()/N + for csi, (mask, index) in enumerate(cs.iteritems()): + mask = ~np.array(mask) + Sigma2[index, :, :] = nu * np.linalg.inv(diag.add(W2[:,mask].dot(W2[:,mask].T), nu)) + #X[index,:] = madot((Sigma[csi]/nu),madot(W,Ycentered[index].T))[:,0] + X2 = ((Sigma2/nu) * (madot(Ycentered,W2.T).base)[:,:,None]).sum(-1) + mu2 = (Y - X.dot(W)).mean(0) + for n in range(N): + Sigma[n] = nu * np.linalg.inv(diag.add(W[:,~Y.mask[n]].dot(W[:,~Y.mask[n]].T), nu)) + X[n, :] = (Sigma[n]/nu).dot(W[:,~Y.mask[n]].dot(Ycentered[n,~Y.mask[n]].T)) + for d in range(D): + mu[d] = (Y[~Y.mask[:,d], d] - X[~Y.mask[:,d]].dot(W[:, d])).mean() + Ycentered = (Y - mu) + nu3 = 0. + for cri, (mask, index) in enumerate(cr.iteritems()): + mask = ~np.array(mask) + W2[:,index] = np.linalg.solve(X[mask].T.dot(X[mask]) + Sigma[mask].sum(0), madot(X[mask].T, Ycentered[mask,index]))[:,None] + W2[:,index] = np.linalg.solve(X.T.dot(X) + Sigma.sum(0), madot(X.T, Ycentered[:,index])) + #nu += (((Ycentered[mask,index] - X[mask].dot(W[:,index]))**2).sum(0) + W[:,index].T.dot(Sigma[mask].sum(0).dot(W[:,index])).sum(0)).sum() + nu3 += (((Ycentered[index] - X.dot(W[:,index]))**2).sum(0) + W[:,index].T.dot(Sigma.sum(0).dot(W[:,index])).sum(0)).sum() + nu3 /= N + nu = 0. + nu2 = 0. + W = np.zeros((Q,D)) + for j in range(D): + W[:,j] = np.linalg.solve(X[~Y.mask[:,j]].T.dot(X[~Y.mask[:,j]]) + Sigma[~Y.mask[:,j]].sum(0), madot(X[~Y.mask[:,j]].T, Ycentered[~Y.mask[:,j],j])) + nu2f = np.tensordot(W[:,j].T, Sigma[~Y.mask[:,j],:,:], [0,1]).dot(W[:,j]) + nu2s = W[:,j].T.dot(Sigma[~Y.mask[:,j],:,:].sum(0).dot(W[:,j])) + nu2 += (((Ycentered[~Y.mask[:,j],j] - X[~Y.mask[:,j],:].dot(W[:,j]))**2) + nu2f).sum() + for i in range(N): + if not Y.mask[i,j]: + nu += ((Ycentered[i,j] - X[i,:].dot(W[:,j]))**2) + W[:,j].T.dot(Sigma[i,:,:].dot(W[:,j])) + nu /= N + nu2 /= N + nu4 = (((Ycentered - X.dot(W))**2).sum(0) + W.T.dot(Sigma.sum(0).dot(W)).sum(0)).sum()/N + import ipdb;ipdb.set_trace() + if debug: + #print Sigma[0] + print "nu:", nu, "sum(X):", X.sum() + pred_y = X.dot(W) + for x, l in zip(pred_y.T, lines): + l.set_ydata(x) + ax.autoscale_view() + ax.set_ylim(pred_y.min(), pred_y.max()) + fig.canvas.draw() + time.sleep(.3) + return np.asarray_chkfinite(X), np.asarray_chkfinite(W), nu + def tdot_numpy(mat, out=None): return np.dot(mat, mat.T, out) From a6725b55e11e68ba5d97ac37bd2a4e34864031f6 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 16 Dec 2013 11:37:42 +0000 Subject: [PATCH 09/10] pca adjustements to lvm models --- GPy/models_modules/bayesian_gplvm.py | 92 +++++++++++++++++++++++----- GPy/models_modules/gplvm.py | 21 +++---- GPy/models_modules/mrd.py | 14 ++--- 3 files changed, 94 insertions(+), 33 deletions(-) diff --git a/GPy/models_modules/bayesian_gplvm.py b/GPy/models_modules/bayesian_gplvm.py index 90e54111..57e50955 100644 --- a/GPy/models_modules/bayesian_gplvm.py +++ b/GPy/models_modules/bayesian_gplvm.py @@ -2,17 +2,18 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np +import itertools +from matplotlib import pyplot + from ..core.sparse_gp import SparseGP from ..likelihoods import Gaussian from .. import kern -import itertools -from matplotlib.colors import colorConverter -from GPy.inference.optimization import SCG -from GPy.util import plot_latent, linalg -from .gplvm import GPLVM -from GPy.util.plot_latent import most_significant_input_dimensions -from matplotlib import pyplot -from GPy.core.model import Model +from ..inference.optimization import SCG +from ..util import plot_latent, linalg +from .gplvm import GPLVM, initialise_latent +from ..util.plot_latent import most_significant_input_dimensions +from ..core.model import Model +from ..util.subarray_and_sorting import common_subarrays class BayesianGPLVM(SparseGP, GPLVM): """ @@ -34,7 +35,7 @@ class BayesianGPLVM(SparseGP, GPLVM): likelihood = likelihood_or_Y if X == None: - X = self.initialise_latent(init, input_dim, likelihood.Y) + X = initialise_latent(init, input_dim, likelihood.Y) self.init = init if X_variance is None: @@ -308,14 +309,36 @@ class BayesianGPLVMWithMissingData(Model): :type init: 'PCA' | 'random' """ def __init__(self, likelihood_or_Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, - Z=None, kernel=None, missing=np.nan, **kwargs): + Z=None, kernel=None, **kwargs): + #======================================================================= + # Filter Y, such that same missing data is at same positions. + # If full rows are missing, delete them entirely! if type(likelihood_or_Y) is np.ndarray: - likelihood = Gaussian(likelihood_or_Y) + Y = likelihood_or_Y + likelihood = Gaussian + params = 1. + normalize=None else: - likelihood = likelihood_or_Y + Y = likelihood_or_Y.Y + likelihood = likelihood_or_Y.__class__ + params = likelihood_or_Y._get_params() + if isinstance(likelihood_or_Y, Gaussian): + normalize = True + scale = likelihood_or_Y._scale + offset = likelihood_or_Y._offset + # Get common subrows + filter_ = np.isnan(Y) + self.subarray_indices = common_subarrays(filter_,axis=1) + likelihoods = [likelihood(Y[~np.array(v,dtype=bool),:][:,ind]) for v,ind in self.subarray_indices.iteritems()] + for l in likelihoods: + l._set_params(params) + if normalize: # get normalization in common + l._scale = scale + l._offset = offset + #======================================================================= if X == None: - X = self.initialise_latent(init, input_dim, likelihood.Y) + X = initialise_latent(init, input_dim, Y[:,np.any(np.isnan(Y),1)]) self.init = init if X_variance is None: @@ -328,13 +351,52 @@ class BayesianGPLVMWithMissingData(Model): if kernel is None: kernel = kern.rbf(input_dim) # + kern.white(input_dim) - SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs) + self.submodels = [BayesianGPLVM(l, input_dim, X, X_variance, init, num_inducing, Z, kernel) for l in likelihoods] + self.gref = self.submodels[0] + #:type self.gref: BayesianGPLVM self.ensure_default_constraints() + def log_likelihood(self): + ll = -self.gref.KL_divergence() + for g in self.submodels: + ll += SparseGP.log_likelihood(g) + return ll + + def _log_likelihood_gradients(self): + dLdmu, dLdS = reduce(lambda a, b: [a[0] + b[0], a[1] + b[1]], (g.dL_dmuS() for g in self.bgplvms)) + dKLmu, dKLdS = self.gref.dKL_dmuS() + dLdmu -= dKLmu + dLdS -= dKLdS + dLdmuS = np.hstack((dLdmu.flatten(), dLdS.flatten())).flatten() + dldzt1 = reduce(lambda a, b: a + b, (SparseGP._log_likelihood_gradients(g)[:self.gref.num_inducing*self.gref.input_dim] for g in self.submodels)) + + return np.hstack((dLdmuS, + dldzt1, + np.hstack([np.hstack([g.dL_dtheta(), + g.likelihood._gradients(\ + partial=g.partial_for_likelihood)]) \ + for g in self.submodels]))) + + def getstate(self): + return Model.getstate(self)+[self.submodels,self.subarray_indices] + + def setstate(self, state): + self.subarray_indices = state.pop() + self.submodels = state.pop() + self.gref = self.submodels[0] + Model.setstate(self, state) + self._set_params(self._get_params()) + def _get_param_names(self): X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) - return (X_names + S_names + SparseGP._get_param_names(self)) + return (X_names + S_names + SparseGP._get_param_names(self.gref)) + + def _get_params(self): + return self.gref._get_params() + def _set_params(self, x): + [g._set_params(x) for g in self.submodels] + pass diff --git a/GPy/models_modules/gplvm.py b/GPy/models_modules/gplvm.py index f27f861c..541b3176 100644 --- a/GPy/models_modules/gplvm.py +++ b/GPy/models_modules/gplvm.py @@ -10,6 +10,13 @@ from ..core import GP from ..likelihoods import Gaussian from .. import util +def initialise_latent(init, input_dim, Y): + Xr = np.random.randn(Y.shape[0], input_dim) + if init == 'pca': + from ..util.linalg import pca + PC = pca(Y, input_dim)[0] + Xr[:PC.shape[0], :PC.shape[1]] = PC + return Xr class GPLVM(GP): """ @@ -20,12 +27,12 @@ class GPLVM(GP): :param input_dim: latent dimensionality :type input_dim: int :param init: initialisation method for the latent space - :type init: 'PCA'|'random' + :type init: 'pca'|'random' """ - def __init__(self, Y, input_dim, init='PCA', X=None, kernel=None, normalize_Y=False): + def __init__(self, Y, input_dim, init='pca', X=None, kernel=None, normalize_Y=False): if X is None: - X = self.initialise_latent(init, input_dim, Y) + X = initialise_latent(init, input_dim, Y) if kernel is None: kernel = kern.rbf(input_dim, ARD=input_dim > 1) + kern.bias(input_dim, np.exp(-2)) likelihood = Gaussian(Y, normalize=normalize_Y, variance=np.exp(-2.)) @@ -33,14 +40,6 @@ class GPLVM(GP): self.set_prior('.*X', priors.Gaussian(0, 1)) self.ensure_default_constraints() - def initialise_latent(self, init, input_dim, Y): - Xr = np.random.randn(Y.shape[0], input_dim) - if init == 'PCA': - from ..util.linalg import PCA - PC = PCA(Y, input_dim)[0] - Xr[:PC.shape[0], :PC.shape[1]] = PC - return Xr - def _get_param_names(self): return sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) + GP._get_param_names(self) diff --git a/GPy/models_modules/mrd.py b/GPy/models_modules/mrd.py index b9c99a64..2376993d 100644 --- a/GPy/models_modules/mrd.py +++ b/GPy/models_modules/mrd.py @@ -5,7 +5,7 @@ Created on 10 Apr 2013 ''' from GPy.core import Model from GPy.core import SparseGP -from GPy.util.linalg import PCA +from GPy.util.linalg import pca import numpy import itertools import pylab @@ -26,8 +26,8 @@ class MRD(Model): :type input_dim: int :param initx: initialisation method for the latent space : - * 'concat' - PCA on concatenation of all datasets - * 'single' - Concatenation of PCA on datasets, respectively + * 'concat' - pca on concatenation of all datasets + * 'single' - Concatenation of pca on datasets, respectively * 'random' - Random draw from a normal :type initx: ['concat'|'single'|'random'] @@ -42,7 +42,7 @@ class MRD(Model): """ def __init__(self, likelihood_or_Y_list, input_dim, num_inducing=10, names=None, - kernels=None, initx='PCA', + kernels=None, initx='pca', initz='permute', _debug=False, **kw): if names is None: self.names = ["{}".format(i) for i in range(len(likelihood_or_Y_list))] @@ -237,7 +237,7 @@ class MRD(Model): partial=g.partial_for_likelihood)]) \ for g in self.bgplvms]))) - def _init_X(self, init='PCA', likelihood_list=None): + def _init_X(self, init='pca', likelihood_list=None): if likelihood_list is None: likelihood_list = self.likelihood_list Ylist = [] @@ -248,11 +248,11 @@ class MRD(Model): Ylist.append(likelihood_or_Y.Y) del likelihood_list if init in "PCA_concat": - X = PCA(numpy.hstack(Ylist), self.input_dim)[0] + X = pca(numpy.hstack(Ylist), self.input_dim)[0] elif init in "PCA_single": X = numpy.zeros((Ylist[0].shape[0], self.input_dim)) for qs, Y in itertools.izip(numpy.array_split(numpy.arange(self.input_dim), len(Ylist)), Ylist): - X[:, qs] = PCA(Y, len(qs))[0] + X[:, qs] = pca(Y, len(qs))[0] else: # init == 'random': X = numpy.random.randn(Ylist[0].shape[0], self.input_dim) self.X = X From 23ff53f7f81f61723454e3ce1250246156120a1b Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 16 Dec 2013 11:39:39 +0000 Subject: [PATCH 10/10] BGPLVM with missing data --- GPy/core/gp_base.py | 6 +-- GPy/examples/dimensionality_reduction.py | 65 +++++++++++++++++++++++- GPy/models.py | 2 +- 3 files changed, 67 insertions(+), 6 deletions(-) diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index 548e2924..981ebbbb 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -218,8 +218,8 @@ class GPBase(Model): Y = self.likelihood.data for d in which_data_ycols: m_d = m[:,d].reshape(resolution, resolution).T - ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) - ax.scatter(self.X[which_data_rows, free_dims[0]], self.X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) + contour = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) + scatter = ax.scatter(self.X[which_data_rows, free_dims[0]], self.X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) #set the limits of the plot to some sensible values ax.set_xlim(xmin[0], xmax[0]) @@ -227,7 +227,7 @@ class GPBase(Model): if samples: warnings.warn("Samples are rather difficult to plot for 2D inputs...") - + return contour, scatter else: raise NotImplementedError, "Cannot define a frame with more than two input dimensions" diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 9120805c..1a199519 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -52,6 +52,67 @@ def bgplvm_test_model(seed=default_seed, optimize=0, verbose=1, plot=0): return m +def bgplvm_test_model_missing_data(seed=default_seed, optimize=0, verbose=1, plot=0): + """ + model for testing purposes. Samples from a GP with rbf kernel and learns + the samples with a new kernel. Normally not for optimization, just model cheking + """ + from GPy.likelihoods.gaussian import Gaussian + import GPy, numpy as np + + num_inputs = 13 + num_inducing = 5 + if plot: + output_dim = 1 + input_dim = 2 + else: + input_dim = 2 + output_dim = 25 + + # generate GPLVM-like data + X = _np.random.rand(num_inputs, input_dim) + lengthscales = _np.random.rand(input_dim) + k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True) + + GPy.kern.white(input_dim, 0.01)) + K = k.K(X) + Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, output_dim).T + lik = Gaussian(Y, normalize=True) + + k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) + # k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) + # k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001) + # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True) + # k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0) + # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True) + + m = GPy.models.BayesianGPLVM(lik, input_dim, kernel=k, num_inducing=num_inducing) + #=========================================================================== + # randomly obstruct data with percentage p + p = .8 + Y_obstruct = Y.copy() + Y_obstruct[np.random.uniform(size=(Y.shape)) < p] = np.nan + #=========================================================================== + m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing) + m.lengthscales = lengthscales + + if plot: + import matplotlib.pyplot as pb + m.plot() + pb.title('PCA initialisation') + m2.plot() + pb.title('PCA initialisation') + + if optimize: + m.optimize('scg', messages=verbose) + m2.optimize('scg', messages=verbose) + if plot: + m.plot() + pb.title('After optimisation') + m2.plot() + pb.title('After optimisation') + + return m, m2 + def gplvm_oil_100(optimize=1, verbose=1, plot=1): import GPy data = GPy.util.datasets.oil_100() @@ -205,7 +266,7 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False): Ylist = [Y1, Y2, Y3] if plot_sim: - import pylab + import pylab, matplotlib.cm as cm import itertools fig = pylab.figure("MRD Simulation Data", figsize=(8, 6)) fig.clf() @@ -216,7 +277,7 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False): ax.legend() for i, Y in enumerate(Ylist): ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i) - ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable + ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable ax.set_title("Y{}".format(i + 1)) pylab.draw() pylab.tight_layout() diff --git a/GPy/models.py b/GPy/models.py index 76d14819..0aea59a0 100644 --- a/GPy/models.py +++ b/GPy/models.py @@ -14,7 +14,7 @@ detailed explanations for the different models. __updated__ = '2013-11-28' -from models_modules.bayesian_gplvm import BayesianGPLVM +from models_modules.bayesian_gplvm import BayesianGPLVM, BayesianGPLVMWithMissingData from models_modules.gp_regression import GPRegression from models_modules.gp_classification import GPClassification#; _gp_classification = gp_classification ; del gp_classification from models_modules.sparse_gp_regression import SparseGPRegression#; _sparse_gp_regression = sparse_gp_regression ; del sparse_gp_regression