From cb082898d335aea8be110e615a9666065669da4e Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 11 Mar 2013 18:56:37 +0000 Subject: [PATCH 1/9] added trace_sum for efficiency --- GPy/models/sparse_GP.py | 6 +++--- GPy/util/linalg.py | 7 +++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py index f70938c9..4846bf8a 100644 --- a/GPy/models/sparse_GP.py +++ b/GPy/models/sparse_GP.py @@ -3,7 +3,7 @@ import numpy as np import pylab as pb -from ..util.linalg import mdot, jitchol, chol_inv, pdinv +from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot from ..util.plot import gpplot from .. import kern from GP import GP @@ -107,7 +107,7 @@ class sparse_GP(GP): self.C = np.dot(tmp,tmp.T) #self.C = mdot(self.Lmi.T, self.Bi, self.Lmi) #self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T) - tmp = np.dot(self.C,self.psi1V/sf) + tmp = np.dot(self.C/sf,self.psi1V) self.E = np.dot(tmp,tmp.T) # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case @@ -156,7 +156,7 @@ class sparse_GP(GP): beta = self.likelihood.precision dbeta = 0.5 * self.N*self.D/beta - 0.5 * np.sum(np.square(self.likelihood.Y)) dbeta += - 0.5 * self.D * (self.psi0.sum() - np.trace(self.A)/beta*sf2) - dbeta += - 0.5 * self.D * np.sum(self.Bi*self.A)/beta + dbeta += - 0.5 * self.D * trace_dot(self.Bi,self.A)/beta dbeta += np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/beta self.partial_for_likelihood = -dbeta*self.likelihood.precision**2 diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index 26105789..cf023284 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -14,6 +14,13 @@ import types #import scipy.lib.lapack.flapack import scipy as sp +def trace_dot(a,b): + """ + efficiently compute the trace of the matrix product of a and b + """ + assert a.shape==b.T.shape + return np.dot(a.flatten(),b.T.flatten()) + def mdot(*args): """Multiply all the arguments using matrix product rules. The output is equivalent to multiplying the arguments one by one From d3c87feffadbbc281f16cb470b0c5cae6731e923 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 11 Mar 2013 19:43:46 +0000 Subject: [PATCH 2/9] some messing with the linear algebra in sparse_GP. This should be more efficient... let's hope nothing breaks --- GPy/models/sparse_GP.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py index 4846bf8a..f5279f86 100644 --- a/GPy/models/sparse_GP.py +++ b/GPy/models/sparse_GP.py @@ -102,17 +102,16 @@ class sparse_GP(GP): self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) self.psi1V = np.dot(self.psi1, self.V) - self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T) tmp = np.dot(self.Lmi.T, self.LBi.T) self.C = np.dot(tmp,tmp.T) - #self.C = mdot(self.Lmi.T, self.Bi, self.Lmi) - #self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T) - tmp = np.dot(self.C/sf,self.psi1V) - self.E = np.dot(tmp,tmp.T) + self.Cpsi1V = np.dot(self.C,self.psi1V) + self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T) + self.E = np.dot(self.Cpsi1VVpsi1,self.C)/sf2 # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten() - self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T + #self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T + self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T) if self.likelihood.is_heteroscedastic: if self.has_uncertain_inputs: self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB @@ -139,7 +138,7 @@ class sparse_GP(GP): # Compute dL_dKmm self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC - self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD + self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD #the partial derivative vector for the likelihood if self.likelihood.Nparams ==0: @@ -157,7 +156,7 @@ class sparse_GP(GP): dbeta = 0.5 * self.N*self.D/beta - 0.5 * np.sum(np.square(self.likelihood.Y)) dbeta += - 0.5 * self.D * (self.psi0.sum() - np.trace(self.A)/beta*sf2) dbeta += - 0.5 * self.D * trace_dot(self.Bi,self.A)/beta - dbeta += np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/beta + dbeta += np.trace(self.Cpsi1VVpsi1)/beta - 0.5 * trace_dot(np.dot(self.C,self.psi2_beta_scaled) , self.Cpsi1VVpsi1 )/beta self.partial_for_likelihood = -dbeta*self.likelihood.precision**2 @@ -198,7 +197,7 @@ class sparse_GP(GP): A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.likelihood.precision)) -0.5*self.likelihood.precision*self.likelihood.trYYT B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2) C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2)) - D = +0.5*np.sum(self.psi1VVpsi1 * self.C) + D = 0.5*np.trace(self.Cpsi1VVpsi1) return A+B+C+D def _log_likelihood_gradients(self): From 7e4b460cdbd4e37597efc9aa6403ed084b09e020 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Tue, 12 Mar 2013 09:18:15 +0000 Subject: [PATCH 3/9] more messing with the linear algebra in sparse_GP --- GPy/models/sparse_GP.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py index f5279f86..be451c12 100644 --- a/GPy/models/sparse_GP.py +++ b/GPy/models/sparse_GP.py @@ -137,8 +137,9 @@ class sparse_GP(GP): # Compute dL_dKmm self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB - self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC - self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD + #self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC + #self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD + self.dL_dKmm += 0.5*(self.D*(self.C/sf2 -self.Kmmi) + self.E) + np.dot(np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1,self.Kmmi) # d(C+D) #the partial derivative vector for the likelihood if self.likelihood.Nparams ==0: From 77df63952ff15f9a88ad43637264e5da947d86d7 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 12 Mar 2013 09:36:11 +0000 Subject: [PATCH 4/9] updated list of implemented kernels in the documentation --- doc/kernel_implementation.rst | 60 ++++++++++++++++++----------------- doc/tuto_kernel_overview.rst | 2 +- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/doc/kernel_implementation.rst b/doc/kernel_implementation.rst index 99ee006b..521087ba 100644 --- a/doc/kernel_implementation.rst +++ b/doc/kernel_implementation.rst @@ -5,35 +5,37 @@ List of implemented kernels The following table shows the implemented kernels in GPy and gives the details of the implemented function for each kernel. -==================== =========== ====== ======= =========== =============== ======= =========== ====== ====== ======= -NAME get/set K Kdiag dK_dtheta dKdiag_dtheta dK_dX dKdiag_dX psi0 psi1 psi2 -==================== =========== ====== ======= =========== =============== ======= =========== ====== ====== ======= -bias |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| --------------------- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -Brownian |tick| |tick| |tick| |tick| |tick| |tick| |tick| --------------------- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -exponential |tick| |tick| |tick| |tick| |tick| |tick| |tick| --------------------- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -finite_dimensional |tick| |tick| |tick| |tick| |tick| --------------------- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -linear |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| --------------------- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -Matern32 |tick| |tick| |tick| |tick| |tick| |tick| |tick| --------------------- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -Matern52 |tick| |tick| |tick| |tick| |tick| |tick| |tick| --------------------- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -periodic_exponential |tick| |tick| |tick| |tick| |tick| --------------------- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -periodic_Matern32 |tick| |tick| |tick| |tick| |tick| --------------------- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -periodic_Matern52 |tick| |tick| |tick| |tick| |tick| --------------------- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -rbf |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| --------------------- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -spline |tick| |tick| |tick| |tick| |tick| |tick| --------------------- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -white |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| -==================== =========== ====== ======= =========== =============== ======= =========== ====== ====== ======= +==================== =========== ===== =========== ====== ======= =========== =============== ======= =========== ====== ====== ======= +NAME Dimension ARD get/set K Kdiag dK_dtheta dKdiag_dtheta dK_dX dKdiag_dX psi0 psi1 psi2 +==================== =========== ===== =========== ====== ======= =========== =============== ======= =========== ====== ====== ======= +bias n no |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| +-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- +Brownian 1 no |tick| |tick| |tick| |tick| |tick| |tick| |tick| +-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- +exponential n yes |tick| |tick| |tick| |tick| |tick| |tick| |tick| +-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- +finite_dimensional n no |tick| |tick| |tick| |tick| |tick| +-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- +linear n yes |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| +-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- +Matern32 n yes |tick| |tick| |tick| |tick| |tick| |tick| |tick| +-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- +Matern52 n yes |tick| |tick| |tick| |tick| |tick| |tick| |tick| +-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- +periodic_exponential 1 no |tick| |tick| |tick| |tick| |tick| +-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- +periodic_Matern32 1 no |tick| |tick| |tick| |tick| |tick| +-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- +periodic_Matern52 1 no |tick| |tick| |tick| |tick| |tick| +-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- +rational quadratic 1 no |tick| |tick| |tick| |tick| |tick| |tick| |tick| +-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- +rbf n yes |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| +-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- +spline n no |tick| |tick| |tick| |tick| |tick| |tick| +-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- +white n no |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| +==================== =========== ===== =========== ====== ======= =========== =============== ======= =========== ====== ====== ======= Depending on the use, all functions may not be required diff --git a/doc/tuto_kernel_overview.rst b/doc/tuto_kernel_overview.rst index da19803b..27f895ba 100644 --- a/doc/tuto_kernel_overview.rst +++ b/doc/tuto_kernel_overview.rst @@ -39,7 +39,7 @@ return:: Implemented kernels =================== -Many kernels are already implemented in GPy. A comprehensive list can be found `here `_ and the following figure gives a summary of most of them: +Many kernels are already implemented in GPy. The following figure gives a summary of most of them (a comprehensive list can be list can be found `here `_): .. figure:: Figures/tuto_kern_overview_allkern.png :align: center From f14302e8d0aa92bd61e8b2ad62db52b34e79c14e Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 12 Mar 2013 09:41:27 +0000 Subject: [PATCH 5/9] updated list of implemented kernels in the documentation --- doc/kernel_implementation.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/kernel_implementation.rst b/doc/kernel_implementation.rst index 521087ba..c566d21d 100644 --- a/doc/kernel_implementation.rst +++ b/doc/kernel_implementation.rst @@ -8,13 +8,13 @@ The following table shows the implemented kernels in GPy and gives the details o ==================== =========== ===== =========== ====== ======= =========== =============== ======= =========== ====== ====== ======= NAME Dimension ARD get/set K Kdiag dK_dtheta dKdiag_dtheta dK_dX dKdiag_dX psi0 psi1 psi2 ==================== =========== ===== =========== ====== ======= =========== =============== ======= =========== ====== ====== ======= -bias n no |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| +bias n |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| -------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -Brownian 1 no |tick| |tick| |tick| |tick| |tick| |tick| |tick| +Brownian 1 |tick| |tick| |tick| |tick| |tick| |tick| |tick| -------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- exponential n yes |tick| |tick| |tick| |tick| |tick| |tick| |tick| -------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -finite_dimensional n no |tick| |tick| |tick| |tick| |tick| +finite_dimensional n |tick| |tick| |tick| |tick| |tick| -------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- linear n yes |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| -------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- @@ -22,19 +22,19 @@ Matern32 n yes |tick| |tick| |tick| |tick| -------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- Matern52 n yes |tick| |tick| |tick| |tick| |tick| |tick| |tick| -------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -periodic_exponential 1 no |tick| |tick| |tick| |tick| |tick| +periodic_exponential 1 |tick| |tick| |tick| |tick| |tick| -------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -periodic_Matern32 1 no |tick| |tick| |tick| |tick| |tick| +periodic_Matern32 1 |tick| |tick| |tick| |tick| |tick| -------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -periodic_Matern52 1 no |tick| |tick| |tick| |tick| |tick| +periodic_Matern52 1 |tick| |tick| |tick| |tick| |tick| -------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -rational quadratic 1 no |tick| |tick| |tick| |tick| |tick| |tick| |tick| +rational quadratic 1 |tick| |tick| |tick| |tick| |tick| |tick| |tick| -------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- rbf n yes |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| -------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -spline n no |tick| |tick| |tick| |tick| |tick| |tick| +spline 1 |tick| |tick| |tick| |tick| |tick| |tick| -------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ ------- -white n no |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| +white n |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| ==================== =========== ===== =========== ====== ======= =========== =============== ======= =========== ====== ====== ======= Depending on the use, all functions may not be required From bddeb998bf691e51514ba845cbecfa10484ff0fe Mon Sep 17 00:00:00 2001 From: James Hensman Date: Tue, 12 Mar 2013 10:04:02 +0000 Subject: [PATCH 6/9] typo in comments --- GPy/models/sparse_GP.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py index be451c12..a348c9f4 100644 --- a/GPy/models/sparse_GP.py +++ b/GPy/models/sparse_GP.py @@ -80,7 +80,7 @@ class sparse_GP(GP): #The rather complex computations of psi2_beta_scaled if self.likelihood.is_heteroscedastic: - assert self.likelihood.D == 1 #TODO: what is the likelihood is heterscedatic and there are multiple independent outputs? + assert self.likelihood.D == 1 #TODO: what if the likelihood is heterscedatic and there are multiple independent outputs? if self.has_uncertain_inputs: self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0) else: From c3dae7b25255bd2168063226b0d97a2e6ce649f3 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 12 Mar 2013 10:43:46 +0000 Subject: [PATCH 7/9] Fixed bug in dK_dX for the quadratic kernel --- GPy/kern/rational_quadratic.py | 4 ++-- doc/tuto_creating_new_kernels.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/GPy/kern/rational_quadratic.py b/GPy/kern/rational_quadratic.py index 15200fd3..561ea065 100644 --- a/GPy/kern/rational_quadratic.py +++ b/GPy/kern/rational_quadratic.py @@ -73,8 +73,8 @@ class rational_quadratic(kernpart): if X2 is None: X2 = X dist2 = np.square((X-X2.T)/self.lengthscale) - dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2./self.power)**(-self.power-1) - target += np.sum(dL_dK*dX) + dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1) + target += np.sum(dL_dK*dX,1)[:,np.newaxis] def dKdiag_dX(self,dL_dKdiag,X,target): pass diff --git a/doc/tuto_creating_new_kernels.rst b/doc/tuto_creating_new_kernels.rst index 8ebf8b8f..24003ba2 100644 --- a/doc/tuto_creating_new_kernels.rst +++ b/doc/tuto_creating_new_kernels.rst @@ -136,8 +136,8 @@ Computes the derivative of the likelihood with respect to the inputs ``X`` (a :m if X2 is None: X2 = X dist2 = np.square((X-X2.T)/self.lengthscale) - dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2./self.power)**(-self.power-1) - target += np.sum(dL_dK*dX) + dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1) + target += np.sum(dL_dK*dX,1)[:,np.newaxis] **dKdiag_dX(self,dL_dKdiag,X,target)** From 09dd452b54c30f9ba113fe7709c886a6a0d66128 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Tue, 12 Mar 2013 11:18:25 +0000 Subject: [PATCH 8/9] added SCG code --- GPy/inference/SCG.py | 142 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 GPy/inference/SCG.py diff --git a/GPy/inference/SCG.py b/GPy/inference/SCG.py new file mode 100644 index 00000000..cbaee3e7 --- /dev/null +++ b/GPy/inference/SCG.py @@ -0,0 +1,142 @@ +#Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012) + +#Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman + +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT +# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT +# NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + + +import numpy as np + +def SCG(f, gradf, x, optargs=(), maxiters=500, display=True, xtol=1e-6, ftol=1e-6): + """ + Optimisation through Scaled Conjugate Gradients (SCG) + + f: the objective function + gradf : the gradient function (should return a 1D np.ndarray) + x : the initial condition + + Returns + x the optimal value for x + flog : a list of all the objective values + pointlog : a list of the x values tried + scalelog : a list of the scales used in optimisation (beta) + + """ + + sigma0 = 1.0e-4 + fold = f(x, *optargs) # Initial function value. + fnow = fold + gradnew = gradf(x, *optargs) # Initial gradient. + gradold = gradnew.copy() + d = -gradnew # Initial search direction. + success = True # Force calculation of directional derivs. + nsuccess = 0 # nsuccess counts number of successes. + beta = 1.0 # Initial scale parameter. + betamin = 1.0e-15 # Lower bound on scale. + betamax = 1.0e100 # Upper bound on scale. + + flog = [fold] + pointlog = [x.copy()] + scalelog = [beta] + + iteration = 0 + + # Main optimization loop. + while iteration < maxiters: + + # Calculate first and second directional derivatives. + if success: + mu = np.dot(d, gradnew) + if mu >= 0: + d = -gradnew + mu = np.dot(d, gradnew) + kappa = np.dot(d, d) + #if kappa < eps(): + #return x, flog, pointlog, scalelog + sigma = sigma0/np.sqrt(kappa) + xplus = x + sigma*d + gplus = gradf(xplus, *optargs) + theta = np.dot(d, (gplus - gradnew))/sigma + + # Increase effective curvature and evaluate step size alpha. + delta = theta + beta*kappa + if delta <= 0: + delta = beta*kappa + beta = beta - theta/kappa + + alpha = - mu/delta + + # Calculate the comparison ratio. + xnew = x + alpha*d + fnew = f(xnew, *optargs) + Delta = 2.*(fnew - fold)/(alpha*mu) + if Delta >= 0.: + success = True + nsuccess += 1 + x = xnew + fnow = fnew + else: + success = False + fnow = fold + + # Store relevant variables + flog.append(fnow) # Current function value + pointlog.append(x) # Current position + scalelog.append(beta) # current scale parameter + + iteration += 1 + if display: + print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta + + if success: + # Test for termination + if np.max(np.abs(alpha*d)) < xtol or np.max(np.abs(fnew-fold)) < ftol: + return x, flog, pointlog, scalelog + + else: + # Update variables for new position + fold = fnew + gradold = gradnew + gradnew = gradf(x, *optargs) + # If the gradient is zero then we are done. + if np.dot(gradnew,gradnew) == 0: + return x, flog, pointlog, scalelog + + # Adjust beta according to comparison ratio. + if Delta < 0.25: + beta = min(4.0*beta, betamax) + if Delta > 0.75: + beta = max(0.5*beta, betamin) + + # Update search direction using Polak-Ribiere formula, or re-start + # in direction of negative gradient after nparams steps. + if nsuccess == x.size: + d = -gradnew + nsuccess = 0 + elif success: + gamma = np.dot(gradold - gradnew,gradnew)/(mu) + d = gamma*d - gradnew + + # If we get here, then we haven't terminated in the given number of + # iterations. + if display: + print "maxiter exceeded" + + return x, flog, pointlog, scalelog From 7cb20798941b0d39d94637120ced0b3ad958e5b3 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Tue, 12 Mar 2013 11:44:28 +0000 Subject: [PATCH 9/9] non working integratino of SCG into GPy --- GPy/inference/optimization.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/GPy/inference/optimization.py b/GPy/inference/optimization.py index 94a5bcef..2e29fe6a 100644 --- a/GPy/inference/optimization.py +++ b/GPy/inference/optimization.py @@ -196,6 +196,16 @@ class opt_rasm(Optimizer): self.trace = opt_result[1] +class opt_scg(Optimizer): + def __init__(self, *args, **kwargs): + Optimizer.__init__(self, *args, **kwargs) + self.opt_name = "Scaled Conjugate Gradients" + + def opt(self, f_fp = None, f = None, fp = None): + assert not f is None + assert not fp is None + opt_result = SCG (f,fp,self.x_init, display=self.messages, + def get_optimizer(f_min): # import rasmussens_minimize as rasm from SGD import opt_SGD