async optimize working

Max Zwiessele 2013-04-29 14:07:01 +01:00
parent 96a97ce790
commit f3f6226287
4 changed files with 145 additions and 131 deletions

File 1 of 4

@@ -173,7 +173,7 @@ def bgplvm_simulation_matlab_compare():
     from GPy.models import mrd
     from GPy import kern
     reload(mrd); reload(kern)
-    k = kern.rbf(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
+    k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
     m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k,
                        # X=mu,
                        # X_variance=S,

File 2 of 4

@@ -3,16 +3,15 @@ Created on 24 Apr 2013

 @author: maxz
 '''
-from multiprocessing.process import Process
 from GPy.inference.gradient_descent_update_rules import FletcherReeves
 import numpy
 from multiprocessing import Value
 from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
-from multiprocessing.synchronize import Lock, Event
+from multiprocessing.synchronize import Event
-from copy import deepcopy
 from multiprocessing.queues import Queue
 from Queue import Empty
 import sys
+from threading import Thread

 RUNNING = "running"
 CONVERGED = "converged"
@@ -21,7 +20,9 @@ MAX_F_EVAL = "maximum number of function calls reached"
 LINE_SEARCH = "line search failed"
 KBINTERRUPT = "interrupted"
+SENTINEL = None
+
-class _Async_Optimization(Process):
+class _Async_Optimization(Thread):
     def __init__(self, f, df, x0, update_rule, runsignal,
                  report_every=10, messages=0, maxiter=5e3, max_f_eval=15e3,
                  gtol=1e-6, outqueue=None, *args, **kw):
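
Switching the optimizer base class from multiprocessing.Process to threading.Thread keeps the worker in the caller's address space, so the shared Event, the call counters, and the output queue no longer cross a process boundary. A minimal sketch of the pattern, with illustrative names rather than GPy's actual class:

    # Worker thread that polls a shared Event for cooperative cancellation.
    import threading

    class Worker(threading.Thread):
        def __init__(self, runsignal, outq):
            threading.Thread.__init__(self)
            self.runsignal = runsignal  # caller clears this to request a stop
            self.outq = outq            # results travel back through this queue

        def run(self):
            it = 0
            while it < 100 and self.runsignal.is_set():
                self.outq.put((it, it ** 2))  # stand-in for one optimization step
                it += 1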
@@ -67,6 +68,11 @@ class _Async_Optimization(Process):
             pass
             # print "callback done"

+    def callback_return(self, *a):
+        self.callback(*a)
+        self.outq.put(SENTINEL)
+        self.runsignal.clear()
+
     def run(self, *args, **kwargs):
         raise NotImplementedError("Overwrite this with optimization (for async use)")
         pass
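
The new callback_return bundles the shutdown handshake: report the final state, push the module-level SENTINEL so the collector's iter() loop terminates, and clear the run flag so every `while runsignal.is_set()` loop exits. As a standalone sketch (queue and event arguments assumed, not GPy's attributes):

    # Producer-side shutdown handshake (assumed names).
    def finish(outq, runsignal, result, sentinel=None):
        outq.put(result)    # last real result for the collector
        outq.put(sentinel)  # ends iter(lambda: q.get(...), sentinel) cleanly
        runsignal.clear()   # releases any loop polling the run flag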
@@ -91,7 +97,6 @@ class _CGDAsync(_Async_Optimization):
         it = 0
         while it < self.maxiter:
-            print self.runsignal.is_set()
             if not self.runsignal.is_set():
                 break
@@ -117,7 +122,7 @@
                     xi,
                     si, gi,
                     fi, fi_old)
-                if alphai is not None:
+                if alphai is not None and fi2 < fi:
                     fi, fi_old = fi2, fi_old2
                 else:
                     alphai, _, _, fi, fi_old, gfi = \
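
The tightened guard accepts the wolfe1 step only when it actually decreased the objective (fi2 < fi); otherwise the code falls through to the wolfe2 search. The same accept-or-fall-back idea, sketched with scipy's public line_search wrapper rather than the private wolfe1/wolfe2 routines the commit calls:

    # Accept a Wolfe line-search step only on an actual decrease (sketch).
    import numpy as np
    from scipy.optimize import line_search

    def f(x):
        return float(np.dot(x, x))

    def df(x):
        return 2.0 * x

    x = np.array([3.0, -2.0])
    g = df(x)
    s = -g  # steepest-descent search direction
    alpha, fc, gc, fnew, fold, slope = line_search(f, df, x, s, gfk=g)
    if alpha is not None and fnew < f(x):
        x = x + alpha * s  # step accepted; otherwise fall back to another search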
@@ -130,30 +135,32 @@
                     break
             if gfi is not None:
                 gi = gfi
-            xi += numpy.dot(alphai, si)
-            if self.messages:
-                sys.stdout.write("\r")
-                sys.stdout.flush()
-                sys.stdout.write("iteration: {0:> 6g} f: {1:> 12F} g: {2:> 12F}".format(it, fi, gi))
-            if it % self.report_every == 0:
-                self.callback(xi, fi, it, self.f_call.value, self.df_call.value, status)
+            if fi_old > fi:
+                gi, ur, si = self.reset(xi, *a, **kw)
+            else:
+                xi += numpy.dot(alphai, si)
+            if self.messages:
+                sys.stdout.write("\r")
+                sys.stdout.flush()
+                sys.stdout.write("iteration: {0:> 6g} f:{1:> 12e} |g|:{2:> 12e}".format(it, fi, numpy.dot(gi.T, gi)))
+            if it % self.report_every == 0:
+                self.callback(xi, fi, it, self.f_call.value, self.df_call.value, status)
             it += 1
         else:
             status = MAXITER
         # self.result = [xi, fi, it, self.f_call.value, self.df_call.value, status]
-        self.callback(xi, fi, it, self.f_call.value, self.df_call.value, status)
-        return
+        self.callback_return(xi, fi, it, self.f_call.value, self.df_call.value, status)


 class Async_Optimize(object):
-    callback = None
+    callback = lambda *x: None
-    SENTINEL = object()
     runsignal = Event()

     def async_callback_collect(self, q):
         while self.runsignal.is_set():
             try:
-                for ret in iter(lambda: q.get(timeout=1), self.SENTINEL):
+                for ret in iter(lambda: q.get(timeout=1), SENTINEL):
                     self.callback(*ret)
             except Empty:
                 pass
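
The collector drains the queue with the two-argument form of iter: q.get is called repeatedly until it returns SENTINEL, and the one-second timeout raises Empty so the loop re-checks the run flag instead of blocking forever. Moving the sentinel to a module-level None also sidesteps comparing against a per-instance object(), which would not compare equal after a round trip through a multiprocessing queue. A standalone sketch of the idiom:

    # Sentinel-terminated consumer loop (Python 2, as in the commit).
    from Queue import Queue, Empty

    SENTINEL = None
    q = Queue()
    for item in (1, 2, 3, SENTINEL):
        q.put(item)

    try:
        for ret in iter(lambda: q.get(timeout=1), SENTINEL):
            print(ret)  # prints 1, 2, 3, then stops at the sentinel
    except Empty:
        pass  # timeout with no item: re-check the run flag and poll again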
@@ -162,30 +169,32 @@ class Async_Optimize(object):
                    messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
                    report_every=10, *args, **kwargs):
         self.runsignal.set()
-        outqueue = Queue()
+        outqueue = Queue(5)
         if callback:
             self.callback = callback
-        collector = Process(target=self.async_callback_collect, args=(outqueue,))
-        collector.start()
+        c = Thread(target=self.async_callback_collect, args=(outqueue,))
+        c.start()
         p = _CGDAsync(f, df, x0, update_rule, self.runsignal,
                       report_every=report_every, messages=messages, maxiter=maxiter,
                       max_f_eval=max_f_eval, gtol=gtol, outqueue=outqueue, *args, **kwargs)
-        p.start()
+        p.run()
-        return p
+        return p, c

     def fmin(self, f, df, x0, callback=None, update_rule=FletcherReeves,
              messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
              report_every=10, *args, **kwargs):
-        p = self.fmin_async(f, df, x0, callback, update_rule, messages,
+        p, c = self.fmin_async(f, df, x0, callback, update_rule, messages,
                             maxiter, max_f_eval, gtol,
                             report_every, *args, **kwargs)
         while self.runsignal.is_set():
             try:
                 p.join(1)
+                c.join(1)
             except KeyboardInterrupt:
-                print "^C"
+                # print "^C"
                 self.runsignal.clear()
                 p.join()
+                c.join()


 class CGD(Async_Optimize):
     '''
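
fmin_async now returns both the optimizer and the collector thread, and fmin joins each with a one-second timeout so a KeyboardInterrupt lands between joins and is turned into a cleared run flag. Note also that p.start() became p.run(), which executes the optimizer synchronously in the calling thread; only the collector remains concurrent. A self-contained sketch of the interruptible join loop, with an illustrative worker in place of _CGDAsync:

    # Interruptible join-with-timeout driver loop (illustrative worker).
    import threading

    runsignal = threading.Event()
    runsignal.set()

    def work():
        it = 0
        while it < 10 ** 7 and runsignal.is_set():
            it += 1  # stand-in for optimization iterations
        runsignal.clear()  # signal completion

    p = threading.Thread(target=work)
    p.start()
    try:
        while runsignal.is_set():
            p.join(1)  # short timeout keeps the main thread responsive to ^C
    except KeyboardInterrupt:
        runsignal.clear()  # ask the worker to stop, then wait below
    p.join()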

File 3 of 4

@@ -30,22 +30,22 @@ class sparse_GP(GP):
     """

     def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
-        self.scale_factor = 100.0# a scaling factor to help keep the algorithm stable
+        self.scale_factor = 100.0 # a scaling factor to help keep the algorithm stable
         self.auto_scale_factor = False
         self.Z = Z
         self.M = Z.shape[0]
         self.likelihood = likelihood
         if X_variance is None:
-            self.has_uncertain_inputs=False
+            self.has_uncertain_inputs = False
         else:
-            assert X_variance.shape==X.shape
-            self.has_uncertain_inputs=True
+            assert X_variance.shape == X.shape
+            self.has_uncertain_inputs = True
             self.X_variance = X_variance
         GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X)

-        #normalize X uncertainty also
+        # normalize X uncertainty also
         if self.has_uncertain_inputs:
             self.X_variance /= np.square(self._Xstd)
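
Dividing X_variance by the squared standard deviations is the natural companion to normalizing X itself: variances scale with the square of a linear rescaling. A quick check of that identity:

    # Variances rescale with the square of a linear rescaling of the data.
    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.randn(1000, 2) * np.array([3.0, 10.0])
    Xstd = X.std(0)
    assert np.allclose((X / Xstd).var(0), X.var(0) / np.square(Xstd))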
@@ -54,155 +54,155 @@ class sparse_GP(GP):
         # kernel computations, using BGPLVM notation
         self.Kmm = self.kern.K(self.Z)
         if self.has_uncertain_inputs:
-            self.psi0 = self.kern.psi0(self.Z,self.X, self.X_variance)
+            self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance)
-            self.psi1 = self.kern.psi1(self.Z,self.X, self.X_variance).T
+            self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance).T
-            self.psi2 = self.kern.psi2(self.Z,self.X, self.X_variance)
+            self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance)
         else:
             self.psi0 = self.kern.Kdiag(self.X)
-            self.psi1 = self.kern.K(self.Z,self.X)
+            self.psi1 = self.kern.K(self.Z, self.X)
             self.psi2 = None

     def _computations(self):
-        #TODO: find routine to multiply triangular matrices
+        # TODO: find routine to multiply triangular matrices
         sf = self.scale_factor
-        sf2 = sf**2
+        sf2 = sf ** 2
-        #The rather complex computations of psi2_beta_scaled
+        # The rather complex computations of psi2_beta_scaled
         if self.likelihood.is_heteroscedastic:
-            assert self.likelihood.D == 1 #TODO: what if the likelihood is heterscedatic and there are multiple independent outputs?
+            assert self.likelihood.D == 1 # TODO: what if the likelihood is heterscedatic and there are multiple independent outputs?
             if self.has_uncertain_inputs:
-                self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
+                self.psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1) / sf2)).sum(0)
             else:
-                tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
+                tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)) / sf)
-                #self.psi2_beta_scaled = np.dot(tmp,tmp.T)
+                # self.psi2_beta_scaled = np.dot(tmp,tmp.T)
                 self.psi2_beta_scaled = tdot(tmp)
         else:
             if self.has_uncertain_inputs:
-                self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
+                self.psi2_beta_scaled = (self.psi2 * (self.likelihood.precision / sf2)).sum(0)
             else:
-                tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
+                tmp = self.psi1 * (np.sqrt(self.likelihood.precision) / sf)
-                #self.psi2_beta_scaled = np.dot(tmp,tmp.T)
+                # self.psi2_beta_scaled = np.dot(tmp,tmp.T)
                 self.psi2_beta_scaled = tdot(tmp)

         self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
-        self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y
+        self.V = (self.likelihood.precision / self.scale_factor) * self.likelihood.Y

-        #Compute A = L^-1 psi2 beta L^-T
+        # Compute A = L^-1 psi2 beta L^-T
-        #self. A = mdot(self.Lmi,self.psi2_beta_scaled,self.Lmi.T)
+        # self. A = mdot(self.Lmi,self.psi2_beta_scaled,self.Lmi.T)
-        tmp = linalg.lapack.flapack.dtrtrs(self.Lm,self.psi2_beta_scaled.T,lower=1)[0]
+        tmp = linalg.lapack.flapack.dtrtrs(self.Lm, self.psi2_beta_scaled.T, lower=1)[0]
-        self.A = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0]
+        self.A = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1)[0]

-        self.B = np.eye(self.M)/sf2 + self.A
+        self.B = np.eye(self.M) / sf2 + self.A
         self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)

         self.psi1V = np.dot(self.psi1, self.V)
-        tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
+        tmp = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.Bi), lower=1, trans=1)[0]
-        self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]
+        self.C = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1, trans=1)[0]
-        #self.Cpsi1V = np.dot(self.C,self.psi1V)
+        # self.Cpsi1V = np.dot(self.C,self.psi1V)

-        #back substutue C into psi1V
+        # back substitute C into psi1V
-        tmp,info1 = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.psi1V),lower=1,trans=0)
+        tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
-        tmp,info2 = linalg.lapack.flapack.dpotrs(self.LB,tmp,lower=1)
+        tmp, _ = linalg.lapack.flapack.dpotrs(self.LB, tmp, lower=1)
-        self.Cpsi1V,info3 = linalg.lapack.flapack.dtrtrs(self.Lm,tmp,lower=1,trans=1)
+        self.Cpsi1V, _ = linalg.lapack.flapack.dtrtrs(self.Lm, tmp, lower=1, trans=1)

-        self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T) #TODO: stabilize?
+        self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V, self.psi1V.T) # TODO: stabilize?
-        self.E = tdot(self.Cpsi1V/sf)
+        self.E = tdot(self.Cpsi1V / sf)

         # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
-        self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
+        self.dL_dpsi0 = -0.5 * self.D * (self.likelihood.precision * np.ones([self.N, 1])).flatten()
-        self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T)
+        self.dL_dpsi1 = np.dot(self.Cpsi1V, self.V.T)
         if self.likelihood.is_heteroscedastic:
             if self.has_uncertain_inputs:
-                #self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
+                # self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
-                #self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
+                # self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
-                #self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
+                # self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
-                self.dL_dpsi2 = 0.5*self.likelihood.precision[:,None,None]*(self.D*(self.Kmmi - self.C/sf2) -self.E)[None,:,:]
+                self.dL_dpsi2 = 0.5 * self.likelihood.precision[:, None, None] * (self.D * (self.Kmmi - self.C / sf2) - self.E)[None, :, :]
             else:
-                #self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
+                # self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
-                #self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
+                # self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
-                #self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
+                # self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
-                self.dL_dpsi1 += np.dot(self.Kmmi - self.C/sf2 -self.E,self.psi1*self.likelihood.precision.reshape(1,self.N))
+                self.dL_dpsi1 += np.dot(self.Kmmi - self.C / sf2 - self.E, self.psi1 * self.likelihood.precision.reshape(1, self.N))
                 self.dL_dpsi2 = None
         else:
-            #self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
+            # self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
-            #self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC
+            # self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC
-            #self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD
+            # self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD
-            self.dL_dpsi2 = 0.5*self.likelihood.precision*(self.D*(self.Kmmi - self.C/sf2) -self.E)
+            self.dL_dpsi2 = 0.5 * self.likelihood.precision * (self.D * (self.Kmmi - self.C / sf2) - self.E)
             if self.has_uncertain_inputs:
-                #repeat for each of the N psi_2 matrices
+                # repeat for each of the N psi_2 matrices
-                self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None,:,:],self.N,axis=0)
+                self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None, :, :], self.N, axis=0)
             else:
-                self.dL_dpsi1 += 2.*np.dot(self.dL_dpsi2,self.psi1)
+                self.dL_dpsi1 += 2.*np.dot(self.dL_dpsi2, self.psi1)
                 self.dL_dpsi2 = None

         # Compute dL_dKmm
-        #self.dL_dKmm_old = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
+        # self.dL_dKmm_old = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
-        #self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
+        # self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
-        #self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
+        # self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
-        tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.B),lower=1,trans=1)[0]
+        tmp = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.B), lower=1, trans=1)[0]
-        self.dL_dKmm = -0.5*self.D*sf2*linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0] #dA
+        self.dL_dKmm = -0.5 * self.D * sf2 * linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1, trans=1)[0] # dA
-        tmp = np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1
+        tmp = np.dot(self.D * self.C + self.E * sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1
-        tmp = linalg.lapack.flapack.dpotrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0].T
+        tmp = linalg.lapack.flapack.dpotrs(self.Lm, np.asfortranarray(tmp.T), lower=1)[0].T
-        self.dL_dKmm += 0.5*(self.D*self.C/sf2 + self.E) +tmp # d(C+D)
+        self.dL_dKmm += 0.5 * (self.D * self.C / sf2 + self.E) + tmp # d(C+D)

-        #the partial derivative vector for the likelihood
+        # the partial derivative vector for the likelihood
-        if self.likelihood.Nparams ==0:
+        if self.likelihood.Nparams == 0:
-            #save computation here.
+            # save computation here.
             self.partial_for_likelihood = None
         elif self.likelihood.is_heteroscedastic:
             raise NotImplementedError, "heteroscedatic derivates not implemented"
-            #self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA
+            # self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA
-            #self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB
+            # self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB
-            #self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC
+            # self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC
-            #self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
+            # self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
         else:
-            #likelihood is not heterscedatic
+            # likelihood is not heterscedatic
-            self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2
+            self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
-            self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
+            self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision * sf2)
-            self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
+            self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi, self.A) * self.likelihood.precision
-            self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
+            self.partial_for_likelihood += self.likelihood.precision * (0.5 * trace_dot(self.psi2_beta_scaled, self.E * sf2) - np.trace(self.Cpsi1VVpsi1))

     def log_likelihood(self):
         """ Compute the (lower bound on the) log marginal likelihood """
-        sf2 = self.scale_factor**2
+        sf2 = self.scale_factor ** 2
         if self.likelihood.is_heteroscedastic:
-            A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
+            A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y)
-            B = -0.5*self.D*(np.sum(self.likelihood.precision.flatten()*self.psi0) - np.trace(self.A)*sf2)
+            B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
         else:
-            A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
+            A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
-            B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
+            B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
-        C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
+        C = -0.5 * self.D * (self.B_logdet + self.M * np.log(sf2))
-        D = 0.5*np.trace(self.Cpsi1VVpsi1)
+        D = 0.5 * np.trace(self.Cpsi1VVpsi1)
-        return A+B+C+D
+        return A + B + C + D

     def _set_params(self, p):
-        self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
+        self.Z = p[:self.M * self.Q].reshape(self.M, self.Q)
-        self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
+        self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam])
-        self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
+        self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:])
         self._compute_kernel_matrices()
         if self.auto_scale_factor:
-            self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
+            self.scale_factor = np.sqrt(self.psi2.sum(0).mean() * self.likelihood.precision)
-        #if self.auto_scale_factor:
+        # if self.auto_scale_factor:
         #     if self.likelihood.is_heteroscedastic:
         #         self.scale_factor = max(1,np.sqrt(self.psi2_beta_scaled.sum(0).mean()))
         #     else:
         #         self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
-        #self.scale_factor = 1.
+        # self.scale_factor = 1.
         self._computations()

     def _get_params(self):
-        return np.hstack([self.Z.flatten(),GP._get_params(self)])
+        return np.hstack([self.Z.flatten(), GP._get_params(self)])

     def _get_param_names(self):
-        return sum([['iip_%i_%i'%(i,j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])],[]) + GP._get_param_names(self)
+        return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], []) + GP._get_param_names(self)

     def update_likelihood_approximation(self):
         """
@@ -214,9 +214,9 @@ class sparse_GP(GP):
         if self.has_uncertain_inputs:
             raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
         else:
-            self.likelihood.fit_DTC(self.Kmm,self.psi1)
+            self.likelihood.fit_DTC(self.Kmm, self.psi1)
-            #self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
+            # self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
         self._set_params(self._get_params()) # update the GP

     def _log_likelihood_gradients(self):
@@ -226,13 +226,13 @@ class sparse_GP(GP):
         """
         Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
         """
-        dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
+        dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
         if self.has_uncertain_inputs:
-            dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_variance)
+            dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance)
-            dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_variance)
+            dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T, self.Z, self.X, self.X_variance)
-            dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z,self.X, self.X_variance)
+            dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance)
         else:
-            dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X)
+            dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.Z, self.X)
             dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
         return dL_dtheta
@@ -243,22 +243,22 @@ class sparse_GP(GP):
         """
         dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
         if self.has_uncertain_inputs:
-            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_variance)
+            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
             dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
         else:
-            dL_dZ += self.kern.dK_dX(self.dL_dpsi1,self.Z,self.X)
+            dL_dZ += self.kern.dK_dX(self.dL_dpsi1, self.Z, self.X)
         return dL_dZ

     def _raw_predict(self, Xnew, which_parts='all', full_cov=False):
         """Internal helper function for making predictions, does not account for normalization"""
         Kx = self.kern.K(self.Z, Xnew)
-        mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
+        mu = mdot(Kx.T, self.C / self.scale_factor, self.psi1V)
         if full_cov:
-            Kxx = self.kern.K(Xnew,which_parts=which_parts)
+            Kxx = self.kern.K(Xnew, which_parts=which_parts)
-            var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
+            var = Kxx - mdot(Kx.T, (self.Kmmi - self.C / self.scale_factor ** 2), Kx) # NOTE this won't work for plotting
         else:
-            Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
+            Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
-            var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
+            var = Kxx - np.sum(Kx * np.dot(self.Kmmi - self.C / self.scale_factor ** 2, Kx), 0)
-        return mu,var[:,None]
+        return mu, var[:, None]
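
In the full_cov=False branch, the predictive variances need only the diagonal of Kx.T (Kmmi - C/sf2) Kx, which the code gets as column sums of Kx * (A.Kx) without ever forming the full N-by-N matrix. A standalone check of that identity:

    # np.sum(Kx * np.dot(A, Kx), 0) equals the diagonal of Kx.T A Kx.
    import numpy as np

    rng = np.random.RandomState(1)
    Kx = rng.randn(5, 7)  # (inducing points M) x (test points N)
    A = rng.randn(5, 5)

    diag_fast = np.sum(Kx * np.dot(A, Kx), 0)
    diag_full = np.diag(Kx.T.dot(A).dot(Kx))
    assert np.allclose(diag_fast, diag_full)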

File 4 of 4

@@ -47,10 +47,15 @@ if __name__ == "__main__":
     xopts = [x0.copy()]
     optplts, = ax.plot3D([x0[0]], [x0[1]], zs=f(x0), marker='o', color='r')

+    raw_input("enter to start optimize")
+
     def callback(x, *a, **kw):
         xopts.append(x.copy())
-        time.sleep(.3)
+        # time.sleep(.3)
         optplts._verts3d = [numpy.array(xopts)[:, 0], numpy.array(xopts)[:, 1], [f(xs) for xs in xopts]]
         fig.canvas.draw()

-    res = opt.fmin(f, df, x0, callback, messages=True, report_every=1)
+    res = opt.fmin(f, df, x0, callback, messages=True, maxiter=1000, report_every=1)
+
+    pylab.ion()
+    pylab.show()
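
The demo assumes a 2-D objective f and gradient df defined earlier in the script. For reference, a hypothetical pair matching the opt.fmin(f, df, x0, callback, ...) signature used above (not the demo's own surface):

    # Hypothetical objective/gradient pair for the fmin signature above.
    import numpy

    def f(x):
        # Rosenbrock's banana function
        return (1. - x[0]) ** 2 + 100. * (x[1] - x[0] ** 2) ** 2

    def df(x):
        return numpy.array([
            -2. * (1. - x[0]) - 400. * x[0] * (x[1] - x[0] ** 2),
            200. * (x[1] - x[0] ** 2),
        ])

    x0 = numpy.array([-1., 1.])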