LinearCF Psi Stat not working yet, strange bug in psi computations

This commit is contained in:
Max Zwiessele 2013-05-01 17:09:38 +01:00
parent c502b66ea3
commit 42474f0044
8 changed files with 353 additions and 244 deletions

View file

@ -82,7 +82,7 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300):
m.ensure_default_constraints() m.ensure_default_constraints()
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]
fig,(latent_axes,hist_axes) = plt.subplots(1,2) fig, (latent_axes, hist_axes) = plt.subplots(1, 2)
plt.sca(latent_axes) plt.sca(latent_axes)
m.plot_latent() m.plot_latent()
data_show = GPy.util.visualize.vector_show(y) data_show = GPy.util.visualize.vector_show(y)
@ -176,20 +176,34 @@ def bgplvm_simulation_matlab_compare():
Y = sim_data['Y'] Y = sim_data['Y']
S = sim_data['S'] S = sim_data['S']
mu = sim_data['mu'] mu = sim_data['mu']
M, [_, Q] = 20, mu.shape M, [_, Q] = 30, mu.shape
Q = 2
from GPy.models import mrd from GPy.models import mrd
from GPy import kern from GPy import kern
reload(mrd); reload(kern) reload(mrd); reload(kern)
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) k = kern.rbf(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k,
# X=mu, # X=mu,
# X_variance=S, # X_variance=S,
_debug=True) _debug=True)
m.ensure_default_constraints() m.ensure_default_constraints()
m.auto_scale_factor = True m.auto_scale_factor = True
m['noise'] = .01 # Y.var() / 100. m['noise'] = Y.var() / 100.
m['{}_variance'.format(k.parts[0].name)] = .01
lscstr = '{}'.format(k.parts[0].name)
# m[lscstr] = .01
m.unconstrain(lscstr); m.constrain_fixed(lscstr, 10)
lscstr = 'X_variance'
# m[lscstr] = .01
m.unconstrain(lscstr); m.constrain_fixed(lscstr, .1)
# cstr = 'white'
# m.unconstrain(cstr); m.constrain_bounded(cstr, .01, 1.)
# cstr = 'noise'
# m.unconstrain(cstr); m.constrain_bounded(cstr, .01, 1.)
return m return m
def bgplvm_simulation(burnin='scg', plot_sim=False, def bgplvm_simulation(burnin='scg', plot_sim=False,
@ -385,7 +399,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True):
Y = data['Y'] Y = data['Y']
if in_place: if in_place:
# Make figure move in place. # Make figure move in place.
data['Y'][:, 0:3]=0.0 data['Y'][:, 0:3] = 0.0
m = GPy.models.GPLVM(data['Y'], 2, normalize_Y=True) m = GPy.models.GPLVM(data['Y'], 2, normalize_Y=True)
# optimize # optimize

View file

@ -4,14 +4,14 @@ Created on 24 Apr 2013
@author: maxz @author: maxz
''' '''
from GPy.inference.gradient_descent_update_rules import FletcherReeves from GPy.inference.gradient_descent_update_rules import FletcherReeves
import numpy
from multiprocessing import Value
from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
from multiprocessing.synchronize import Event
from multiprocessing.queues import Queue
from Queue import Empty from Queue import Empty
import sys from multiprocessing import Value
from multiprocessing.queues import Queue
from multiprocessing.synchronize import Event
from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
from threading import Thread from threading import Thread
import numpy
import sys
RUNNING = "running" RUNNING = "running"
CONVERGED = "converged" CONVERGED = "converged"
@ -20,10 +20,9 @@ MAX_F_EVAL = "maximum number of function calls reached"
LINE_SEARCH = "line search failed" LINE_SEARCH = "line search failed"
KBINTERRUPT = "interrupted" KBINTERRUPT = "interrupted"
SENTINEL = None
class _Async_Optimization(Thread): class _Async_Optimization(Thread):
def __init__(self, f, df, x0, update_rule, runsignal,
def __init__(self, f, df, x0, update_rule, runsignal, SENTINEL,
report_every=10, messages=0, maxiter=5e3, max_f_eval=15e3, report_every=10, messages=0, maxiter=5e3, max_f_eval=15e3,
gtol=1e-6, outqueue=None, *args, **kw): gtol=1e-6, outqueue=None, *args, **kw):
""" """
@ -42,6 +41,7 @@ class _Async_Optimization(Thread):
self.maxiter = maxiter self.maxiter = maxiter
self.max_f_eval = max_f_eval self.max_f_eval = max_f_eval
self.gtol = gtol self.gtol = gtol
self.SENTINEL = SENTINEL
self.runsignal = runsignal self.runsignal = runsignal
# self.parent = parent # self.parent = parent
# self.result = None # self.result = None
@ -70,7 +70,7 @@ class _Async_Optimization(Thread):
def callback_return(self, *a): def callback_return(self, *a):
self.callback(*a) self.callback(*a)
self.outq.put(SENTINEL) self.outq.put(self.SENTINEL)
self.runsignal.clear() self.runsignal.clear()
def run(self, *args, **kwargs): def run(self, *args, **kwargs):
@ -136,7 +136,7 @@ class _CGDAsync(_Async_Optimization):
if gfi is not None: if gfi is not None:
gi = gfi gi = gfi
if fi_old > fi: if numpy.isnan(fi) or fi_old < fi:
gi, ur, si = self.reset(xi, *a, **kw) gi, ur, si = self.reset(xi, *a, **kw)
else: else:
xi += numpy.dot(alphai, si) xi += numpy.dot(alphai, si)
@ -145,22 +145,23 @@ class _CGDAsync(_Async_Optimization):
sys.stdout.flush() sys.stdout.flush()
sys.stdout.write("iteration: {0:> 6g} f:{1:> 12e} |g|:{2:> 12e}".format(it, fi, numpy.dot(gi.T, gi))) sys.stdout.write("iteration: {0:> 6g} f:{1:> 12e} |g|:{2:> 12e}".format(it, fi, numpy.dot(gi.T, gi)))
if it % self.report_every == 0: if it % self.report_every == 0:
self.callback(xi, fi, it, self.f_call.value, self.df_call.value, status) self.callback(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
it += 1 it += 1
else: else:
status = MAXITER status = MAXITER
# self.result = [xi, fi, it, self.f_call.value, self.df_call.value, status] self.callback_return(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
self.callback_return(xi, fi, it, self.f_call.value, self.df_call.value, status) self.result = [xi, fi, gi, it, self.f_call.value, self.df_call.value, status]
class Async_Optimize(object): class Async_Optimize(object):
callback = lambda *x: None callback = lambda *x: None
runsignal = Event() runsignal = Event()
SENTINEL = "SENTINEL"
def async_callback_collect(self, q): def async_callback_collect(self, q):
while self.runsignal.is_set(): while self.runsignal.is_set():
try: try:
for ret in iter(lambda: q.get(timeout=1), SENTINEL): for ret in iter(lambda: q.get(timeout=1), self.SENTINEL):
self.callback(*ret) self.callback(*ret)
except Empty: except Empty:
pass pass
@ -169,12 +170,12 @@ class Async_Optimize(object):
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6, messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, *args, **kwargs): report_every=10, *args, **kwargs):
self.runsignal.set() self.runsignal.set()
outqueue = Queue(5) outqueue = Queue()
if callback: if callback:
self.callback = callback self.callback = callback
c = Thread(target=self.async_callback_collect, args=(outqueue,)) c = Thread(target=self.async_callback_collect, args=(outqueue,))
c.start() c.start()
p = _CGDAsync(f, df, x0, update_rule, self.runsignal, p = _CGDAsync(f, df, x0, update_rule, self.runsignal, self.SENTINEL,
report_every=report_every, messages=messages, maxiter=maxiter, report_every=report_every, messages=messages, maxiter=maxiter,
max_f_eval=max_f_eval, gtol=gtol, outqueue=outqueue, *args, **kwargs) max_f_eval=max_f_eval, gtol=gtol, outqueue=outqueue, *args, **kwargs)
p.run() p.run()
@ -189,12 +190,14 @@ class Async_Optimize(object):
while self.runsignal.is_set(): while self.runsignal.is_set():
try: try:
p.join(1) p.join(1)
c.join(1) # c.join(1)
except KeyboardInterrupt: except KeyboardInterrupt:
# print "^C" # print "^C"
self.runsignal.clear() self.runsignal.clear()
p.join() p.join()
c.join() if c.is_alive():
print "WARNING: callback still running, optimisation done!"
return p.result
class CGD(Async_Optimize): class CGD(Async_Optimize):
''' '''
@ -215,7 +218,7 @@ class CGD(Async_Optimize):
callback gets called every `report_every` iterations callback gets called every `report_every` iterations
callback(xi, fi, iteration, function_calls, gradient_calls, status_message) callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)
if df returns tuple (grad, natgrad) it will optimize according if df returns tuple (grad, natgrad) it will optimize according
to natural gradient rules to natural gradient rules
@ -233,7 +236,7 @@ class CGD(Async_Optimize):
**calls** **calls**
--------- ---------
callback(x_opt, f_opt, iteration, function_calls, gradient_calls, status_message) callback(x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message)
at end of optimization! at end of optimization!
""" """
@ -247,7 +250,7 @@ class CGD(Async_Optimize):
Minimize f, calling callback every `report_every` iterations with following syntax: Minimize f, calling callback every `report_every` iterations with following syntax:
callback(xi, fi, iteration, function_calls, gradient_calls, status_message) callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)
if df returns tuple (grad, natgrad) it will optimize according if df returns tuple (grad, natgrad) it will optimize according
to natural gradient rules to natural gradient rules
@ -260,7 +263,7 @@ class CGD(Async_Optimize):
**returns** **returns**
--------- ---------
x_opt, f_opt, iteration, function_calls, gradient_calls, status_message x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message
at end of optimization at end of optimization
""" """

View file

@ -5,6 +5,7 @@
from kernpart import kernpart from kernpart import kernpart
import numpy as np import numpy as np
from ..util.linalg import tdot from ..util.linalg import tdot
from GPy.util.linalg import mdot
class linear(kernpart): class linear(kernpart):
""" """
@ -23,7 +24,7 @@ class linear(kernpart):
:rtype: kernel object :rtype: kernel object
""" """
def __init__(self,D,variances=None,ARD=False): def __init__(self, D, variances=None, ARD=False):
self.D = D self.D = D
self.ARD = ARD self.ARD = ARD
if ARD == False: if ARD == False:
@ -45,15 +46,15 @@ class linear(kernpart):
variances = np.ones(self.D) variances = np.ones(self.D)
self._set_params(variances.flatten()) self._set_params(variances.flatten())
#initialize cache # initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3,1)) self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._params = np.empty(shape=(3,1)) self._X, self._X2, self._params = np.empty(shape=(3, 1))
def _get_params(self): def _get_params(self):
return self.variances return self.variances
def _set_params(self,x): def _set_params(self, x):
assert x.size==(self.Nparam) assert x.size == (self.Nparam)
self.variances = x self.variances = x
self.variances2 = np.square(self.variances) self.variances2 = np.square(self.variances)
@ -61,115 +62,136 @@ class linear(kernpart):
if self.Nparam == 1: if self.Nparam == 1:
return ['variance'] return ['variance']
else: else:
return ['variance_%i'%i for i in range(self.variances.size)] return ['variance_%i' % i for i in range(self.variances.size)]
def K(self,X,X2,target): def K(self, X, X2, target):
if self.ARD: if self.ARD:
XX = X*np.sqrt(self.variances) XX = X * np.sqrt(self.variances)
if X2 is None: if X2 is None:
target += tdot(XX) target += tdot(XX)
else: else:
XX2 = X2*np.sqrt(self.variances) XX2 = X2 * np.sqrt(self.variances)
target += np.dot(XX, XX2.T) target += np.dot(XX, XX2.T)
else: else:
self._K_computations(X, X2) self._K_computations(X, X2)
target += self.variances * self._dot_product target += self.variances * self._dot_product
def Kdiag(self,X,target): def Kdiag(self, X, target):
np.add(target,np.sum(self.variances*np.square(X),-1),target) np.add(target, np.sum(self.variances * np.square(X), -1), target)
def dK_dtheta(self,dL_dK,X,X2,target): def dK_dtheta(self, dL_dK, X, X2, target):
if self.ARD: if self.ARD:
if X2 is None: if X2 is None:
[np.add(target[i:i+1],np.sum(dL_dK*tdot(X[:,i:i+1])),target[i:i+1]) for i in range(self.D)] [np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.D)]
else: else:
product = X[:,None,:]*X2[None,:,:] product = X[:, None, :] * X2[None, :, :]
target += (dL_dK[:,:,None]*product).sum(0).sum(0) target += (dL_dK[:, :, None] * product).sum(0).sum(0)
else: else:
self._K_computations(X, X2) self._K_computations(X, X2)
target += np.sum(self._dot_product*dL_dK) target += np.sum(self._dot_product * dL_dK)
def dKdiag_dtheta(self,dL_dKdiag, X, target): def dKdiag_dtheta(self, dL_dKdiag, X, target):
tmp = dL_dKdiag[:,None]*X**2 tmp = dL_dKdiag[:, None] * X ** 2
if self.ARD: if self.ARD:
target += tmp.sum(0) target += tmp.sum(0)
else: else:
target += tmp.sum() target += tmp.sum()
def dK_dX(self,dL_dK,X,X2,target): def dK_dX(self, dL_dK, X, X2, target):
target += (((X2[:, None, :] * self.variances)) * dL_dK[:,:, None]).sum(0) target += (((X2[:, None, :] * self.variances)) * dL_dK[:, :, None]).sum(0)
#---------------------------------------# #---------------------------------------#
# PSI statistics # # PSI statistics #
#---------------------------------------# #---------------------------------------#
def psi0(self,Z,mu,S,target): def psi0(self, Z, mu, S, target):
self._psi_computations(Z,mu,S) self._psi_computations(Z, mu, S)
target += np.sum(self.variances*self.mu2_S,1) target += np.sum(self.variances * self.mu2_S, 1)
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target): def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
self._psi_computations(Z,mu,S) self._psi_computations(Z, mu, S)
tmp = dL_dpsi0[:, None] * self.mu2_S tmp = dL_dpsi0[:, None] * self.mu2_S
if self.ARD: if self.ARD:
target += tmp.sum(0) target += tmp.sum(0)
else: else:
target += tmp.sum() target += tmp.sum()
def dpsi0_dmuS(self,dL_dpsi0, Z,mu,S,target_mu,target_S): def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
target_mu += dL_dpsi0[:, None] * (2.0*mu*self.variances) target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances)
target_S += dL_dpsi0[:, None] * self.variances target_S += dL_dpsi0[:, None] * self.variances
def psi1(self,Z,mu,S,target): def psi1(self, Z, mu, S, target):
"""the variance, it does nothing""" """the variance, it does nothing"""
self._psi1 = self.K(mu, Z, target) self._psi1 = self.K(mu, Z, target)
def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target): def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
"""the variance, it does nothing""" """the variance, it does nothing"""
self.dK_dtheta(dL_dpsi1,mu,Z,target) self.dK_dtheta(dL_dpsi1, mu, Z, target)
def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S): def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
"""Do nothing for S, it does not affect psi1""" """Do nothing for S, it does not affect psi1"""
self._psi_computations(Z,mu,S) self._psi_computations(Z, mu, S)
target_mu += (dL_dpsi1.T[:,:, None]*(Z*self.variances)).sum(1) target_mu += (dL_dpsi1.T[:, :, None] * (Z * self.variances)).sum(1)
def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target): def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self.dK_dX(dL_dpsi1.T,Z,mu,target) self.dK_dX(dL_dpsi1.T, Z, mu, target)
def psi2(self,Z,mu,S,target): def psi2(self, Z, mu, S, target):
""" """
returns N,M,M matrix returns N,M,M matrix
""" """
self._psi_computations(Z,mu,S) self._psi_computations(Z, mu, S)
#psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :] # psi2_old = self.ZZ * np.square(self.variances) * self.mu2_S[:, None, None, :]
#target += psi2.sum(-1) # target += psi2.sum(-1)
target += np.tensordot(self.ZZ[None,:,:,:]*np.square(self.variances),self.mu2_S[:, None, None, :],((3),(3))).squeeze().T # slow way of doing it, but right
psi2_real = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
for n in range(mu.shape[0]):
for m_prime in range(Z.shape[0]):
for m in range(Z.shape[0]):
tmp = self._Z[m:m + 1] * self.variances
tmp = np.dot(tmp, (tdot(self._mu[n:n + 1].T) + np.diag(S[n:n + 1])))
psi2_real[n, m, m_prime] = np.dot(tmp, (
self._Z[m_prime:m_prime + 1] * self.variances).T)
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target): psi2_inner = mdot(self.ZA, self.inner, self.ZA.T)
self._psi_computations(Z,mu,S) mu2_S = (self._mu[:, None] * self._mu[:, :, None]) + self._S[:, :, None]
tmp = (dL_dpsi2[:,:,:,None]*(2.*self.ZZ*self.mu2_S[:,None,None,:]*self.variances)) psi2 = (self.ZA[None, :, None, :] * mu2_S[:, None]).sum(-1)
psi2 = (psi2[:, :, None] * self.ZA[None, None]).sum(-1)
# psi2_tensor = np.tensordot(self.ZZ[None, :, :, :] * np.square(self.variances), self.mu2_S[:, None, None, :], ((3), (3))).squeeze().T
# import ipdb;ipdb.set_trace()
target += psi2_real
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
tmp = (dL_dpsi2[:, :, :, None] * (2.*self.ZZ * self.mu2_S[:, None, None, :] * self.variances))
if self.ARD: if self.ARD:
target += tmp.sum(0).sum(0).sum(0) target += tmp.sum(0).sum(0).sum(0)
else: else:
target += tmp.sum() target += tmp.sum()
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S): def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
"""Think N,M,M,Q """ """Think N,M,M,Q """
self._psi_computations(Z,mu,S) self._psi_computations(Z, mu, S)
tmp = self.ZZ*np.square(self.variances) # M,M,Q tmp = self.ZZ * np.square(self.variances) # M,M,Q
target_mu += (dL_dpsi2[:,:,:,None]*tmp*2.*mu[:,None,None,:]).sum(1).sum(1) # import ipdb;ipdb.set_trace()
target_S += (dL_dpsi2[:,:,:,None]*tmp).sum(1).sum(1) target_mu += (dL_dpsi2[:, :, :, None] * tmp * 2.*mu[:, None, None, :]).sum(1).sum(1)
target_S += (dL_dpsi2[:, :, :, None] * tmp).sum(1).sum(1) * S.shape[0]
def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target): def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z,mu,S) self._psi_computations(Z, mu, S)
mu2_S = np.sum(self.mu2_S,0)# Q, # mu2_S = np.sum(self.mu2_S, 0) # Q,
target += (dL_dpsi2[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1) # import ipdb;ipdb.set_trace()
#TODO: tensordot would gain some time here # prod = (np.eye(Z.shape[0])[:, None, :, None] * (np.dot(self.ZA, self.inner) * self.variances)[None, :, None])
# psi2_dZ = prod.swapaxes(0, 1) + prod
psi2_dZ_old = (dL_dpsi2[:, :, :, None] * (self.mu2_S[:, None, None, :] * (Z * np.square(self.variances)[None, :])[None, None, :, :])).sum(0).sum(1)
target += psi2_dZ_old # .sum(0).sum(1)
# TODO: tensordot would gain some time here
#---------------------------------------# #---------------------------------------#
# Precomputations # # Precomputations #
#---------------------------------------# #---------------------------------------#
def _K_computations(self,X,X2): def _K_computations(self, X, X2):
if not (np.array_equal(X, self._Xcache) and np.array_equal(X2, self._X2cache)): if not (np.array_equal(X, self._Xcache) and np.array_equal(X2, self._X2cache)):
self._Xcache = X.copy() self._Xcache = X.copy()
if X2 is None: if X2 is None:
@ -177,16 +199,18 @@ class linear(kernpart):
self._X2cache = None self._X2cache = None
else: else:
self._X2cache = X2.copy() self._X2cache = X2.copy()
self._dot_product = np.dot(X,X2.T) self._dot_product = np.dot(X, X2.T)
def _psi_computations(self,Z,mu,S): def _psi_computations(self, Z, mu, S):
#here are the "statistics" for psi1 and psi2 # here are the "statistics" for psi1 and psi2
if not np.all(Z==self._Z): if not np.all(Z == self._Z):
#Z has changed, compute Z specific stuff # Z has changed, compute Z specific stuff
#self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q # self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
self.ZZ = np.empty((Z.shape[0],Z.shape[0],Z.shape[1]),order='F') self.ZZ = np.empty((Z.shape[0], Z.shape[0], Z.shape[1]), order='F')
[tdot(Z[:,i:i+1],self.ZZ[:,:,i].T) for i in xrange(Z.shape[1])] [tdot(Z[:, i:i + 1], self.ZZ[:, :, i].T) for i in xrange(Z.shape[1])]
self._Z = Z.copy() self._Z = Z.copy()
if not (np.all(mu==self._mu) and np.all(S==self._S)): self.ZA = Z * self.variances
self.mu2_S = np.square(mu)+S if not (np.all(mu == self._mu) and np.all(S == self._S)):
self.mu2_S = np.square(mu) + S
self.inner = tdot(mu.T) + (np.diag(S.sum(0)))
self._mu, self._S = mu.copy(), S.copy() self._mu, self._S = mu.copy(), S.copy()

View file

@ -308,6 +308,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
Slatentgrads = ax3.quiver(xlatent, S, Ulatent, Sg, color=colors, Slatentgrads = ax3.quiver(xlatent, S, Ulatent, Sg, color=colors,
units=quiver_units, scale_units=quiver_scale_units, units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale) scale=quiver_scale)
ax3.set_ylim(0, 1.)
xZ = np.tile(np.arange(0, Z.shape[0])[:, None], Z.shape[1]) xZ = np.tile(np.arange(0, Z.shape[0])[:, None], Z.shape[1])
UZ = np.zeros_like(Z) UZ = np.zeros_like(Z)
@ -427,11 +428,11 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
cbarkmmdl.update_normal(imkmmdl) cbarkmmdl.update_normal(imkmmdl)
ax2.relim() ax2.relim()
ax3.relim() # ax3.relim()
ax4.relim() ax4.relim()
ax5.relim() ax5.relim()
ax2.autoscale() ax2.autoscale()
ax3.autoscale() # ax3.autoscale()
ax4.autoscale() ax4.autoscale()
ax5.autoscale() ax5.autoscale()

View file

@ -30,22 +30,22 @@ class sparse_GP(GP):
""" """
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False): def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
self.scale_factor = 100.0 # a scaling factor to help keep the algorithm stable self.scale_factor = 100.0# a scaling factor to help keep the algorithm stable
self.auto_scale_factor = False self.auto_scale_factor = False
self.Z = Z self.Z = Z
self.M = Z.shape[0] self.M = Z.shape[0]
self.likelihood = likelihood self.likelihood = likelihood
if X_variance is None: if X_variance is None:
self.has_uncertain_inputs = False self.has_uncertain_inputs=False
else: else:
assert X_variance.shape == X.shape assert X_variance.shape==X.shape
self.has_uncertain_inputs = True self.has_uncertain_inputs=True
self.X_variance = X_variance self.X_variance = X_variance
GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X) GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X)
# normalize X uncertainty also #normalize X uncertainty also
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.X_variance /= np.square(self._Xstd) self.X_variance /= np.square(self._Xstd)
@ -54,155 +54,156 @@ class sparse_GP(GP):
# kernel computations, using BGPLVM notation # kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z) self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance) self.psi0 = self.kern.psi0(self.Z,self.X, self.X_variance)
self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance).T self.psi1 = self.kern.psi1(self.Z,self.X, self.X_variance).T
self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance) self.psi2 = self.kern.psi2(self.Z,self.X, self.X_variance)
else: else:
self.psi0 = self.kern.Kdiag(self.X) self.psi0 = self.kern.Kdiag(self.X)
self.psi1 = self.kern.K(self.Z, self.X) self.psi1 = self.kern.K(self.Z,self.X)
self.psi2 = None self.psi2 = None
def _computations(self): def _computations(self):
# TODO: find routine to multiply triangular matrices #TODO: find routine to multiply triangular matrices
sf = self.scale_factor sf = self.scale_factor
sf2 = sf ** 2 sf2 = sf**2
# The rather complex computations of psi2_beta_scaled #The rather complex computations of psi2_beta_scaled
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
assert self.likelihood.D == 1 # TODO: what if the likelihood is heterscedatic and there are multiple independent outputs? assert self.likelihood.D == 1 #TODO: what if the likelihood is heterscedatic and there are multiple independent outputs?
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1) / sf2)).sum(0) self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
else: else:
tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)) / sf) tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
# self.psi2_beta_scaled = np.dot(tmp,tmp.T) #self.psi2_beta_scaled = np.dot(tmp,tmp.T)
self.psi2_beta_scaled = tdot(tmp) self.psi2_beta_scaled = tdot(tmp)
else: else:
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.psi2_beta_scaled = (self.psi2 * (self.likelihood.precision / sf2)).sum(0) self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
else: else:
tmp = self.psi1 * (np.sqrt(self.likelihood.precision) / sf) tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
# self.psi2_beta_scaled = np.dot(tmp,tmp.T) #self.psi2_beta_scaled = np.dot(tmp,tmp.T)
self.psi2_beta_scaled = tdot(tmp) self.psi2_beta_scaled = tdot(tmp)
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
self.V = (self.likelihood.precision / self.scale_factor) * self.likelihood.Y self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y
# Compute A = L^-1 psi2 beta L^-T #Compute A = L^-1 psi2 beta L^-T
# self. A = mdot(self.Lmi,self.psi2_beta_scaled,self.Lmi.T) #self. A = mdot(self.Lmi,self.psi2_beta_scaled,self.Lmi.T)
tmp = linalg.lapack.flapack.dtrtrs(self.Lm, self.psi2_beta_scaled.T, lower=1)[0] tmp = linalg.lapack.flapack.dtrtrs(self.Lm,self.psi2_beta_scaled.T,lower=1)[0]
self.A = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1)[0] self.A = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0]
self.B = np.eye(self.M) / sf2 + self.A self.B = np.eye(self.M)/sf2 + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.psi1V = np.dot(self.psi1, self.V) self.psi1V = np.dot(self.psi1, self.V)
tmp = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.Bi), lower=1, trans=1)[0] tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
self.C = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1, trans=1)[0] self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]
# self.Cpsi1V = np.dot(self.C,self.psi1V) #back substutue C into psi1V
# back substitute C into psi1V tmp,info1 = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.psi1V),lower=1,trans=0)
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0) tmp,info2 = linalg.lapack.flapack.dpotrs(self.LB,tmp,lower=1)
tmp, _ = linalg.lapack.flapack.dpotrs(self.LB, tmp, lower=1) self.Cpsi1V,info3 = linalg.lapack.flapack.dtrtrs(self.Lm,tmp,lower=1,trans=1)
self.Cpsi1V, _ = linalg.lapack.flapack.dtrtrs(self.Lm, tmp, lower=1, trans=1) #self.Cpsi1V = np.dot(self.C,self.psi1V)
self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V, self.psi1V.T) # TODO: stabilize? self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
self.E = tdot(self.Cpsi1V / sf)
self.E = tdot(self.Cpsi1V/sf)
# Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
self.dL_dpsi0 = -0.5 * self.D * (self.likelihood.precision * np.ones([self.N, 1])).flatten() self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
self.dL_dpsi1 = np.dot(self.Cpsi1V, self.V.T) self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T)
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
# self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB #self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
# self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC #self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
# self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD #self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
self.dL_dpsi2 = 0.5 * self.likelihood.precision[:, None, None] * (self.D * (self.Kmmi - self.C / sf2) - self.E)[None, :, :] self.dL_dpsi2 = 0.5*self.likelihood.precision[:,None,None]*(self.D*(self.Kmmi - self.C/sf2) -self.E)[None,:,:]
else: else:
# self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB #self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
# self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC #self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
# self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD #self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
self.dL_dpsi1 += np.dot(self.Kmmi - self.C / sf2 - self.E, self.psi1 * self.likelihood.precision.reshape(1, self.N)) self.dL_dpsi1 += np.dot(self.Kmmi - self.C/sf2 -self.E,self.psi1*self.likelihood.precision.reshape(1,self.N))
self.dL_dpsi2 = None self.dL_dpsi2 = None
else: else:
# self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB #self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
# self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC #self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC
# self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD #self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD
self.dL_dpsi2 = 0.5 * self.likelihood.precision * (self.D * (self.Kmmi - self.C / sf2) - self.E) self.dL_dpsi2 = 0.5*self.likelihood.precision*(self.D*(self.Kmmi - self.C/sf2) -self.E)
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
# repeat for each of the N psi_2 matrices #repeat for each of the N psi_2 matrices
self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None, :, :], self.N, axis=0) self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None,:,:],self.N,axis=0)
else: else:
self.dL_dpsi1 += 2.*np.dot(self.dL_dpsi2, self.psi1) self.dL_dpsi1 += 2.*np.dot(self.dL_dpsi2,self.psi1)
self.dL_dpsi2 = None self.dL_dpsi2 = None
# Compute dL_dKmm # Compute dL_dKmm
# self.dL_dKmm_old = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB #self.dL_dKmm_old = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
# self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC #self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
# self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD #self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
tmp = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.B), lower=1, trans=1)[0] tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.B),lower=1,trans=1)[0]
self.dL_dKmm = -0.5 * self.D * sf2 * linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1, trans=1)[0] # dA self.dL_dKmm = -0.5*self.D*sf2*linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0] #dA
tmp = np.dot(self.D * self.C + self.E * sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1 tmp = np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1
tmp = linalg.lapack.flapack.dpotrs(self.Lm, np.asfortranarray(tmp.T), lower=1)[0].T tmp = linalg.lapack.flapack.dpotrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0].T
self.dL_dKmm += 0.5 * (self.D * self.C / sf2 + self.E) + tmp # d(C+D) self.dL_dKmm += 0.5*(self.D*self.C/sf2 + self.E) +tmp # d(C+D)
# the partial derivative vector for the likelihood #the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0: if self.likelihood.Nparams ==0:
# save computation here. #save computation here.
self.partial_for_likelihood = None self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic: elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedatic derivates not implemented" raise NotImplementedError, "heteroscedatic derivates not implemented"
# self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA #self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA
# self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB #self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB
# self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC #self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC
# self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD #self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
else: else:
# likelihood is not heterscedatic #likelihood is not heterscedatic
self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2 self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision * sf2) self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi, self.A) * self.likelihood.precision self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
self.partial_for_likelihood += self.likelihood.precision * (0.5 * trace_dot(self.psi2_beta_scaled, self.E * sf2) - np.trace(self.Cpsi1VVpsi1)) self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
def log_likelihood(self): def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """ """ Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor ** 2 sf2 = self.scale_factor**2
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y) A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2) B = -0.5*self.D*(np.sum(self.likelihood.precision.flatten()*self.psi0) - np.trace(self.A)*sf2)
else: else:
A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2) B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
C = -0.5 * self.D * (self.B_logdet + self.M * np.log(sf2)) C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = 0.5 * np.trace(self.Cpsi1VVpsi1) D = 0.5*np.trace(self.Cpsi1VVpsi1)
return A + B + C + D return A+B+C+D
def _set_params(self, p): def _set_params(self, p):
self.Z = p[:self.M * self.Q].reshape(self.M, self.Q) self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam]) self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:]) self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
self._compute_kernel_matrices() self._compute_kernel_matrices()
if self.auto_scale_factor: if self.auto_scale_factor:
self.scale_factor = np.sqrt(self.psi2.sum(0).mean() * self.likelihood.precision) self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
# if self.auto_scale_factor: #if self.auto_scale_factor:
# if self.likelihood.is_heteroscedastic: # if self.likelihood.is_heteroscedastic:
# self.scale_factor = max(1,np.sqrt(self.psi2_beta_scaled.sum(0).mean())) # self.scale_factor = max(1,np.sqrt(self.psi2_beta_scaled.sum(0).mean()))
# else: # else:
# self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision) # self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
# self.scale_factor = 1. #self.scale_factor = 1.
self._computations() self._computations()
def _get_params(self): def _get_params(self):
return np.hstack([self.Z.flatten(), GP._get_params(self)]) return np.hstack([self.Z.flatten(),GP._get_params(self)])
def _get_param_names(self): def _get_param_names(self):
return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], []) + GP._get_param_names(self) return sum([['iip_%i_%i'%(i,j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])],[]) + GP._get_param_names(self)
def update_likelihood_approximation(self): def update_likelihood_approximation(self):
""" """
@ -214,9 +215,9 @@ class sparse_GP(GP):
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
raise NotImplementedError, "EP approximation not implemented for uncertain inputs" raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
else: else:
self.likelihood.fit_DTC(self.Kmm, self.psi1) self.likelihood.fit_DTC(self.Kmm,self.psi1)
# self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) #self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP self._set_params(self._get_params()) # update the GP
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
@ -226,13 +227,13 @@ class sparse_GP(GP):
""" """
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
""" """
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z) dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance) dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_variance)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T, self.Z, self.X, self.X_variance) dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_variance)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance) dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z,self.X, self.X_variance)
else: else:
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.Z, self.X) dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X) dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
return dL_dtheta return dL_dtheta
@ -243,22 +244,22 @@ class sparse_GP(GP):
""" """
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance) dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_variance)
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance) dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
else: else:
dL_dZ += self.kern.dK_dX(self.dL_dpsi1, self.Z, self.X) dL_dZ += self.kern.dK_dX(self.dL_dpsi1,self.Z,self.X)
return dL_dZ return dL_dZ
def _raw_predict(self, Xnew, which_parts='all', full_cov=False): def _raw_predict(self, Xnew, which_parts='all', full_cov=False):
"""Internal helper function for making predictions, does not account for normalization""" """Internal helper function for making predictions, does not account for normalization"""
Kx = self.kern.K(self.Z, Xnew) Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C / self.scale_factor, self.psi1V) mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov: if full_cov:
Kxx = self.kern.K(Xnew, which_parts=which_parts) Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C / self.scale_factor ** 2), Kx) # NOTE this won't work for plotting var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else: else:
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts) Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
var = Kxx - np.sum(Kx * np.dot(self.Kmmi - self.C / self.scale_factor ** 2, Kx), 0) var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu, var[:, None] return mu,var[:,None]

View file

@ -5,7 +5,7 @@ Created on 26 Apr 2013
''' '''
import unittest import unittest
import numpy import numpy
from GPy.inference.conjugate_gradient_descent import CGD from GPy.inference.conjugate_gradient_descent import CGD, RUNNING
import pylab import pylab
import time import time
from scipy.optimize.optimize import rosen, rosen_der from scipy.optimize.optimize import rosen, rosen_der
@ -14,17 +14,62 @@ from scipy.optimize.optimize import rosen, rosen_der
class Test(unittest.TestCase): class Test(unittest.TestCase):
def testMinimizeSquare(self): def testMinimizeSquare(self):
f = lambda x: x ** 2 + 2 * x - 2 N = 2
A = numpy.random.rand(N) * numpy.eye(N)
b = numpy.random.rand(N) * 0
f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b)
df = lambda x: numpy.dot(A, x) - b
opt = CGD()
restarts = 10
for _ in range(restarts):
try:
x0 = numpy.random.randn(N) * .5
res = opt.fmin(f, df, x0, messages=0,
maxiter=1000, gtol=1e-10)
assert numpy.allclose(res[0], 0, atol=1e-3)
break
except:
# RESTART
pass
else:
raise AssertionError("Test failed for {} restarts".format(restarts))
def testRosen(self):
N = 2
f = rosen
df = rosen_der
x0 = numpy.random.randn(N) * .5
opt = CGD()
restarts = 10
for _ in range(restarts):
try:
x0 = numpy.random.randn(N) * .5
res = opt.fmin(f, df, x0, messages=0,
maxiter=1000, gtol=1e-10)
assert numpy.allclose(res[0], 1, atol=1e-5)
break
except:
# RESTART
pass
else:
raise AssertionError("Test failed for {} restarts".format(restarts))
if __name__ == "__main__": if __name__ == "__main__":
# import sys;sys.argv = ['', 'Test.testMinimizeSquare'] # import sys;sys.argv = ['',
# 'Test.testMinimizeSquare',
# 'Test.testRosen',
# ]
# unittest.main() # unittest.main()
N = 2 N = 2
A = numpy.random.rand(N) * numpy.eye(N) A = numpy.random.rand(N) * numpy.eye(N)
b = numpy.random.rand(N) b = numpy.random.rand(N) * 0
# f = lambda x: numpy.dot(x.T.dot(A), x) + numpy.dot(x.T, b) # f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b)
# df = lambda x: numpy.dot(A, x) - b # df = lambda x: numpy.dot(A, x) - b
f = rosen f = rosen
df = rosen_der df = rosen_der
x0 = numpy.random.randn(N) * .5 x0 = numpy.random.randn(N) * .5
@ -48,14 +93,21 @@ if __name__ == "__main__":
optplts, = ax.plot3D([x0[0]], [x0[1]], zs=f(x0), marker='o', color='r') optplts, = ax.plot3D([x0[0]], [x0[1]], zs=f(x0), marker='o', color='r')
raw_input("enter to start optimize") raw_input("enter to start optimize")
res = [0]
def callback(x, *a, **kw): def callback(*r):
xopts.append(x.copy()) xopts.append(r[0].copy())
# time.sleep(.3) # time.sleep(.3)
optplts._verts3d = [numpy.array(xopts)[:, 0], numpy.array(xopts)[:, 1], [f(xs) for xs in xopts]] optplts._verts3d = [numpy.array(xopts)[:, 0], numpy.array(xopts)[:, 1], [f(xs) for xs in xopts]]
fig.canvas.draw() fig.canvas.draw()
if r[-1] != RUNNING:
res[0] = r
p, c = opt.fmin_async(f, df, x0.copy(), callback, messages=True, maxiter=1000,
report_every=20, gtol=1e-12)
res = opt.fmin(f, df, x0, callback, messages=True, maxiter=1000, report_every=1)
pylab.ion() pylab.ion()
pylab.show() pylab.show()
pass

View file

@ -9,21 +9,30 @@ import numpy as np
import pylab import pylab
__test__ = False __test__ = False
np.random.seed(0)
def ard(p):
try:
if p.ARD:
return "ARD"
except:
pass
return ""
class Test(unittest.TestCase): class Test(unittest.TestCase):
D = 9 D = 9
M = 5 M = 3
Nsamples = 3e6 Nsamples = 6e6
def setUp(self): def setUp(self):
self.kerns = ( self.kerns = (
GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True), # GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True),
GPy.kern.linear(self.D), GPy.kern.linear(self.D, ARD=True), GPy.kern.linear(self.D, ARD=False), GPy.kern.linear(self.D, ARD=True),
GPy.kern.linear(self.D) + GPy.kern.bias(self.D), GPy.kern.linear(self.D) + GPy.kern.bias(self.D),
GPy.kern.rbf(self.D) + GPy.kern.bias(self.D), # GPy.kern.rbf(self.D) + GPy.kern.bias(self.D),
GPy.kern.linear(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D), GPy.kern.linear(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D), # GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
GPy.kern.bias(self.D), GPy.kern.white(self.D), # GPy.kern.bias(self.D), GPy.kern.white(self.D),
) )
self.q_x_mean = np.random.randn(self.D) self.q_x_mean = np.random.randn(self.D)
self.q_x_variance = np.exp(np.random.randn(self.D)) self.q_x_variance = np.exp(np.random.randn(self.D))
@ -66,18 +75,21 @@ class Test(unittest.TestCase):
K_ += K K_ += K
diffs.append(((psi2 - (K_ / (i + 1))) ** 2).mean()) diffs.append(((psi2 - (K_ / (i + 1))) ** 2).mean())
K_ /= self.Nsamples / Nsamples K_ /= self.Nsamples / Nsamples
msg = "psi2: {}".format("+".join([p.name + ard(p) for p in kern.parts]))
try: try:
# pylab.figure("+".join([p.name for p in kern.parts]) + "psi2") pylab.figure(msg)
# pylab.plot(diffs) pylab.plot(diffs)
self.assertTrue(np.allclose(psi2.squeeze(), K_, self.assertTrue(np.allclose(psi2.squeeze(), K_,
rtol=1e-1, atol=.1), rtol=1e-1, atol=.1),
msg="{}: not matching".format("+".join([p.name for p in kern.parts]))) msg=msg + ": not matching")
except: except:
print "{}: not matching".format(kern.parts[0].name) import ipdb;ipdb.set_trace()
kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
print msg + ": not matching"
if __name__ == "__main__": if __name__ == "__main__":
import sys;sys.argv = ['', import sys;sys.argv = ['',
'Test.test_psi0', # 'Test.test_psi0',
'Test.test_psi1', # 'Test.test_psi1',
'Test.test_psi2'] 'Test.test_psi2']
unittest.main() unittest.main()

View file

@ -106,18 +106,18 @@ if __name__ == "__main__":
import sys import sys
interactive = 'i' in sys.argv interactive = 'i' in sys.argv
if interactive: if interactive:
N, M, Q, D = 30, 5, 4, 30 # N, M, Q, D = 30, 5, 4, 30
X = numpy.random.rand(N, Q) # X = numpy.random.rand(N, Q)
k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) # k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X) # K = k.K(X)
Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T # Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T
Y -= Y.mean(axis=0) # Y -= Y.mean(axis=0)
k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) # k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M) # m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M)
m.ensure_default_constraints() # m.ensure_default_constraints()
m.randomize() # m.randomize()
# self.assertTrue(m.checkgrad()) # # self.assertTrue(m.checkgrad())
numpy.random.seed(0)
Q = 5 Q = 5
N = 50 N = 50
M = 10 M = 10
@ -126,11 +126,11 @@ if __name__ == "__main__":
X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1) X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
Z = numpy.random.permutation(X)[:M] Z = numpy.random.permutation(X)[:M]
Y = X.dot(numpy.random.randn(Q, D)) Y = X.dot(numpy.random.randn(Q, D))
kernel = GPy.kern.bias(Q) # kernel = GPy.kern.bias(Q)
#
kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q), # kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
GPy.kern.linear(Q) + GPy.kern.bias(Q), # GPy.kern.linear(Q) + GPy.kern.bias(Q),
GPy.kern.rbf(Q) + GPy.kern.bias(Q)] # GPy.kern.rbf(Q) + GPy.kern.bias(Q)]
# for k in kernels: # for k in kernels:
# m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, # m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
@ -143,11 +143,13 @@ if __name__ == "__main__":
# M=M, kernel=kernel) # M=M, kernel=kernel)
# m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, # m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=kernel) # M=M, kernel=kernel)
m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, # m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
M=M, kernel=GPy.kern.rbf(Q)) # M=M, kernel=GPy.kern.rbf(Q))
m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
M=M, kernel=GPy.kern.linear(Q) + GPy.kern.bias(Q)) M=M, kernel=GPy.kern.linear(Q))
m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, m3.ensure_default_constraints()
M=M, kernel=GPy.kern.rbf(Q) + GPy.kern.bias(Q)) # + GPy.kern.bias(Q))
# m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=GPy.kern.rbf(Q) + GPy.kern.bias(Q))
else: else:
unittest.main() unittest.main()