LinearCF psi statistics not working yet; strange bug in the psi computations

Max Zwiessele 2013-05-01 17:09:38 +01:00
parent c502b66ea3
commit 42474f0044
8 changed files with 353 additions and 244 deletions


@@ -176,20 +176,34 @@ def bgplvm_simulation_matlab_compare():
Y = sim_data['Y']
S = sim_data['S']
mu = sim_data['mu']
M, [_, Q] = 20, mu.shape
M, [_, Q] = 30, mu.shape
Q = 2
from GPy.models import mrd
from GPy import kern
reload(mrd); reload(kern)
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
k = kern.rbf(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k,
# X=mu,
# X_variance=S,
_debug=True)
m.ensure_default_constraints()
m.auto_scale_factor = True
m['noise'] = .01 # Y.var() / 100.
m['{}_variance'.format(k.parts[0].name)] = .01
m['noise'] = Y.var() / 100.
lscstr = k.parts[0].name
# m[lscstr] = .01
m.unconstrain(lscstr); m.constrain_fixed(lscstr, 10)
lscstr = 'X_variance'
# m[lscstr] = .01
m.unconstrain(lscstr); m.constrain_fixed(lscstr, .1)
# cstr = 'white'
# m.unconstrain(cstr); m.constrain_bounded(cstr, .01, 1.)
# cstr = 'noise'
# m.unconstrain(cstr); m.constrain_bounded(cstr, .01, 1.)
return m
def bgplvm_simulation(burnin='scg', plot_sim=False,
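The hunk above pins parameters while hunting the psi bug: `unconstrain` drops whatever constraint matches a name pattern, and `constrain_fixed` then freezes the matched parameter at a given value. A minimal sketch of that pattern (GPy 0.x API, on the model `m` and parameter names constructed above):

    m.ensure_default_constraints()       # positivity etc. on all standard parameters
    m.unconstrain('X_variance')          # drop the default constraint on the match
    m.constrain_fixed('X_variance', .1)  # pin it so the optimiser cannot move it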


@@ -4,14 +4,14 @@ Created on 24 Apr 2013
@author: maxz
'''
from GPy.inference.gradient_descent_update_rules import FletcherReeves
import numpy
from multiprocessing import Value
from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
from multiprocessing.synchronize import Event
from multiprocessing.queues import Queue
from Queue import Empty
import sys
from multiprocessing import Value
from multiprocessing.queues import Queue
from multiprocessing.synchronize import Event
from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
from threading import Thread
import numpy
import sys
RUNNING = "running"
CONVERGED = "converged"
@@ -20,10 +20,9 @@ MAX_F_EVAL = "maximum number of function calls reached"
LINE_SEARCH = "line search failed"
KBINTERRUPT = "interrupted"
SENTINEL = None
class _Async_Optimization(Thread):
def __init__(self, f, df, x0, update_rule, runsignal,
def __init__(self, f, df, x0, update_rule, runsignal, SENTINEL,
report_every=10, messages=0, maxiter=5e3, max_f_eval=15e3,
gtol=1e-6, outqueue=None, *args, **kw):
"""
@@ -42,6 +41,7 @@ class _Async_Optimization(Thread):
self.maxiter = maxiter
self.max_f_eval = max_f_eval
self.gtol = gtol
self.SENTINEL = SENTINEL
self.runsignal = runsignal
# self.parent = parent
# self.result = None
@@ -70,7 +70,7 @@ class _Async_Optimization(Thread):
def callback_return(self, *a):
self.callback(*a)
self.outq.put(SENTINEL)
self.outq.put(self.SENTINEL)
self.runsignal.clear()
def run(self, *args, **kwargs):
@@ -136,7 +136,7 @@ class _CGDAsync(_Async_Optimization):
if gfi is not None:
gi = gfi
if fi_old > fi:
if numpy.isnan(fi) or fi_old < fi:
gi, ur, si = self.reset(xi, *a, **kw)
else:
xi += numpy.dot(alphai, si)
@@ -146,21 +146,22 @@ class _CGDAsync(_Async_Optimization):
sys.stdout.write("iteration: {0:> 6g} f:{1:> 12e} |g|:{2:> 12e}".format(it, fi, numpy.dot(gi.T, gi)))
if it % self.report_every == 0:
self.callback(xi, fi, it, self.f_call.value, self.df_call.value, status)
self.callback(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
it += 1
else:
status = MAXITER
# self.result = [xi, fi, it, self.f_call.value, self.df_call.value, status]
self.callback_return(xi, fi, it, self.f_call.value, self.df_call.value, status)
self.callback_return(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
self.result = [xi, fi, gi, it, self.f_call.value, self.df_call.value, status]
class Async_Optimize(object):
callback = lambda *x: None
runsignal = Event()
SENTINEL = "SENTINEL"
def async_callback_collect(self, q):
while self.runsignal.is_set():
try:
for ret in iter(lambda: q.get(timeout=1), SENTINEL):
for ret in iter(lambda: q.get(timeout=1), self.SENTINEL):
self.callback(*ret)
except Empty:
pass
@@ -169,12 +170,12 @@ class Async_Optimize(object):
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, *args, **kwargs):
self.runsignal.set()
outqueue = Queue(5)
outqueue = Queue()
if callback:
self.callback = callback
c = Thread(target=self.async_callback_collect, args=(outqueue,))
c.start()
p = _CGDAsync(f, df, x0, update_rule, self.runsignal,
p = _CGDAsync(f, df, x0, update_rule, self.runsignal, self.SENTINEL,
report_every=report_every, messages=messages, maxiter=maxiter,
max_f_eval=max_f_eval, gtol=gtol, outqueue=outqueue, *args, **kwargs)
p.run()
@@ -189,12 +190,14 @@ class Async_Optimize(object):
while self.runsignal.is_set():
try:
p.join(1)
c.join(1)
# c.join(1)
except KeyboardInterrupt:
# print "^C"
self.runsignal.clear()
p.join()
c.join()
if c.is_alive():
print "WARNING: callback still running, optimisation done!"
return p.result
class CGD(Async_Optimize):
'''
@@ -215,7 +218,7 @@ class CGD(Async_Optimize):
callback gets called every `report_every` iterations
callback(xi, fi, iteration, function_calls, gradient_calls, status_message)
callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)
if df returns tuple (grad, natgrad) it will optimize according
to natural gradient rules
@@ -233,7 +236,7 @@ class CGD(Async_Optimize):
**calls**
---------
callback(x_opt, f_opt, iteration, function_calls, gradient_calls, status_message)
callback(x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message)
at end of optimization!
"""
@@ -247,7 +250,7 @@ class CGD(Async_Optimize):
Minimize f, calling callback every `report_every` iterations with following syntax:
callback(xi, fi, iteration, function_calls, gradient_calls, status_message)
callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)
if df returns tuple (grad, natgrad) it will optimize according
to natural gradient rules
@@ -260,7 +263,7 @@ class CGD(Async_Optimize):
**returns**
---------
x_opt, f_opt, iteration, function_calls, gradient_calls, status_message
x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message
at end of optimization
"""


@@ -5,6 +5,7 @@
from kernpart import kernpart
import numpy as np
from ..util.linalg import tdot
from GPy.util.linalg import mdot
class linear(kernpart):
"""
@@ -140,9 +141,25 @@ class linear(kernpart):
returns N,M,M matrix
"""
self._psi_computations(Z, mu, S)
#psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
# psi2_old = self.ZZ * np.square(self.variances) * self.mu2_S[:, None, None, :]
# target += psi2.sum(-1)
target += np.tensordot(self.ZZ[None,:,:,:]*np.square(self.variances),self.mu2_S[:, None, None, :],((3),(3))).squeeze().T
# slow but correct reference computation
psi2_real = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
for n in range(mu.shape[0]):
for m_prime in range(Z.shape[0]):
for m in range(Z.shape[0]):
tmp = self._Z[m:m + 1] * self.variances
# np.diag needs the 1-D row S[n] to build diag(S_n); the 2-D slice S[n:n + 1] makes np.diag extract a diagonal instead
tmp = np.dot(tmp, (tdot(self._mu[n:n + 1].T) + np.diag(S[n])))
psi2_real[n, m, m_prime] = np.dot(tmp, (
self._Z[m_prime:m_prime + 1] * self.variances).T)
psi2_inner = mdot(self.ZA, self.inner, self.ZA.T) # psi2 summed over N
# S_n must enter on the diagonal only, hence the np.eye factor
mu2_S = (self._mu[:, None] * self._mu[:, :, None]) + self._S[:, :, None] * np.eye(mu.shape[1])
psi2 = (self.ZA[None, :, None, :] * mu2_S[:, None]).sum(-1)
psi2 = (psi2[:, :, None] * self.ZA[None, None]).sum(-1)
# psi2_tensor = np.tensordot(self.ZZ[None, :, :, :] * np.square(self.variances), self.mu2_S[:, None, None, :], ((3), (3))).squeeze().T
# import ipdb;ipdb.set_trace()
target += psi2_real
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
@@ -156,13 +173,18 @@ class linear(kernpart):
"""Think N,M,M,Q """
self._psi_computations(Z, mu, S)
tmp = self.ZZ * np.square(self.variances) # M,M,Q
# import ipdb;ipdb.set_trace()
target_mu += (dL_dpsi2[:, :, :, None] * tmp * 2.*mu[:, None, None, :]).sum(1).sum(1)
target_S += (dL_dpsi2[:,:,:,None]*tmp).sum(1).sum(1)
target_S += (dL_dpsi2[:, :, :, None] * tmp).sum(1).sum(1) * S.shape[0]
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
mu2_S = np.sum(self.mu2_S,0)# Q,
target += (dL_dpsi2[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1)
# mu2_S = np.sum(self.mu2_S, 0) # Q,
# import ipdb;ipdb.set_trace()
# prod = (np.eye(Z.shape[0])[:, None, :, None] * (np.dot(self.ZA, self.inner) * self.variances)[None, :, None])
# psi2_dZ = prod.swapaxes(0, 1) + prod
psi2_dZ_old = (dL_dpsi2[:, :, :, None] * (self.mu2_S[:, None, None, :] * (Z * np.square(self.variances)[None, :])[None, None, :, :])).sum(0).sum(1)
target += psi2_dZ_old # .sum(0).sum(1)
# TODO: tensordot would gain some time here
#---------------------------------------#
@@ -187,6 +209,8 @@ class linear(kernpart):
self.ZZ = np.empty((Z.shape[0], Z.shape[0], Z.shape[1]), order='F')
[tdot(Z[:, i:i + 1], self.ZZ[:, :, i].T) for i in xrange(Z.shape[1])]
self._Z = Z.copy()
self.ZA = Z * self.variances
if not (np.all(mu == self._mu) and np.all(S == self._S)):
self.mu2_S = np.square(mu) + S
self.inner = tdot(mu.T) + (np.diag(S.sum(0)))
self._mu, self._S = mu.copy(), S.copy()
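For reference, the statistic the slow loop above computes is, for the linear kernel with ARD weights alpha,

    psi2[n, m, m'] = (z_m * alpha) . (mu_n mu_n^T + diag(S_n)) . (z_m' * alpha)

so the whole N x M x M array is a single einsum. A self-contained sketch on stand-in arrays (not the kernpart code itself), checked against the triple loop:

    import numpy as np
    N, M, Q = 4, 3, 2
    Z = np.random.randn(M, Q)
    mu = np.random.randn(N, Q)
    S = np.exp(np.random.randn(N, Q))
    alpha = np.exp(np.random.randn(Q))

    ZA = Z * alpha                                     # M x Q, the self.ZA above
    inner = mu[:, :, None] * mu[:, None, :] + S[:, :, None] * np.eye(Q)  # N x Q x Q
    psi2 = np.einsum('mq,nqp,kp->nmk', ZA, inner, ZA)  # N x M x M

    # check against the slow reference loop
    ref = np.zeros((N, M, M))
    for n in range(N):
        for m in range(M):
            for k in range(M):
                ref[n, m, k] = ZA[m].dot(inner[n]).dot(ZA[k])
    assert np.allclose(psi2, ref)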


@@ -308,6 +308,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
Slatentgrads = ax3.quiver(xlatent, S, Ulatent, Sg, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale)
ax3.set_ylim(0, 1.)
xZ = np.tile(np.arange(0, Z.shape[0])[:, None], Z.shape[1])
UZ = np.zeros_like(Z)
@@ -427,11 +428,11 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
cbarkmmdl.update_normal(imkmmdl)
ax2.relim()
ax3.relim()
# ax3.relim()
ax4.relim()
ax5.relim()
ax2.autoscale()
ax3.autoscale()
# ax3.autoscale()
ax4.autoscale()
ax5.autoscale()


@@ -102,13 +102,14 @@ class sparse_GP(GP):
tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]
#back substitute C into psi1V
tmp,info1 = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.psi1V),lower=1,trans=0)
tmp,info2 = linalg.lapack.flapack.dpotrs(self.LB,tmp,lower=1)
self.Cpsi1V,info3 = linalg.lapack.flapack.dtrtrs(self.Lm,tmp,lower=1,trans=1)
#self.Cpsi1V = np.dot(self.C,self.psi1V)
# back substitute C into psi1V
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
tmp, _ = linalg.lapack.flapack.dpotrs(self.LB, tmp, lower=1)
self.Cpsi1V, _ = linalg.lapack.flapack.dtrtrs(self.Lm, tmp, lower=1, trans=1)
self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V, self.psi1V.T) # TODO: stabilize?
self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
self.E = tdot(self.Cpsi1V/sf)
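The three LAPACK solves above are a cascade computing Cpsi1V = Lm^{-T} B^{-1} Lm^{-1} psi1V, i.e. C.dot(psi1V) without ever forming C. A self-contained sketch of the same cascade with scipy's high-level wrappers, on stand-in matrices playing the same roles:

    import numpy as np
    from scipy.linalg import cholesky, solve_triangular, cho_solve

    M, D = 5, 2
    A = np.random.randn(M, M)
    Kmm = A.dot(A.T) + M * np.eye(M)  # SPD stand-in for the inducing covariance
    B = A.T.dot(A) + M * np.eye(M)    # SPD stand-in for B
    psi1V = np.random.randn(M, D)
    Lm = cholesky(Kmm, lower=True)
    LB = cholesky(B, lower=True)

    tmp = solve_triangular(Lm, psi1V, lower=True)              # Lm^{-1} psi1V
    tmp = cho_solve((LB, True), tmp)                           # B^{-1} tmp
    Cpsi1V = solve_triangular(Lm, tmp, lower=True, trans='T')  # Lm^{-T} tmp

    # identical to forming C = Lm^{-T} B^{-1} Lm^{-1} explicitly
    C = np.linalg.inv(Lm.T).dot(np.linalg.inv(B)).dot(np.linalg.inv(Lm))
    assert np.allclose(Cpsi1V, C.dot(psi1V))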


@@ -5,7 +5,7 @@ Created on 26 Apr 2013
'''
import unittest
import numpy
from GPy.inference.conjugate_gradient_descent import CGD
from GPy.inference.conjugate_gradient_descent import CGD, RUNNING
import pylab
import time
from scipy.optimize.optimize import rosen, rosen_der
@@ -14,17 +14,62 @@ from scipy.optimize.optimize import rosen, rosen_der
class Test(unittest.TestCase):
def testMinimizeSquare(self):
f = lambda x: x ** 2 + 2 * x - 2
if __name__ == "__main__":
# import sys;sys.argv = ['', 'Test.testMinimizeSquare']
# unittest.main()
N = 2
A = numpy.random.rand(N) * numpy.eye(N)
b = numpy.random.rand(N)
# f = lambda x: numpy.dot(x.T.dot(A), x) + numpy.dot(x.T, b)
# df = lambda x: numpy.dot(A, x) - b
b = numpy.random.rand(N) * 0
f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b)
df = lambda x: numpy.dot(A, x) - b
opt = CGD()
restarts = 10
for _ in range(restarts):
try:
x0 = numpy.random.randn(N) * .5
res = opt.fmin(f, df, x0, messages=0,
maxiter=1000, gtol=1e-10)
assert numpy.allclose(res[0], 0, atol=1e-3)
break
except:
# RESTART
pass
else:
raise AssertionError("Test failed after {} restarts".format(restarts))
def testRosen(self):
N = 2
f = rosen
df = rosen_der
x0 = numpy.random.randn(N) * .5
opt = CGD()
restarts = 10
for _ in range(restarts):
try:
x0 = numpy.random.randn(N) * .5
res = opt.fmin(f, df, x0, messages=0,
maxiter=1000, gtol=1e-10)
assert numpy.allclose(res[0], 1, atol=1e-5)
break
except:
# RESTART
pass
else:
raise AssertionError("Test failed after {} restarts".format(restarts))
if __name__ == "__main__":
# import sys;sys.argv = ['',
# 'Test.testMinimizeSquare',
# 'Test.testRosen',
# ]
# unittest.main()
N = 2
A = numpy.random.rand(N) * numpy.eye(N)
b = numpy.random.rand(N) * 0
# f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b)
# df = lambda x: numpy.dot(A, x) - b
f = rosen
df = rosen_der
x0 = numpy.random.randn(N) * .5
@@ -48,14 +93,21 @@ if __name__ == "__main__":
optplts, = ax.plot3D([x0[0]], [x0[1]], zs=f(x0), marker='o', color='r')
raw_input("press enter to start optimisation")
res = [0]
def callback(x, *a, **kw):
xopts.append(x.copy())
def callback(*r):
xopts.append(r[0].copy())
# time.sleep(.3)
optplts._verts3d = [numpy.array(xopts)[:, 0], numpy.array(xopts)[:, 1], [f(xs) for xs in xopts]]
fig.canvas.draw()
if r[-1] != RUNNING:
res[0] = r
p, c = opt.fmin_async(f, df, x0.copy(), callback, messages=True, maxiter=1000,
report_every=20, gtol=1e-12)
res = opt.fmin(f, df, x0, callback, messages=True, maxiter=1000, report_every=1)
pylab.ion()
pylab.show()
pass


@@ -9,21 +9,30 @@ import numpy as np
import pylab
__test__ = False
np.random.seed(0)
def ard(p):
try:
if p.ARD:
return "ARD"
except AttributeError:
pass
return ""
class Test(unittest.TestCase):
D = 9
M = 5
Nsamples = 3e6
M = 3
Nsamples = 6e6
def setUp(self):
self.kerns = (
GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True),
GPy.kern.linear(self.D), GPy.kern.linear(self.D, ARD=True),
# GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True),
GPy.kern.linear(self.D, ARD=False), GPy.kern.linear(self.D, ARD=True),
GPy.kern.linear(self.D) + GPy.kern.bias(self.D),
GPy.kern.rbf(self.D) + GPy.kern.bias(self.D),
# GPy.kern.rbf(self.D) + GPy.kern.bias(self.D),
GPy.kern.linear(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
GPy.kern.bias(self.D), GPy.kern.white(self.D),
# GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
# GPy.kern.bias(self.D), GPy.kern.white(self.D),
)
self.q_x_mean = np.random.randn(self.D)
self.q_x_variance = np.exp(np.random.randn(self.D))
@@ -66,18 +75,21 @@ class Test(unittest.TestCase):
K_ += K
diffs.append(((psi2 - (K_ / (i + 1))) ** 2).mean())
K_ /= self.Nsamples / Nsamples
msg = "psi2: {}".format("+".join([p.name + ard(p) for p in kern.parts]))
try:
# pylab.figure("+".join([p.name for p in kern.parts]) + "psi2")
# pylab.plot(diffs)
pylab.figure(msg)
pylab.plot(diffs)
self.assertTrue(np.allclose(psi2.squeeze(), K_,
rtol=1e-1, atol=.1),
msg="{}: not matching".format("+".join([p.name for p in kern.parts])))
msg=msg + ": not matching")
except:
print "{}: not matching".format(kern.parts[0].name)
import ipdb;ipdb.set_trace()
kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
print msg + ": not matching"
if __name__ == "__main__":
import sys;sys.argv = ['',
'Test.test_psi0',
'Test.test_psi1',
# 'Test.test_psi0',
# 'Test.test_psi1',
'Test.test_psi2']
unittest.main()
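The psi2 test above is a Monte Carlo check: under q(x) = N(mu, diag(S)) the statistic is psi2[m, m'] = E_q[k(x, z_m) k(x, z_m')], so averaging outer products of kernel rows over draws from q must converge to it. A compact sketch of that estimator, assuming a GPy kernel object `kern` and arrays shaped as in setUp:

    import numpy as np

    def psi2_monte_carlo(kern, Z, mu, S, nsamples=100000):
        # draws from q(x) = N(mu, diag(S)) for a single latent point
        X = np.random.randn(int(nsamples), mu.size) * np.sqrt(S) + mu
        K = kern.K(X, Z)                # nsamples x M cross-covariances
        return K.T.dot(K) / X.shape[0]  # M x M Monte Carlo estimate of psi2

This should match kern.psi2(Z, q_x_mean, q_x_variance).squeeze() up to the loose rtol=1e-1, atol=.1 tolerances the test uses.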


@@ -106,18 +106,18 @@ if __name__ == "__main__":
import sys
interactive = 'i' in sys.argv
if interactive:
N, M, Q, D = 30, 5, 4, 30
X = numpy.random.rand(N, Q)
k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T
Y -= Y.mean(axis=0)
k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M)
m.ensure_default_constraints()
m.randomize()
# self.assertTrue(m.checkgrad())
# N, M, Q, D = 30, 5, 4, 30
# X = numpy.random.rand(N, Q)
# k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
# K = k.K(X)
# Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T
# Y -= Y.mean(axis=0)
# k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
# m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M)
# m.ensure_default_constraints()
# m.randomize()
# # self.assertTrue(m.checkgrad())
numpy.random.seed(0)
Q = 5
N = 50
M = 10
@@ -126,11 +126,11 @@ if __name__ == "__main__":
X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
Z = numpy.random.permutation(X)[:M]
Y = X.dot(numpy.random.randn(Q, D))
kernel = GPy.kern.bias(Q)
kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
GPy.kern.linear(Q) + GPy.kern.bias(Q),
GPy.kern.rbf(Q) + GPy.kern.bias(Q)]
# kernel = GPy.kern.bias(Q)
#
# kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
# GPy.kern.linear(Q) + GPy.kern.bias(Q),
# GPy.kern.rbf(Q) + GPy.kern.bias(Q)]
# for k in kernels:
# m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
@@ -143,11 +143,13 @@ if __name__ == "__main__":
# M=M, kernel=kernel)
# m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=kernel)
m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
M=M, kernel=GPy.kern.rbf(Q))
# m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=GPy.kern.rbf(Q))
m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
M=M, kernel=GPy.kern.linear(Q) + GPy.kern.bias(Q))
m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
M=M, kernel=GPy.kern.rbf(Q) + GPy.kern.bias(Q))
M=M, kernel=GPy.kern.linear(Q))
m3.ensure_default_constraints()
# + GPy.kern.bias(Q))
# m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=GPy.kern.rbf(Q) + GPy.kern.bias(Q))
else:
unittest.main()
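From here the interactive session would typically gradient-check the remaining psi2 model; a one-line sketch using the checkgrad call that is commented out above (assuming the usual GPy model API):

    m3.checkgrad(verbose=True)  # compare analytic psi2 gradients with finite differences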