Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-12 21:42:39 +02:00)
LinearCF Psi Stat not working yet, strange bug in psi computations
parent c502b66ea3
commit 42474f0044

8 changed files with 353 additions and 244 deletions
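For orientation, the statistic at the heart of this commit is psi2 for the linear kernel. Writing $A=\mathrm{diag}(\alpha)$ for the (ARD) variances, $z_m$ for the inducing inputs and $q(x_n)=\mathcal{N}(\mu_n,\mathrm{diag}(S_n))$ for the variational posterior, the explicit `psi2_real` loop added to the linear kernel below computes (a sketch in my own notation, read off that loop; not text from the commit):

$$\psi_2^{(n)}[m,m'] \;=\; \mathbb{E}_{q(x_n)}\!\left[k(z_m,x_n)\,k(x_n,z_{m'})\right] \;=\; (A z_m)^{\top}\!\left(\mu_n\mu_n^{\top}+\mathrm{diag}(S_n)\right)(A z_{m'})$$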
@@ -82,7 +82,7 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300):
     m.ensure_default_constraints()
     y = m.likelihood.Y[0, :]
-    fig,(latent_axes,hist_axes) = plt.subplots(1,2)
+    fig, (latent_axes, hist_axes) = plt.subplots(1, 2)
     plt.sca(latent_axes)
     m.plot_latent()
     data_show = GPy.util.visualize.vector_show(y)
@@ -176,20 +176,34 @@ def bgplvm_simulation_matlab_compare():
     Y = sim_data['Y']
     S = sim_data['S']
     mu = sim_data['mu']
-    M, [_, Q] = 20, mu.shape
+    M, [_, Q] = 30, mu.shape
+    Q = 2

     from GPy.models import mrd
     from GPy import kern
     reload(mrd); reload(kern)
-    k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
+    k = kern.rbf(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
     m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k,
                        # X=mu,
                        # X_variance=S,
                        _debug=True)
     m.ensure_default_constraints()
     m.auto_scale_factor = True
-    m['noise'] = .01 # Y.var() / 100.
-    m['{}_variance'.format(k.parts[0].name)] = .01
+    m['noise'] = Y.var() / 100.
+
+    lscstr = '{}'.format(k.parts[0].name)
+    # m[lscstr] = .01
+    m.unconstrain(lscstr); m.constrain_fixed(lscstr, 10)
+
+    lscstr = 'X_variance'
+    # m[lscstr] = .01
+    m.unconstrain(lscstr); m.constrain_fixed(lscstr, .1)
+
+    # cstr = 'white'
+    # m.unconstrain(cstr); m.constrain_bounded(cstr, .01, 1.)
+
+    # cstr = 'noise'
+    # m.unconstrain(cstr); m.constrain_bounded(cstr, .01, 1.)
     return m

 def bgplvm_simulation(burnin='scg', plot_sim=False,
@@ -385,7 +399,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True):
     Y = data['Y']
     if in_place:
         # Make figure move in place.
-        data['Y'][:, 0:3]=0.0
+        data['Y'][:, 0:3] = 0.0
     m = GPy.models.GPLVM(data['Y'], 2, normalize_Y=True)

     # optimize
@@ -4,14 +4,14 @@ Created on 24 Apr 2013
 @author: maxz
 '''
 from GPy.inference.gradient_descent_update_rules import FletcherReeves
-import numpy
-from multiprocessing import Value
-from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
-from multiprocessing.synchronize import Event
-from multiprocessing.queues import Queue
 from Queue import Empty
-import sys
+from multiprocessing import Value
+from multiprocessing.queues import Queue
+from multiprocessing.synchronize import Event
+from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
 from threading import Thread
+import numpy
+import sys

 RUNNING = "running"
 CONVERGED = "converged"
@@ -20,10 +20,9 @@ MAX_F_EVAL = "maximum number of function calls reached"
 LINE_SEARCH = "line search failed"
 KBINTERRUPT = "interrupted"

-SENTINEL = None

 class _Async_Optimization(Thread):
-    def __init__(self, f, df, x0, update_rule, runsignal,
+    def __init__(self, f, df, x0, update_rule, runsignal, SENTINEL,
                  report_every=10, messages=0, maxiter=5e3, max_f_eval=15e3,
                  gtol=1e-6, outqueue=None, *args, **kw):
         """
@@ -42,6 +41,7 @@ class _Async_Optimization(Thread):
         self.maxiter = maxiter
         self.max_f_eval = max_f_eval
         self.gtol = gtol
+        self.SENTINEL = SENTINEL
         self.runsignal = runsignal
         # self.parent = parent
         # self.result = None
@@ -70,7 +70,7 @@ class _Async_Optimization(Thread):

     def callback_return(self, *a):
         self.callback(*a)
-        self.outq.put(SENTINEL)
+        self.outq.put(self.SENTINEL)
         self.runsignal.clear()

     def run(self, *args, **kwargs):
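The SENTINEL moved here from a module constant to a per-instance attribute serves as an end-of-stream marker between the optimiser thread and the callback collector. A minimal sketch of that pattern (generic Python 2, not code from the repository):

import Queue

SENTINEL = "SENTINEL"
q = Queue.Queue()
q.put("result 1")
q.put("result 2")
q.put(SENTINEL)  # producer signals completion, as callback_return does above

# iter(callable, sentinel) keeps calling q.get until it returns SENTINEL
for item in iter(lambda: q.get(timeout=1), SENTINEL):
    print item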
@@ -136,7 +136,7 @@ class _CGDAsync(_Async_Optimization):
             if gfi is not None:
                 gi = gfi

-            if fi_old > fi:
+            if numpy.isnan(fi) or fi_old < fi:
                 gi, ur, si = self.reset(xi, *a, **kw)
             else:
                 xi += numpy.dot(alphai, si)
@@ -145,22 +145,23 @@ class _CGDAsync(_Async_Optimization):
                 sys.stdout.flush()
                 sys.stdout.write("iteration: {0:> 6g} f:{1:> 12e} |g|:{2:> 12e}".format(it, fi, numpy.dot(gi.T, gi)))

             if it % self.report_every == 0:
-                self.callback(xi, fi, it, self.f_call.value, self.df_call.value, status)
+                self.callback(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
             it += 1
         else:
             status = MAXITER
-        # self.result = [xi, fi, it, self.f_call.value, self.df_call.value, status]
-        self.callback_return(xi, fi, it, self.f_call.value, self.df_call.value, status)
+        self.callback_return(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
+        self.result = [xi, fi, gi, it, self.f_call.value, self.df_call.value, status]

 class Async_Optimize(object):
     callback = lambda *x: None
     runsignal = Event()
+    SENTINEL = "SENTINEL"

     def async_callback_collect(self, q):
         while self.runsignal.is_set():
             try:
-                for ret in iter(lambda: q.get(timeout=1), SENTINEL):
+                for ret in iter(lambda: q.get(timeout=1), self.SENTINEL):
                     self.callback(*ret)
             except Empty:
                 pass
@@ -169,12 +170,12 @@ class Async_Optimize(object):
              messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
              report_every=10, *args, **kwargs):
         self.runsignal.set()
-        outqueue = Queue(5)
+        outqueue = Queue()
         if callback:
             self.callback = callback
         c = Thread(target=self.async_callback_collect, args=(outqueue,))
         c.start()
-        p = _CGDAsync(f, df, x0, update_rule, self.runsignal,
+        p = _CGDAsync(f, df, x0, update_rule, self.runsignal, self.SENTINEL,
                       report_every=report_every, messages=messages, maxiter=maxiter,
                       max_f_eval=max_f_eval, gtol=gtol, outqueue=outqueue, *args, **kwargs)
         p.run()
@@ -189,12 +190,14 @@ class Async_Optimize(object):
         while self.runsignal.is_set():
             try:
                 p.join(1)
-                c.join(1)
+                # c.join(1)
             except KeyboardInterrupt:
                 # print "^C"
                 self.runsignal.clear()
                 p.join()
-                c.join()
+        if c.is_alive():
+            print "WARNING: callback still running, optimisation done!"
+        return p.result

 class CGD(Async_Optimize):
     '''
@@ -215,7 +218,7 @@ class CGD(Async_Optimize):

     callback gets called every `report_every` iterations

-    callback(xi, fi, iteration, function_calls, gradient_calls, status_message)
+    callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)

     if df returns tuple (grad, natgrad) it will optimize according
     to natural gradient rules
@@ -233,7 +236,7 @@ class CGD(Async_Optimize):
         **calls**
         ---------

-        callback(x_opt, f_opt, iteration, function_calls, gradient_calls, status_message)
+        callback(x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message)

         at end of optimization!
         """
@@ -247,7 +250,7 @@ class CGD(Async_Optimize):

         Minimize f, calling callback every `report_every` iterations with following syntax:

-        callback(xi, fi, iteration, function_calls, gradient_calls, status_message)
+        callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)

         if df returns tuple (grad, natgrad) it will optimize according
         to natural gradient rules
@@ -260,7 +263,7 @@ class CGD(Async_Optimize):
         **returns**
         ---------

-        x_opt, f_opt, iteration, function_calls, gradient_calls, status_message
+        x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message

         at end of optimization
         """
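A usage sketch of the updated callback signature (my own example; it assumes the Python 2 API of this file together with the Rosenbrock test further down this commit):

import numpy
from scipy.optimize.optimize import rosen, rosen_der
from GPy.inference.conjugate_gradient_descent import CGD

def report(xi, fi, gi, iteration, f_calls, df_calls, status):
    # gi is the current gradient, newly threaded through by this commit
    print iteration, fi, numpy.dot(gi, gi), status

opt = CGD()
res = opt.fmin(rosen, rosen_der, numpy.zeros(2), report,
               maxiter=1000, gtol=1e-10, report_every=10)
x_opt = res[0]  # res mirrors [xi, fi, gi, it, f_calls, df_calls, status]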
@@ -5,6 +5,7 @@
 from kernpart import kernpart
 import numpy as np
 from ..util.linalg import tdot
+from GPy.util.linalg import mdot

 class linear(kernpart):
     """
@@ -23,7 +24,7 @@ class linear(kernpart):
     :rtype: kernel object
     """

-    def __init__(self,D,variances=None,ARD=False):
+    def __init__(self, D, variances=None, ARD=False):
         self.D = D
         self.ARD = ARD
         if ARD == False:
@@ -45,15 +46,15 @@ class linear(kernpart):
             variances = np.ones(self.D)
         self._set_params(variances.flatten())

-        #initialize cache
-        self._Z, self._mu, self._S = np.empty(shape=(3,1))
-        self._X, self._X2, self._params = np.empty(shape=(3,1))
+        # initialize cache
+        self._Z, self._mu, self._S = np.empty(shape=(3, 1))
+        self._X, self._X2, self._params = np.empty(shape=(3, 1))

     def _get_params(self):
         return self.variances

-    def _set_params(self,x):
-        assert x.size==(self.Nparam)
+    def _set_params(self, x):
+        assert x.size == (self.Nparam)
         self.variances = x
         self.variances2 = np.square(self.variances)
@@ -61,115 +62,136 @@ class linear(kernpart):
         if self.Nparam == 1:
             return ['variance']
         else:
-            return ['variance_%i'%i for i in range(self.variances.size)]
+            return ['variance_%i' % i for i in range(self.variances.size)]

-    def K(self,X,X2,target):
+    def K(self, X, X2, target):
         if self.ARD:
-            XX = X*np.sqrt(self.variances)
+            XX = X * np.sqrt(self.variances)
             if X2 is None:
                 target += tdot(XX)
             else:
-                XX2 = X2*np.sqrt(self.variances)
+                XX2 = X2 * np.sqrt(self.variances)
                 target += np.dot(XX, XX2.T)
         else:
             self._K_computations(X, X2)
             target += self.variances * self._dot_product

-    def Kdiag(self,X,target):
-        np.add(target,np.sum(self.variances*np.square(X),-1),target)
+    def Kdiag(self, X, target):
+        np.add(target, np.sum(self.variances * np.square(X), -1), target)

-    def dK_dtheta(self,dL_dK,X,X2,target):
+    def dK_dtheta(self, dL_dK, X, X2, target):
         if self.ARD:
             if X2 is None:
-                [np.add(target[i:i+1],np.sum(dL_dK*tdot(X[:,i:i+1])),target[i:i+1]) for i in range(self.D)]
+                [np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.D)]
             else:
-                product = X[:,None,:]*X2[None,:,:]
-                target += (dL_dK[:,:,None]*product).sum(0).sum(0)
+                product = X[:, None, :] * X2[None, :, :]
+                target += (dL_dK[:, :, None] * product).sum(0).sum(0)
         else:
             self._K_computations(X, X2)
-            target += np.sum(self._dot_product*dL_dK)
+            target += np.sum(self._dot_product * dL_dK)

-    def dKdiag_dtheta(self,dL_dKdiag, X, target):
-        tmp = dL_dKdiag[:,None]*X**2
+    def dKdiag_dtheta(self, dL_dKdiag, X, target):
+        tmp = dL_dKdiag[:, None] * X ** 2
         if self.ARD:
             target += tmp.sum(0)
         else:
             target += tmp.sum()

-    def dK_dX(self,dL_dK,X,X2,target):
-        target += (((X2[:, None, :] * self.variances)) * dL_dK[:,:, None]).sum(0)
+    def dK_dX(self, dL_dK, X, X2, target):
+        target += (((X2[:, None, :] * self.variances)) * dL_dK[:, :, None]).sum(0)

     #---------------------------------------#
     #            PSI statistics             #
     #---------------------------------------#

-    def psi0(self,Z,mu,S,target):
-        self._psi_computations(Z,mu,S)
-        target += np.sum(self.variances*self.mu2_S,1)
+    def psi0(self, Z, mu, S, target):
+        self._psi_computations(Z, mu, S)
+        target += np.sum(self.variances * self.mu2_S, 1)

-    def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
-        self._psi_computations(Z,mu,S)
+    def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
+        self._psi_computations(Z, mu, S)
         tmp = dL_dpsi0[:, None] * self.mu2_S
         if self.ARD:
             target += tmp.sum(0)
         else:
             target += tmp.sum()

-    def dpsi0_dmuS(self,dL_dpsi0, Z,mu,S,target_mu,target_S):
-        target_mu += dL_dpsi0[:, None] * (2.0*mu*self.variances)
+    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
+        target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances)
         target_S += dL_dpsi0[:, None] * self.variances

-    def psi1(self,Z,mu,S,target):
+    def psi1(self, Z, mu, S, target):
         """the variance, it does nothing"""
         self._psi1 = self.K(mu, Z, target)

-    def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
+    def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
         """the variance, it does nothing"""
-        self.dK_dtheta(dL_dpsi1,mu,Z,target)
+        self.dK_dtheta(dL_dpsi1, mu, Z, target)

-    def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
+    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
         """Do nothing for S, it does not affect psi1"""
-        self._psi_computations(Z,mu,S)
-        target_mu += (dL_dpsi1.T[:,:, None]*(Z*self.variances)).sum(1)
+        self._psi_computations(Z, mu, S)
+        target_mu += (dL_dpsi1.T[:, :, None] * (Z * self.variances)).sum(1)

-    def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
-        self.dK_dX(dL_dpsi1.T,Z,mu,target)
+    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
+        self.dK_dX(dL_dpsi1.T, Z, mu, target)

-    def psi2(self,Z,mu,S,target):
+    def psi2(self, Z, mu, S, target):
         """
         returns N,M,M matrix
         """
-        self._psi_computations(Z,mu,S)
-        #psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
-        #target += psi2.sum(-1)
-        target += np.tensordot(self.ZZ[None,:,:,:]*np.square(self.variances),self.mu2_S[:, None, None, :],((3),(3))).squeeze().T
+        self._psi_computations(Z, mu, S)
+        # psi2_old = self.ZZ * np.square(self.variances) * self.mu2_S[:, None, None, :]
+        # target += psi2.sum(-1)
+        # slow way of doing it, but right
+        psi2_real = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
+        for n in range(mu.shape[0]):
+            for m_prime in range(Z.shape[0]):
+                for m in range(Z.shape[0]):
+                    tmp = self._Z[m:m + 1] * self.variances
+                    tmp = np.dot(tmp, (tdot(self._mu[n:n + 1].T) + np.diag(S[n:n + 1])))
+                    psi2_real[n, m, m_prime] = np.dot(tmp, (
+                        self._Z[m_prime:m_prime + 1] * self.variances).T)

-    def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
-        self._psi_computations(Z,mu,S)
-        tmp = (dL_dpsi2[:,:,:,None]*(2.*self.ZZ*self.mu2_S[:,None,None,:]*self.variances))
+        psi2_inner = mdot(self.ZA, self.inner, self.ZA.T)
+        mu2_S = (self._mu[:, None] * self._mu[:, :, None]) + self._S[:, :, None]
+        psi2 = (self.ZA[None, :, None, :] * mu2_S[:, None]).sum(-1)
+        psi2 = (psi2[:, :, None] * self.ZA[None, None]).sum(-1)
+        # psi2_tensor = np.tensordot(self.ZZ[None, :, :, :] * np.square(self.variances), self.mu2_S[:, None, None, :], ((3), (3))).squeeze().T
+        # import ipdb;ipdb.set_trace()
+        target += psi2_real
+
+    def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
+        self._psi_computations(Z, mu, S)
+        tmp = (dL_dpsi2[:, :, :, None] * (2.*self.ZZ * self.mu2_S[:, None, None, :] * self.variances))
         if self.ARD:
             target += tmp.sum(0).sum(0).sum(0)
         else:
             target += tmp.sum()

-    def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
+    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
         """Think N,M,M,Q """
-        self._psi_computations(Z,mu,S)
-        tmp = self.ZZ*np.square(self.variances) # M,M,Q
-        target_mu += (dL_dpsi2[:,:,:,None]*tmp*2.*mu[:,None,None,:]).sum(1).sum(1)
-        target_S += (dL_dpsi2[:,:,:,None]*tmp).sum(1).sum(1)
+        self._psi_computations(Z, mu, S)
+        tmp = self.ZZ * np.square(self.variances) # M,M,Q
+        # import ipdb;ipdb.set_trace()
+        target_mu += (dL_dpsi2[:, :, :, None] * tmp * 2.*mu[:, None, None, :]).sum(1).sum(1)
+        target_S += (dL_dpsi2[:, :, :, None] * tmp).sum(1).sum(1) * S.shape[0]

-    def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
-        self._psi_computations(Z,mu,S)
-        mu2_S = np.sum(self.mu2_S,0)# Q,
-        target += (dL_dpsi2[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1)
-        #TODO: tensordot would gain some time here
+    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
+        self._psi_computations(Z, mu, S)
+        # mu2_S = np.sum(self.mu2_S, 0) # Q,
+        # import ipdb;ipdb.set_trace()
+        # prod = (np.eye(Z.shape[0])[:, None, :, None] * (np.dot(self.ZA, self.inner) * self.variances)[None, :, None])
+        # psi2_dZ = prod.swapaxes(0, 1) + prod
+        psi2_dZ_old = (dL_dpsi2[:, :, :, None] * (self.mu2_S[:, None, None, :] * (Z * np.square(self.variances)[None, :])[None, None, :, :])).sum(0).sum(1)
+        target += psi2_dZ_old # .sum(0).sum(1)
+        # TODO: tensordot would gain some time here

     #---------------------------------------#
     #            Precomputations            #
     #---------------------------------------#

-    def _K_computations(self,X,X2):
+    def _K_computations(self, X, X2):
         if not (np.array_equal(X, self._Xcache) and np.array_equal(X2, self._X2cache)):
             self._Xcache = X.copy()
             if X2 is None:
@@ -177,16 +199,18 @@ class linear(kernpart):
                 self._X2cache = None
             else:
                 self._X2cache = X2.copy()
-                self._dot_product = np.dot(X,X2.T)
+                self._dot_product = np.dot(X, X2.T)

-    def _psi_computations(self,Z,mu,S):
-        #here are the "statistics" for psi1 and psi2
-        if not np.all(Z==self._Z):
-            #Z has changed, compute Z specific stuff
-            #self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
-            self.ZZ = np.empty((Z.shape[0],Z.shape[0],Z.shape[1]),order='F')
-            [tdot(Z[:,i:i+1],self.ZZ[:,:,i].T) for i in xrange(Z.shape[1])]
+    def _psi_computations(self, Z, mu, S):
+        # here are the "statistics" for psi1 and psi2
+        if not np.all(Z == self._Z):
+            # Z has changed, compute Z specific stuff
+            # self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
+            self.ZZ = np.empty((Z.shape[0], Z.shape[0], Z.shape[1]), order='F')
+            [tdot(Z[:, i:i + 1], self.ZZ[:, :, i].T) for i in xrange(Z.shape[1])]
             self._Z = Z.copy()
-        if not (np.all(mu==self._mu) and np.all(S==self._S)):
-            self.mu2_S = np.square(mu)+S
+            self.ZA = Z * self.variances
+        if not (np.all(mu == self._mu) and np.all(S == self._S)):
+            self.mu2_S = np.square(mu) + S
+            self.inner = tdot(mu.T) + (np.diag(S.sum(0)))
             self._mu, self._S = mu.copy(), S.copy()
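A quick Monte-Carlo sanity check for psi2, along the lines of the psi-statistics test near the end of this commit (a sketch; `kern`, `Z`, `q_x_mean` and `q_x_variance` are assumed to be set up as in that test):

import numpy as np

# psi2 should match E[K(Z, x) K(x, Z)] under x ~ N(q_x_mean, diag(q_x_variance))
psi2 = kern.psi2(Z, q_x_mean[None, :], q_x_variance[None, :])
K_ = 0.
Nsamples = 100000
for _ in xrange(Nsamples):
    x = np.random.randn(*q_x_mean.shape) * np.sqrt(q_x_variance) + q_x_mean
    Kzx = kern.K(Z, x[None, :])  # M x 1
    K_ += Kzx * Kzx.T            # rank-one product, M x M
print np.allclose(psi2.squeeze(), K_ / Nsamples, rtol=1e-1, atol=.1)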
@@ -308,6 +308,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         Slatentgrads = ax3.quiver(xlatent, S, Ulatent, Sg, color=colors,
                                   units=quiver_units, scale_units=quiver_scale_units,
                                   scale=quiver_scale)
+        ax3.set_ylim(0, 1.)

         xZ = np.tile(np.arange(0, Z.shape[0])[:, None], Z.shape[1])
         UZ = np.zeros_like(Z)
@@ -427,11 +428,11 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         cbarkmmdl.update_normal(imkmmdl)

         ax2.relim()
-        ax3.relim()
+        # ax3.relim()
         ax4.relim()
         ax5.relim()
         ax2.autoscale()
-        ax3.autoscale()
+        # ax3.autoscale()
         ax4.autoscale()
         ax5.autoscale()
@@ -30,22 +30,22 @@ class sparse_GP(GP):
     """

     def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
-        self.scale_factor = 100.0 # a scaling factor to help keep the algorithm stable
+        self.scale_factor = 100.0# a scaling factor to help keep the algorithm stable
         self.auto_scale_factor = False
         self.Z = Z
         self.M = Z.shape[0]
         self.likelihood = likelihood

         if X_variance is None:
-            self.has_uncertain_inputs = False
+            self.has_uncertain_inputs=False
         else:
-            assert X_variance.shape == X.shape
-            self.has_uncertain_inputs = True
+            assert X_variance.shape==X.shape
+            self.has_uncertain_inputs=True
             self.X_variance = X_variance

         GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X)

-        # normalize X uncertainty also
+        #normalize X uncertainty also
         if self.has_uncertain_inputs:
             self.X_variance /= np.square(self._Xstd)
@@ -54,155 +54,156 @@ class sparse_GP(GP):
         # kernel computations, using BGPLVM notation
         self.Kmm = self.kern.K(self.Z)
         if self.has_uncertain_inputs:
-            self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance)
-            self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance).T
-            self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance)
+            self.psi0 = self.kern.psi0(self.Z,self.X, self.X_variance)
+            self.psi1 = self.kern.psi1(self.Z,self.X, self.X_variance).T
+            self.psi2 = self.kern.psi2(self.Z,self.X, self.X_variance)
         else:
             self.psi0 = self.kern.Kdiag(self.X)
-            self.psi1 = self.kern.K(self.Z, self.X)
+            self.psi1 = self.kern.K(self.Z,self.X)
             self.psi2 = None

     def _computations(self):
-        # TODO: find routine to multiply triangular matrices
+        #TODO: find routine to multiply triangular matrices

         sf = self.scale_factor
-        sf2 = sf ** 2
+        sf2 = sf**2

-        # The rather complex computations of psi2_beta_scaled
+        #The rather complex computations of psi2_beta_scaled
         if self.likelihood.is_heteroscedastic:
-            assert self.likelihood.D == 1 # TODO: what if the likelihood is heterscedatic and there are multiple independent outputs?
+            assert self.likelihood.D == 1 #TODO: what if the likelihood is heterscedatic and there are multiple independent outputs?
             if self.has_uncertain_inputs:
-                self.psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1) / sf2)).sum(0)
+                self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
             else:
-                tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)) / sf)
-                # self.psi2_beta_scaled = np.dot(tmp,tmp.T)
+                tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
+                #self.psi2_beta_scaled = np.dot(tmp,tmp.T)
                 self.psi2_beta_scaled = tdot(tmp)
         else:
             if self.has_uncertain_inputs:
-                self.psi2_beta_scaled = (self.psi2 * (self.likelihood.precision / sf2)).sum(0)
+                self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
             else:
-                tmp = self.psi1 * (np.sqrt(self.likelihood.precision) / sf)
-                # self.psi2_beta_scaled = np.dot(tmp,tmp.T)
+                tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
+                #self.psi2_beta_scaled = np.dot(tmp,tmp.T)
                 self.psi2_beta_scaled = tdot(tmp)

         self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)

-        self.V = (self.likelihood.precision / self.scale_factor) * self.likelihood.Y
+        self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y

-        # Compute A = L^-1 psi2 beta L^-T
-        # self. A = mdot(self.Lmi,self.psi2_beta_scaled,self.Lmi.T)
-        tmp = linalg.lapack.flapack.dtrtrs(self.Lm, self.psi2_beta_scaled.T, lower=1)[0]
-        self.A = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1)[0]
+        #Compute A = L^-1 psi2 beta L^-T
+        #self. A = mdot(self.Lmi,self.psi2_beta_scaled,self.Lmi.T)
+        tmp = linalg.lapack.flapack.dtrtrs(self.Lm,self.psi2_beta_scaled.T,lower=1)[0]
+        self.A = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0]

-        self.B = np.eye(self.M) / sf2 + self.A
+        self.B = np.eye(self.M)/sf2 + self.A

         self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)

         self.psi1V = np.dot(self.psi1, self.V)
-        tmp = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.Bi), lower=1, trans=1)[0]
-        self.C = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1, trans=1)[0]
+        tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
+        self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]

-        # self.Cpsi1V = np.dot(self.C,self.psi1V)
-        # back substitute C into psi1V
-        tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
-        tmp, _ = linalg.lapack.flapack.dpotrs(self.LB, tmp, lower=1)
-        self.Cpsi1V, _ = linalg.lapack.flapack.dtrtrs(self.Lm, tmp, lower=1, trans=1)
+        #back substutue C into psi1V
+        tmp,info1 = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.psi1V),lower=1,trans=0)
+        tmp,info2 = linalg.lapack.flapack.dpotrs(self.LB,tmp,lower=1)
+        self.Cpsi1V,info3 = linalg.lapack.flapack.dtrtrs(self.Lm,tmp,lower=1,trans=1)
+        #self.Cpsi1V = np.dot(self.C,self.psi1V)

-        self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V, self.psi1V.T) # TODO: stabilize?
-        self.E = tdot(self.Cpsi1V / sf)
+        self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
+
+        self.E = tdot(self.Cpsi1V/sf)

         # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
-        self.dL_dpsi0 = -0.5 * self.D * (self.likelihood.precision * np.ones([self.N, 1])).flatten()
-        self.dL_dpsi1 = np.dot(self.Cpsi1V, self.V.T)
+        self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
+        self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T)
         if self.likelihood.is_heteroscedastic:
             if self.has_uncertain_inputs:
-                # self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
-                # self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
-                # self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
-                self.dL_dpsi2 = 0.5 * self.likelihood.precision[:, None, None] * (self.D * (self.Kmmi - self.C / sf2) - self.E)[None, :, :]
+                #self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
+                #self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
+                #self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
+                self.dL_dpsi2 = 0.5*self.likelihood.precision[:,None,None]*(self.D*(self.Kmmi - self.C/sf2) -self.E)[None,:,:]
             else:
-                # self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
-                # self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
-                # self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
-                self.dL_dpsi1 += np.dot(self.Kmmi - self.C / sf2 - self.E, self.psi1 * self.likelihood.precision.reshape(1, self.N))
+                #self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
+                #self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
+                #self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
+                self.dL_dpsi1 += np.dot(self.Kmmi - self.C/sf2 -self.E,self.psi1*self.likelihood.precision.reshape(1,self.N))
                 self.dL_dpsi2 = None

         else:
-            # self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
-            # self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC
-            # self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD
-            self.dL_dpsi2 = 0.5 * self.likelihood.precision * (self.D * (self.Kmmi - self.C / sf2) - self.E)
+            #self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
+            #self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC
+            #self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD
+            self.dL_dpsi2 = 0.5*self.likelihood.precision*(self.D*(self.Kmmi - self.C/sf2) -self.E)
             if self.has_uncertain_inputs:
-                # repeat for each of the N psi_2 matrices
-                self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None, :, :], self.N, axis=0)
+                #repeat for each of the N psi_2 matrices
+                self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None,:,:],self.N,axis=0)
             else:
-                self.dL_dpsi1 += 2.*np.dot(self.dL_dpsi2, self.psi1)
+                self.dL_dpsi1 += 2.*np.dot(self.dL_dpsi2,self.psi1)
                 self.dL_dpsi2 = None

         # Compute dL_dKmm
-        # self.dL_dKmm_old = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
-        # self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
-        # self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
-        tmp = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.B), lower=1, trans=1)[0]
-        self.dL_dKmm = -0.5 * self.D * sf2 * linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1, trans=1)[0] # dA
-        tmp = np.dot(self.D * self.C + self.E * sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1
-        tmp = linalg.lapack.flapack.dpotrs(self.Lm, np.asfortranarray(tmp.T), lower=1)[0].T
-        self.dL_dKmm += 0.5 * (self.D * self.C / sf2 + self.E) + tmp # d(C+D)
+        #self.dL_dKmm_old = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
+        #self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
+        #self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
+        tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.B),lower=1,trans=1)[0]
+        self.dL_dKmm = -0.5*self.D*sf2*linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0] #dA
+        tmp = np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1
+        tmp = linalg.lapack.flapack.dpotrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0].T
+        self.dL_dKmm += 0.5*(self.D*self.C/sf2 + self.E) +tmp # d(C+D)

-        # the partial derivative vector for the likelihood
-        if self.likelihood.Nparams == 0:
-            # save computation here.
+        #the partial derivative vector for the likelihood
+        if self.likelihood.Nparams ==0:
+            #save computation here.
             self.partial_for_likelihood = None
         elif self.likelihood.is_heteroscedastic:
             raise NotImplementedError, "heteroscedatic derivates not implemented"
-            # self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA
-            # self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB
-            # self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC
-            # self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
+            #self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA
+            #self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB
+            #self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC
+            #self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
         else:
-            # likelihood is not heterscedatic
-            self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
-            self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision * sf2)
-            self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi, self.A) * self.likelihood.precision
-            self.partial_for_likelihood += self.likelihood.precision * (0.5 * trace_dot(self.psi2_beta_scaled, self.E * sf2) - np.trace(self.Cpsi1VVpsi1))
+            #likelihood is not heterscedatic
+            self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2
+            self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
+            self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
+            self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))

     def log_likelihood(self):
         """ Compute the (lower bound on the) log marginal likelihood """
-        sf2 = self.scale_factor ** 2
+        sf2 = self.scale_factor**2
         if self.likelihood.is_heteroscedastic:
-            A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y)
-            B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
+            A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
+            B = -0.5*self.D*(np.sum(self.likelihood.precision.flatten()*self.psi0) - np.trace(self.A)*sf2)
         else:
-            A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
-            B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
-        C = -0.5 * self.D * (self.B_logdet + self.M * np.log(sf2))
-        D = 0.5 * np.trace(self.Cpsi1VVpsi1)
-        return A + B + C + D
+            A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
+            B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
+        C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
+        D = 0.5*np.trace(self.Cpsi1VVpsi1)
+        return A+B+C+D

     def _set_params(self, p):
-        self.Z = p[:self.M * self.Q].reshape(self.M, self.Q)
-        self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam])
-        self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:])
+        self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
+        self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
+        self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
         self._compute_kernel_matrices()
         if self.auto_scale_factor:
-            self.scale_factor = np.sqrt(self.psi2.sum(0).mean() * self.likelihood.precision)
-        # if self.auto_scale_factor:
+            self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
+        #if self.auto_scale_factor:
         #     if self.likelihood.is_heteroscedastic:
         #         self.scale_factor = max(1,np.sqrt(self.psi2_beta_scaled.sum(0).mean()))
         #     else:
         #         self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
-        # self.scale_factor = 1.
+        #self.scale_factor = 1.
         self._computations()

     def _get_params(self):
-        return np.hstack([self.Z.flatten(), GP._get_params(self)])
+        return np.hstack([self.Z.flatten(),GP._get_params(self)])

     def _get_param_names(self):
-        return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], []) + GP._get_param_names(self)
+        return sum([['iip_%i_%i'%(i,j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])],[]) + GP._get_param_names(self)

     def update_likelihood_approximation(self):
         """
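Reading the homoscedastic branch of `log_likelihood` off the code above, the four terms of the bound are (my notation: $\beta$ the noise precision, $s$ the scale factor, $\mathbf{A}$, $\mathbf{B}$ and $C\psi_1 V$ the quantities from `_computations`; the value returned is their sum):

$$A=-\tfrac12 ND\big(\log 2\pi+\log\sigma^2\big)-\tfrac12\beta\,\mathrm{tr}(YY^{\top}),\qquad B=-\tfrac12 D\big(\beta\textstyle\sum_n\psi_0^{(n)}-s^2\,\mathrm{tr}\,\mathbf{A}\big),$$
$$C=-\tfrac12 D\big(\log|\mathbf{B}|+M\log s^2\big),\qquad D=\tfrac12\,\mathrm{tr}\big(C\psi_1 V\,(\psi_1 V)^{\top}\big)$$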
@@ -214,9 +215,9 @@ class sparse_GP(GP):
         if self.has_uncertain_inputs:
             raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
         else:
-            self.likelihood.fit_DTC(self.Kmm, self.psi1)
-            # self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
+            self.likelihood.fit_DTC(self.Kmm,self.psi1)
+            #self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
         self._set_params(self._get_params()) # update the GP

     def _log_likelihood_gradients(self):
|
||||||
"""
|
"""
|
||||||
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
|
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
|
||||||
"""
|
"""
|
||||||
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
|
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
|
||||||
if self.has_uncertain_inputs:
|
if self.has_uncertain_inputs:
|
||||||
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance)
|
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_variance)
|
||||||
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T, self.Z, self.X, self.X_variance)
|
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_variance)
|
||||||
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance)
|
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z,self.X, self.X_variance)
|
||||||
else:
|
else:
|
||||||
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.Z, self.X)
|
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X)
|
||||||
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
|
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
|
||||||
|
|
||||||
return dL_dtheta
|
return dL_dtheta
|
||||||
|
|
@@ -243,22 +244,22 @@ class sparse_GP(GP):
         """
         dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
         if self.has_uncertain_inputs:
-            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
+            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_variance)
             dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
         else:
-            dL_dZ += self.kern.dK_dX(self.dL_dpsi1, self.Z, self.X)
+            dL_dZ += self.kern.dK_dX(self.dL_dpsi1,self.Z,self.X)
         return dL_dZ

     def _raw_predict(self, Xnew, which_parts='all', full_cov=False):
         """Internal helper function for making predictions, does not account for normalization"""

         Kx = self.kern.K(self.Z, Xnew)
-        mu = mdot(Kx.T, self.C / self.scale_factor, self.psi1V)
+        mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
         if full_cov:
-            Kxx = self.kern.K(Xnew, which_parts=which_parts)
-            var = Kxx - mdot(Kx.T, (self.Kmmi - self.C / self.scale_factor ** 2), Kx) # NOTE this won't work for plotting
+            Kxx = self.kern.K(Xnew,which_parts=which_parts)
+            var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
         else:
-            Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
-            var = Kxx - np.sum(Kx * np.dot(self.Kmmi - self.C / self.scale_factor ** 2, Kx), 0)
+            Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
+            var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)

-        return mu, var[:, None]
+        return mu,var[:,None]
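For reference, `_raw_predict` above implements (same notation as the note after `log_likelihood`):

$$\mu_* = K_{*m}\,\frac{C}{s}\,\psi_1 V,\qquad \mathrm{var}_* = K_{**}-K_{*m}\Big(K_{mm}^{-1}-\frac{C}{s^2}\Big)K_{m*}$$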
@@ -5,7 +5,7 @@ Created on 26 Apr 2013
 '''
 import unittest
 import numpy
-from GPy.inference.conjugate_gradient_descent import CGD
+from GPy.inference.conjugate_gradient_descent import CGD, RUNNING
 import pylab
 import time
 from scipy.optimize.optimize import rosen, rosen_der
@@ -14,17 +14,62 @@ from scipy.optimize.optimize import rosen, rosen_der
 class Test(unittest.TestCase):

     def testMinimizeSquare(self):
-        f = lambda x: x ** 2 + 2 * x - 2
+        N = 2
+        A = numpy.random.rand(N) * numpy.eye(N)
+        b = numpy.random.rand(N) * 0
+        f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b)
+        df = lambda x: numpy.dot(A, x) - b
+
+        opt = CGD()
+
+        restarts = 10
+        for _ in range(restarts):
+            try:
+                x0 = numpy.random.randn(N) * .5
+                res = opt.fmin(f, df, x0, messages=0,
+                               maxiter=1000, gtol=1e-10)
+                assert numpy.allclose(res[0], 0, atol=1e-3)
+                break
+            except:
+                # RESTART
+                pass
+        else:
+            raise AssertionError("Test failed for {} restarts".format(restarts))
+
+    def testRosen(self):
+        N = 2
+        f = rosen
+        df = rosen_der
+        x0 = numpy.random.randn(N) * .5
+
+        opt = CGD()
+
+        restarts = 10
+        for _ in range(restarts):
+            try:
+                x0 = numpy.random.randn(N) * .5
+                res = opt.fmin(f, df, x0, messages=0,
+                               maxiter=1000, gtol=1e-10)
+                assert numpy.allclose(res[0], 1, atol=1e-5)
+                break
+            except:
+                # RESTART
+                pass
+        else:
+            raise AssertionError("Test failed for {} restarts".format(restarts))

 if __name__ == "__main__":
-#     import sys;sys.argv = ['', 'Test.testMinimizeSquare']
+#     import sys;sys.argv = ['',
+#                            'Test.testMinimizeSquare',
+#                            'Test.testRosen',
+#                            ]
 #     unittest.main()

     N = 2
     A = numpy.random.rand(N) * numpy.eye(N)
-    b = numpy.random.rand(N)
-#     f = lambda x: numpy.dot(x.T.dot(A), x) + numpy.dot(x.T, b)
+    b = numpy.random.rand(N) * 0
+#     f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b)
 #     df = lambda x: numpy.dot(A, x) - b

     f = rosen
     df = rosen_der
     x0 = numpy.random.randn(N) * .5
@@ -48,14 +93,21 @@ if __name__ == "__main__":
     optplts, = ax.plot3D([x0[0]], [x0[1]], zs=f(x0), marker='o', color='r')

     raw_input("enter to start optimize")
+    res = [0]

-    def callback(x, *a, **kw):
-        xopts.append(x.copy())
+    def callback(*r):
+        xopts.append(r[0].copy())
         # time.sleep(.3)
         optplts._verts3d = [numpy.array(xopts)[:, 0], numpy.array(xopts)[:, 1], [f(xs) for xs in xopts]]
         fig.canvas.draw()
+        if r[-1] != RUNNING:
+            res[0] = r

-    res = opt.fmin(f, df, x0, callback, messages=True, maxiter=1000, report_every=1)
+    p, c = opt.fmin_async(f, df, x0.copy(), callback, messages=True, maxiter=1000,
+                          report_every=20, gtol=1e-12)

     pylab.ion()
     pylab.show()
+    pass
@@ -9,21 +9,30 @@ import numpy as np
 import pylab

 __test__ = False
+np.random.seed(0)
+
+def ard(p):
+    try:
+        if p.ARD:
+            return "ARD"
+    except:
+        pass
+    return ""

 class Test(unittest.TestCase):
     D = 9
-    M = 5
-    Nsamples = 3e6
+    M = 3
+    Nsamples = 6e6

     def setUp(self):
         self.kerns = (
-                      GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True),
-                      GPy.kern.linear(self.D), GPy.kern.linear(self.D, ARD=True),
+                      # GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True),
+                      GPy.kern.linear(self.D, ARD=False), GPy.kern.linear(self.D, ARD=True),
                       GPy.kern.linear(self.D) + GPy.kern.bias(self.D),
-                      GPy.kern.rbf(self.D) + GPy.kern.bias(self.D),
+                      # GPy.kern.rbf(self.D) + GPy.kern.bias(self.D),
                       GPy.kern.linear(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
-                      GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
-                      GPy.kern.bias(self.D), GPy.kern.white(self.D),
+                      # GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
+                      # GPy.kern.bias(self.D), GPy.kern.white(self.D),
                       )
         self.q_x_mean = np.random.randn(self.D)
         self.q_x_variance = np.exp(np.random.randn(self.D))
@@ -66,18 +75,21 @@ class Test(unittest.TestCase):
             K_ += K
             diffs.append(((psi2 - (K_ / (i + 1))) ** 2).mean())
         K_ /= self.Nsamples / Nsamples
+        msg = "psi2: {}".format("+".join([p.name + ard(p) for p in kern.parts]))
         try:
-            # pylab.figure("+".join([p.name for p in kern.parts]) + "psi2")
-            # pylab.plot(diffs)
+            pylab.figure(msg)
+            pylab.plot(diffs)
             self.assertTrue(np.allclose(psi2.squeeze(), K_,
                                         rtol=1e-1, atol=.1),
-                            msg="{}: not matching".format("+".join([p.name for p in kern.parts])))
+                            msg=msg + ": not matching")
         except:
-            print "{}: not matching".format(kern.parts[0].name)
+            import ipdb;ipdb.set_trace()
+            kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
+            print msg + ": not matching"

 if __name__ == "__main__":
     import sys;sys.argv = ['',
-                           'Test.test_psi0',
-                           'Test.test_psi1',
+                           # 'Test.test_psi0',
+                           # 'Test.test_psi1',
                            'Test.test_psi2']
     unittest.main()
@@ -106,18 +106,18 @@ if __name__ == "__main__":
     import sys
     interactive = 'i' in sys.argv
     if interactive:
-        N, M, Q, D = 30, 5, 4, 30
-        X = numpy.random.rand(N, Q)
-        k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
-        K = k.K(X)
-        Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T
-        Y -= Y.mean(axis=0)
-        k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
-        m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M)
-        m.ensure_default_constraints()
-        m.randomize()
-        # self.assertTrue(m.checkgrad())
+        # N, M, Q, D = 30, 5, 4, 30
+        # X = numpy.random.rand(N, Q)
+        # k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+        # K = k.K(X)
+        # Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T
+        # Y -= Y.mean(axis=0)
+        # k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+        # m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M)
+        # m.ensure_default_constraints()
+        # m.randomize()
+        # # self.assertTrue(m.checkgrad())
+        numpy.random.seed(0)
         Q = 5
         N = 50
         M = 10
@@ -126,11 +126,11 @@ if __name__ == "__main__":
         X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
         Z = numpy.random.permutation(X)[:M]
         Y = X.dot(numpy.random.randn(Q, D))
-        kernel = GPy.kern.bias(Q)
-        kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
-                   GPy.kern.linear(Q) + GPy.kern.bias(Q),
-                   GPy.kern.rbf(Q) + GPy.kern.bias(Q)]
+        # kernel = GPy.kern.bias(Q)
+        #
+        # kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
+        #            GPy.kern.linear(Q) + GPy.kern.bias(Q),
+        #            GPy.kern.rbf(Q) + GPy.kern.bias(Q)]

 #        for k in kernels:
 #            m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
@@ -143,11 +143,13 @@ if __name__ == "__main__":
 #                          M=M, kernel=kernel)
 #        m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
 #                          M=M, kernel=kernel)
-        m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
-                          M=M, kernel=GPy.kern.rbf(Q))
+        # m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
+        #                   M=M, kernel=GPy.kern.rbf(Q))
         m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
-                          M=M, kernel=GPy.kern.linear(Q) + GPy.kern.bias(Q))
-        m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
-                          M=M, kernel=GPy.kern.rbf(Q) + GPy.kern.bias(Q))
+                          M=M, kernel=GPy.kern.linear(Q))
+        m3.ensure_default_constraints()
+        # + GPy.kern.bias(Q))
+        # m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
+        #                   M=M, kernel=GPy.kern.rbf(Q) + GPy.kern.bias(Q))
     else:
         unittest.main()