LinearCF psi statistics not working yet; strange bug in the psi computations

Max Zwiessele 2013-05-01 17:09:38 +01:00
parent c502b66ea3
commit 42474f0044
8 changed files with 353 additions and 244 deletions
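For reference (standard definitions, not part of this commit): the psi statistics being debugged here are the kernel expectations of the Bayesian GP-LVM, taken under the variational posterior q(x_n) = N(mu_n, diag(S_n)). For the linear kernel k(x, x') = x^T Lambda x' with Lambda = diag(sigma_1^2, ..., sigma_Q^2) they have the closed forms

\psi_0 = \sum_n \mathbb{E}_q[k(x_n, x_n)] = \sum_{n,q} \sigma_q^2 (\mu_{nq}^2 + S_{nq})
(\Psi_1)_{nm} = \mathbb{E}_q[k(x_n, z_m)] = \mu_n^\top \Lambda z_m
(\Psi_2)_{nmm'} = \mathbb{E}_q[k(x_n, z_m)\, k(z_{m'}, x_n)] = z_m^\top \Lambda \,(\mu_n \mu_n^\top + \mathrm{diag}(S_n))\, \Lambda z_{m'}

which is what the linear kernel's psi0/psi1/psi2 methods below compute.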

View file

@@ -82,7 +82,7 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300):
m.ensure_default_constraints()
y = m.likelihood.Y[0, :]
fig,(latent_axes,hist_axes) = plt.subplots(1,2)
fig, (latent_axes, hist_axes) = plt.subplots(1, 2)
plt.sca(latent_axes)
m.plot_latent()
data_show = GPy.util.visualize.vector_show(y)
@@ -176,20 +176,34 @@ def bgplvm_simulation_matlab_compare():
Y = sim_data['Y']
S = sim_data['S']
mu = sim_data['mu']
M, [_, Q] = 20, mu.shape
M, [_, Q] = 30, mu.shape
Q = 2
from GPy.models import mrd
from GPy import kern
reload(mrd); reload(kern)
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
k = kern.rbf(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k,
# X=mu,
# X_variance=S,
_debug=True)
m.ensure_default_constraints()
m.auto_scale_factor = True
m['noise'] = .01 # Y.var() / 100.
m['{}_variance'.format(k.parts[0].name)] = .01
m['noise'] = Y.var() / 100.
lscstr = '{}'.format(k.parts[0].name)
# m[lscstr] = .01
m.unconstrain(lscstr); m.constrain_fixed(lscstr, 10)
lscstr = 'X_variance'
# m[lscstr] = .01
m.unconstrain(lscstr); m.constrain_fixed(lscstr, .1)
# cstr = 'white'
# m.unconstrain(cstr); m.constrain_bounded(cstr, .01, 1.)
# cstr = 'noise'
# m.unconstrain(cstr); m.constrain_bounded(cstr, .01, 1.)
return m
def bgplvm_simulation(burnin='scg', plot_sim=False,
@@ -385,7 +399,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True):
Y = data['Y']
if in_place:
# Make figure move in place.
data['Y'][:, 0:3]=0.0
data['Y'][:, 0:3] = 0.0
m = GPy.models.GPLVM(data['Y'], 2, normalize_Y=True)
# optimize

View file

@@ -4,14 +4,14 @@ Created on 24 Apr 2013
@author: maxz
'''
from GPy.inference.gradient_descent_update_rules import FletcherReeves
import numpy
from multiprocessing import Value
from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
from multiprocessing.synchronize import Event
from multiprocessing.queues import Queue
from Queue import Empty
import sys
from multiprocessing import Value
from multiprocessing.queues import Queue
from multiprocessing.synchronize import Event
from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
from threading import Thread
import numpy
import sys
RUNNING = "running"
CONVERGED = "converged"
@@ -20,10 +20,9 @@ MAX_F_EVAL = "maximum number of function calls reached"
LINE_SEARCH = "line search failed"
KBINTERRUPT = "interrupted"
SENTINEL = None
class _Async_Optimization(Thread):
def __init__(self, f, df, x0, update_rule, runsignal,
def __init__(self, f, df, x0, update_rule, runsignal, SENTINEL,
report_every=10, messages=0, maxiter=5e3, max_f_eval=15e3,
gtol=1e-6, outqueue=None, *args, **kw):
"""
@@ -42,6 +41,7 @@ class _Async_Optimization(Thread):
self.maxiter = maxiter
self.max_f_eval = max_f_eval
self.gtol = gtol
self.SENTINEL = SENTINEL
self.runsignal = runsignal
# self.parent = parent
# self.result = None
@@ -70,7 +70,7 @@ class _Async_Optimization(Thread):
def callback_return(self, *a):
self.callback(*a)
self.outq.put(SENTINEL)
self.outq.put(self.SENTINEL)
self.runsignal.clear()
def run(self, *args, **kwargs):
@@ -136,7 +136,7 @@ class _CGDAsync(_Async_Optimization):
if gfi is not None:
gi = gfi
if fi_old > fi:
if numpy.isnan(fi) or fi_old < fi:
gi, ur, si = self.reset(xi, *a, **kw)
else:
xi += numpy.dot(alphai, si)
@@ -145,22 +145,23 @@ class _CGDAsync(_Async_Optimization):
sys.stdout.flush()
sys.stdout.write("iteration: {0:> 6g} f:{1:> 12e} |g|:{2:> 12e}".format(it, fi, numpy.dot(gi.T, gi)))
if it % self.report_every == 0:
self.callback(xi, fi, it, self.f_call.value, self.df_call.value, status)
if it % self.report_every == 0:
self.callback(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
it += 1
else:
status = MAXITER
# self.result = [xi, fi, it, self.f_call.value, self.df_call.value, status]
self.callback_return(xi, fi, it, self.f_call.value, self.df_call.value, status)
self.callback_return(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
self.result = [xi, fi, gi, it, self.f_call.value, self.df_call.value, status]
class Async_Optimize(object):
callback = lambda *x: None
runsignal = Event()
SENTINEL = "SENTINEL"
def async_callback_collect(self, q):
while self.runsignal.is_set():
try:
for ret in iter(lambda: q.get(timeout=1), SENTINEL):
for ret in iter(lambda: q.get(timeout=1), self.SENTINEL):
self.callback(*ret)
except Empty:
pass
@@ -169,12 +170,12 @@ class Async_Optimize(object):
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, *args, **kwargs):
self.runsignal.set()
outqueue = Queue(5)
outqueue = Queue()
if callback:
self.callback = callback
c = Thread(target=self.async_callback_collect, args=(outqueue,))
c.start()
p = _CGDAsync(f, df, x0, update_rule, self.runsignal,
p = _CGDAsync(f, df, x0, update_rule, self.runsignal, self.SENTINEL,
report_every=report_every, messages=messages, maxiter=maxiter,
max_f_eval=max_f_eval, gtol=gtol, outqueue=outqueue, *args, **kwargs)
p.run()
@@ -189,12 +190,14 @@ class Async_Optimize(object):
while self.runsignal.is_set():
try:
p.join(1)
c.join(1)
# c.join(1)
except KeyboardInterrupt:
# print "^C"
self.runsignal.clear()
p.join()
c.join()
if c.is_alive():
print "WARNING: callback still running, optimisation done!"
return p.result
class CGD(Async_Optimize):
'''
@@ -215,7 +218,7 @@ class CGD(Async_Optimize):
callback gets called every `report_every` iterations
callback(xi, fi, iteration, function_calls, gradient_calls, status_message)
callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)
if df returns tuple (grad, natgrad) it will optimize according
to natural gradient rules
@@ -233,7 +236,7 @@ class CGD(Async_Optimize):
**calls**
---------
callback(x_opt, f_opt, iteration, function_calls, gradient_calls, status_message)
callback(x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message)
at end of optimization!
"""
@@ -247,7 +250,7 @@ class CGD(Async_Optimize):
Minimize f, calling callback every `report_every` iterations with following syntax:
callback(xi, fi, iteration, function_calls, gradient_calls, status_message)
callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)
if df returns tuple (grad, natgrad) it will optimize according
to natural gradient rules
@@ -260,7 +263,7 @@ class CGD(Async_Optimize):
**returns**
---------
x_opt, f_opt, iteration, function_calls, gradient_calls, status_message
x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message
at end of optimization
"""

View file

@@ -5,6 +5,7 @@
from kernpart import kernpart
import numpy as np
from ..util.linalg import tdot
from GPy.util.linalg import mdot
class linear(kernpart):
"""
@@ -23,7 +24,7 @@ class linear(kernpart):
:rtype: kernel object
"""
def __init__(self,D,variances=None,ARD=False):
def __init__(self, D, variances=None, ARD=False):
self.D = D
self.ARD = ARD
if ARD == False:
@@ -45,15 +46,15 @@ class linear(kernpart):
variances = np.ones(self.D)
self._set_params(variances.flatten())
#initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3,1))
self._X, self._X2, self._params = np.empty(shape=(3,1))
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._params = np.empty(shape=(3, 1))
def _get_params(self):
return self.variances
def _set_params(self,x):
assert x.size==(self.Nparam)
def _set_params(self, x):
assert x.size == (self.Nparam)
self.variances = x
self.variances2 = np.square(self.variances)
@@ -61,115 +62,136 @@ class linear(kernpart):
if self.Nparam == 1:
return ['variance']
else:
return ['variance_%i'%i for i in range(self.variances.size)]
return ['variance_%i' % i for i in range(self.variances.size)]
def K(self,X,X2,target):
def K(self, X, X2, target):
if self.ARD:
XX = X*np.sqrt(self.variances)
XX = X * np.sqrt(self.variances)
if X2 is None:
target += tdot(XX)
else:
XX2 = X2*np.sqrt(self.variances)
XX2 = X2 * np.sqrt(self.variances)
target += np.dot(XX, XX2.T)
else:
self._K_computations(X, X2)
target += self.variances * self._dot_product
def Kdiag(self,X,target):
np.add(target,np.sum(self.variances*np.square(X),-1),target)
def Kdiag(self, X, target):
np.add(target, np.sum(self.variances * np.square(X), -1), target)
def dK_dtheta(self,dL_dK,X,X2,target):
def dK_dtheta(self, dL_dK, X, X2, target):
if self.ARD:
if X2 is None:
[np.add(target[i:i+1],np.sum(dL_dK*tdot(X[:,i:i+1])),target[i:i+1]) for i in range(self.D)]
[np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.D)]
else:
product = X[:,None,:]*X2[None,:,:]
target += (dL_dK[:,:,None]*product).sum(0).sum(0)
product = X[:, None, :] * X2[None, :, :]
target += (dL_dK[:, :, None] * product).sum(0).sum(0)
else:
self._K_computations(X, X2)
target += np.sum(self._dot_product*dL_dK)
target += np.sum(self._dot_product * dL_dK)
def dKdiag_dtheta(self,dL_dKdiag, X, target):
tmp = dL_dKdiag[:,None]*X**2
def dKdiag_dtheta(self, dL_dKdiag, X, target):
tmp = dL_dKdiag[:, None] * X ** 2
if self.ARD:
target += tmp.sum(0)
else:
target += tmp.sum()
def dK_dX(self,dL_dK,X,X2,target):
target += (((X2[:, None, :] * self.variances)) * dL_dK[:,:, None]).sum(0)
def dK_dX(self, dL_dK, X, X2, target):
target += (((X2[:, None, :] * self.variances)) * dL_dK[:, :, None]).sum(0)
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def psi0(self,Z,mu,S,target):
self._psi_computations(Z,mu,S)
target += np.sum(self.variances*self.mu2_S,1)
def psi0(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target += np.sum(self.variances * self.mu2_S, 1)
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
self._psi_computations(Z,mu,S)
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
self._psi_computations(Z, mu, S)
tmp = dL_dpsi0[:, None] * self.mu2_S
if self.ARD:
target += tmp.sum(0)
else:
target += tmp.sum()
def dpsi0_dmuS(self,dL_dpsi0, Z,mu,S,target_mu,target_S):
target_mu += dL_dpsi0[:, None] * (2.0*mu*self.variances)
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances)
target_S += dL_dpsi0[:, None] * self.variances
def psi1(self,Z,mu,S,target):
def psi1(self, Z, mu, S, target):
"""the variance, it does nothing"""
self._psi1 = self.K(mu, Z, target)
def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
"""the variance, it does nothing"""
self.dK_dtheta(dL_dpsi1,mu,Z,target)
self.dK_dtheta(dL_dpsi1, mu, Z, target)
def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
"""Do nothing for S, it does not affect psi1"""
self._psi_computations(Z,mu,S)
target_mu += (dL_dpsi1.T[:,:, None]*(Z*self.variances)).sum(1)
self._psi_computations(Z, mu, S)
target_mu += (dL_dpsi1.T[:, :, None] * (Z * self.variances)).sum(1)
def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
self.dK_dX(dL_dpsi1.T,Z,mu,target)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self.dK_dX(dL_dpsi1.T, Z, mu, target)
def psi2(self,Z,mu,S,target):
def psi2(self, Z, mu, S, target):
"""
returns N,M,M matrix
"""
self._psi_computations(Z,mu,S)
#psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
#target += psi2.sum(-1)
target += np.tensordot(self.ZZ[None,:,:,:]*np.square(self.variances),self.mu2_S[:, None, None, :],((3),(3))).squeeze().T
self._psi_computations(Z, mu, S)
# psi2_old = self.ZZ * np.square(self.variances) * self.mu2_S[:, None, None, :]
# target += psi2.sum(-1)
# slow way of doing it, but right
psi2_real = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
for n in range(mu.shape[0]):
for m_prime in range(Z.shape[0]):
for m in range(Z.shape[0]):
tmp = self._Z[m:m + 1] * self.variances
tmp = np.dot(tmp, (tdot(self._mu[n:n + 1].T) + np.diag(S[n:n + 1])))
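# NB: S[n:n + 1] has shape (1, Q), so np.diag here extracts a length-1 diagonal
# (which then broadcasts over the (Q, Q) sum); np.diag(S[n]) would build the
# (Q, Q) matrix diag(S_n) that the psi2 formula calls for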
psi2_real[n, m, m_prime] = np.dot(tmp, (
self._Z[m_prime:m_prime + 1] * self.variances).T)
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
self._psi_computations(Z,mu,S)
tmp = (dL_dpsi2[:,:,:,None]*(2.*self.ZZ*self.mu2_S[:,None,None,:]*self.variances))
psi2_inner = mdot(self.ZA, self.inner, self.ZA.T)
mu2_S = (self._mu[:, None] * self._mu[:, :, None]) + self._S[:, :, None]
psi2 = (self.ZA[None, :, None, :] * mu2_S[:, None]).sum(-1)
psi2 = (psi2[:, :, None] * self.ZA[None, None]).sum(-1)
# psi2_tensor = np.tensordot(self.ZZ[None, :, :, :] * np.square(self.variances), self.mu2_S[:, None, None, :], ((3), (3))).squeeze().T
# import ipdb;ipdb.set_trace()
target += psi2_real
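For reference, a vectorized equivalent of the reference loop above (a sketch, not part of the commit; it assumes mu of shape (N, Q), S of shape (N, Q) holding the diagonal variances, and Z of shape (M, Q)):

import numpy as np

def psi2_linear(Z, mu, S, variances):
    """psi2[n, m, m'] = (z_m * variances) (mu_n mu_n^T + diag(S_n)) (z_m' * variances)^T"""
    ZA = Z * variances                                    # (M, Q), rows are z_m^T Lambda
    inner = mu[:, :, None] * mu[:, None, :]               # (N, Q, Q) outer products mu_n mu_n^T
    inner = inner + S[:, :, None] * np.eye(mu.shape[1])   # add diag(S_n) to each slice
    return np.einsum('mq,nqr,kr->nmk', ZA, inner, ZA)     # (N, M, M)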
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
tmp = (dL_dpsi2[:, :, :, None] * (2.*self.ZZ * self.mu2_S[:, None, None, :] * self.variances))
if self.ARD:
target += tmp.sum(0).sum(0).sum(0)
else:
target += tmp.sum()
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
"""Think N,M,M,Q """
self._psi_computations(Z,mu,S)
tmp = self.ZZ*np.square(self.variances) # M,M,Q
target_mu += (dL_dpsi2[:,:,:,None]*tmp*2.*mu[:,None,None,:]).sum(1).sum(1)
target_S += (dL_dpsi2[:,:,:,None]*tmp).sum(1).sum(1)
self._psi_computations(Z, mu, S)
tmp = self.ZZ * np.square(self.variances) # M,M,Q
# import ipdb;ipdb.set_trace()
target_mu += (dL_dpsi2[:, :, :, None] * tmp * 2.*mu[:, None, None, :]).sum(1).sum(1)
target_S += (dL_dpsi2[:, :, :, None] * tmp).sum(1).sum(1) * S.shape[0]
def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
self._psi_computations(Z,mu,S)
mu2_S = np.sum(self.mu2_S,0)# Q,
target += (dL_dpsi2[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1)
#TODO: tensordot would gain some time here
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
# mu2_S = np.sum(self.mu2_S, 0) # Q,
# import ipdb;ipdb.set_trace()
# prod = (np.eye(Z.shape[0])[:, None, :, None] * (np.dot(self.ZA, self.inner) * self.variances)[None, :, None])
# psi2_dZ = prod.swapaxes(0, 1) + prod
psi2_dZ_old = (dL_dpsi2[:, :, :, None] * (self.mu2_S[:, None, None, :] * (Z * np.square(self.variances)[None, :])[None, None, :, :])).sum(0).sum(1)
target += psi2_dZ_old # .sum(0).sum(1)
# TODO: tensordot would gain some time here
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self,X,X2):
def _K_computations(self, X, X2):
if not (np.array_equal(X, self._Xcache) and np.array_equal(X2, self._X2cache)):
self._Xcache = X.copy()
if X2 is None:
@@ -177,16 +199,18 @@ class linear(kernpart):
self._X2cache = None
else:
self._X2cache = X2.copy()
self._dot_product = np.dot(X,X2.T)
self._dot_product = np.dot(X, X2.T)
def _psi_computations(self,Z,mu,S):
#here are the "statistics" for psi1 and psi2
if not np.all(Z==self._Z):
#Z has changed, compute Z specific stuff
#self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
self.ZZ = np.empty((Z.shape[0],Z.shape[0],Z.shape[1]),order='F')
[tdot(Z[:,i:i+1],self.ZZ[:,:,i].T) for i in xrange(Z.shape[1])]
def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2
if not np.all(Z == self._Z):
# Z has changed, compute Z specific stuff
# self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
self.ZZ = np.empty((Z.shape[0], Z.shape[0], Z.shape[1]), order='F')
[tdot(Z[:, i:i + 1], self.ZZ[:, :, i].T) for i in xrange(Z.shape[1])]
self._Z = Z.copy()
if not (np.all(mu==self._mu) and np.all(S==self._S)):
self.mu2_S = np.square(mu)+S
self.ZA = Z * self.variances
if not (np.all(mu == self._mu) and np.all(S == self._S)):
self.mu2_S = np.square(mu) + S
self.inner = tdot(mu.T) + (np.diag(S.sum(0)))
self._mu, self._S = mu.copy(), S.copy()
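With this cache, ZA = Z * variances and inner = sum_n (mu_n mu_n^T + diag(S_n)): tdot(mu.T) accumulates the outer products and S.sum(0) the diagonals. The n-summed psi2 that the sparse GP bound consumes should therefore be recoverable as a single sandwich product, e.g. (a sketch):

psi2_summed = np.dot(np.dot(self.ZA, self.inner), self.ZA.T)  # sum_n psi2[n], shape (M, M)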

View file

@@ -308,6 +308,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
Slatentgrads = ax3.quiver(xlatent, S, Ulatent, Sg, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale)
ax3.set_ylim(0, 1.)
xZ = np.tile(np.arange(0, Z.shape[0])[:, None], Z.shape[1])
UZ = np.zeros_like(Z)
@@ -427,11 +428,11 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
cbarkmmdl.update_normal(imkmmdl)
ax2.relim()
ax3.relim()
# ax3.relim()
ax4.relim()
ax5.relim()
ax2.autoscale()
ax3.autoscale()
# ax3.autoscale()
ax4.autoscale()
ax5.autoscale()

View file

@@ -30,22 +30,22 @@ class sparse_GP(GP):
"""
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
self.scale_factor = 100.0 # a scaling factor to help keep the algorithm stable
self.scale_factor = 100.0# a scaling factor to help keep the algorithm stable
self.auto_scale_factor = False
self.Z = Z
self.M = Z.shape[0]
self.likelihood = likelihood
if X_variance is None:
self.has_uncertain_inputs = False
self.has_uncertain_inputs=False
else:
assert X_variance.shape == X.shape
self.has_uncertain_inputs = True
assert X_variance.shape==X.shape
self.has_uncertain_inputs=True
self.X_variance = X_variance
GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X)
# normalize X uncertainty also
#normalize X uncertainty also
if self.has_uncertain_inputs:
self.X_variance /= np.square(self._Xstd)
@@ -54,155 +54,156 @@ class sparse_GP(GP):
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance)
self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance).T
self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance)
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_variance)
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_variance).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_variance)
else:
self.psi0 = self.kern.Kdiag(self.X)
self.psi1 = self.kern.K(self.Z, self.X)
self.psi1 = self.kern.K(self.Z,self.X)
self.psi2 = None
def _computations(self):
# TODO: find routine to multiply triangular matrices
#TODO: find routine to multiply triangular matrices
sf = self.scale_factor
sf2 = sf ** 2
sf2 = sf**2
# The rather complex computations of psi2_beta_scaled
#The rather complex computations of psi2_beta_scaled
if self.likelihood.is_heteroscedastic:
assert self.likelihood.D == 1 # TODO: what if the likelihood is heteroscedastic and there are multiple independent outputs?
assert self.likelihood.D == 1 #TODO: what if the likelihood is heteroscedastic and there are multiple independent outputs?
if self.has_uncertain_inputs:
self.psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1) / sf2)).sum(0)
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
else:
tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)) / sf)
# self.psi2_beta_scaled = np.dot(tmp,tmp.T)
tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
#self.psi2_beta_scaled = np.dot(tmp,tmp.T)
self.psi2_beta_scaled = tdot(tmp)
else:
if self.has_uncertain_inputs:
self.psi2_beta_scaled = (self.psi2 * (self.likelihood.precision / sf2)).sum(0)
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
else:
tmp = self.psi1 * (np.sqrt(self.likelihood.precision) / sf)
# self.psi2_beta_scaled = np.dot(tmp,tmp.T)
tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
#self.psi2_beta_scaled = np.dot(tmp,tmp.T)
self.psi2_beta_scaled = tdot(tmp)
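# Note on the certain-inputs branch above: with no input uncertainty,
# psi2 = sum_n psi1[:, n] psi1[:, n]^T = Psi1 Psi1^T, so folding beta and the
# scale factor into psi1 first turns the n-sum into a single tdot, e.g.
# (illustrative sketch, psi1 stored as (M, N) and beta of shape (N,)):
#   tmp = psi1 * (np.sqrt(beta)[None, :] / sf)
#   psi2_beta_scaled = np.dot(tmp, tmp.T)  # = sum_n beta_n psi1[:, n] psi1[:, n].T / sf**2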
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
self.V = (self.likelihood.precision / self.scale_factor) * self.likelihood.Y
self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y
# Compute A = L^-1 psi2 beta L^-T
# self. A = mdot(self.Lmi,self.psi2_beta_scaled,self.Lmi.T)
tmp = linalg.lapack.flapack.dtrtrs(self.Lm, self.psi2_beta_scaled.T, lower=1)[0]
self.A = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1)[0]
#Compute A = L^-1 psi2 beta L^-T
#self. A = mdot(self.Lmi,self.psi2_beta_scaled,self.Lmi.T)
tmp = linalg.lapack.flapack.dtrtrs(self.Lm,self.psi2_beta_scaled.T,lower=1)[0]
self.A = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0]
self.B = np.eye(self.M) / sf2 + self.A
self.B = np.eye(self.M)/sf2 + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.psi1V = np.dot(self.psi1, self.V)
tmp = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.Bi), lower=1, trans=1)[0]
self.C = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1, trans=1)[0]
tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]
# self.Cpsi1V = np.dot(self.C,self.psi1V)
# back substitute C into psi1V
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
tmp, _ = linalg.lapack.flapack.dpotrs(self.LB, tmp, lower=1)
self.Cpsi1V, _ = linalg.lapack.flapack.dtrtrs(self.Lm, tmp, lower=1, trans=1)
#back substitute C into psi1V
tmp,info1 = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.psi1V),lower=1,trans=0)
tmp,info2 = linalg.lapack.flapack.dpotrs(self.LB,tmp,lower=1)
self.Cpsi1V,info3 = linalg.lapack.flapack.dtrtrs(self.Lm,tmp,lower=1,trans=1)
#self.Cpsi1V = np.dot(self.C,self.psi1V)
self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V, self.psi1V.T) # TODO: stabilize?
self.E = tdot(self.Cpsi1V / sf)
self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
self.E = tdot(self.Cpsi1V/sf)
# Compute dL_dpsi # FIXME: this is untested for the heteroscedastic + uncertain inputs case
self.dL_dpsi0 = -0.5 * self.D * (self.likelihood.precision * np.ones([self.N, 1])).flatten()
self.dL_dpsi1 = np.dot(self.Cpsi1V, self.V.T)
self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T)
if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs:
# self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
# self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
# self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
self.dL_dpsi2 = 0.5 * self.likelihood.precision[:, None, None] * (self.D * (self.Kmmi - self.C / sf2) - self.E)[None, :, :]
#self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
#self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
#self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
self.dL_dpsi2 = 0.5*self.likelihood.precision[:,None,None]*(self.D*(self.Kmmi - self.C/sf2) -self.E)[None,:,:]
else:
# self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
# self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
# self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
self.dL_dpsi1 += np.dot(self.Kmmi - self.C / sf2 - self.E, self.psi1 * self.likelihood.precision.reshape(1, self.N))
#self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
#self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
#self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
self.dL_dpsi1 += np.dot(self.Kmmi - self.C/sf2 -self.E,self.psi1*self.likelihood.precision.reshape(1,self.N))
self.dL_dpsi2 = None
else:
# self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
# self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC
# self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD
self.dL_dpsi2 = 0.5 * self.likelihood.precision * (self.D * (self.Kmmi - self.C / sf2) - self.E)
#self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
#self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC
#self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD
self.dL_dpsi2 = 0.5*self.likelihood.precision*(self.D*(self.Kmmi - self.C/sf2) -self.E)
if self.has_uncertain_inputs:
# repeat for each of the N psi_2 matrices
self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None, :, :], self.N, axis=0)
#repeat for each of the N psi_2 matrices
self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None,:,:],self.N,axis=0)
else:
self.dL_dpsi1 += 2.*np.dot(self.dL_dpsi2, self.psi1)
self.dL_dpsi1 += 2.*np.dot(self.dL_dpsi2,self.psi1)
self.dL_dpsi2 = None
# Compute dL_dKmm
# self.dL_dKmm_old = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
# self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
# self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
tmp = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.B), lower=1, trans=1)[0]
self.dL_dKmm = -0.5 * self.D * sf2 * linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1, trans=1)[0] # dA
tmp = np.dot(self.D * self.C + self.E * sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1
tmp = linalg.lapack.flapack.dpotrs(self.Lm, np.asfortranarray(tmp.T), lower=1)[0].T
self.dL_dKmm += 0.5 * (self.D * self.C / sf2 + self.E) + tmp # d(C+D)
#self.dL_dKmm_old = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
#self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
#self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.B),lower=1,trans=1)[0]
self.dL_dKmm = -0.5*self.D*sf2*linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0] #dA
tmp = np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1
tmp = linalg.lapack.flapack.dpotrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0].T
self.dL_dKmm += 0.5*(self.D*self.C/sf2 + self.E) +tmp # d(C+D)
# the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
# save computation here.
#the partial derivative vector for the likelihood
if self.likelihood.Nparams ==0:
#save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedastic derivatives not implemented"
# self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA
# self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB
# self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC
# self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
#self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA
#self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB
#self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC
#self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
else:
# likelihood is not heteroscedastic
self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision * sf2)
self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi, self.A) * self.likelihood.precision
self.partial_for_likelihood += self.likelihood.precision * (0.5 * trace_dot(self.psi2_beta_scaled, self.E * sf2) - np.trace(self.Cpsi1VVpsi1))
#likelihood is not heteroscedastic
self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor ** 2
sf2 = self.scale_factor**2
if self.likelihood.is_heteroscedastic:
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
B = -0.5*self.D*(np.sum(self.likelihood.precision.flatten()*self.psi0) - np.trace(self.A)*sf2)
else:
A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
C = -0.5 * self.D * (self.B_logdet + self.M * np.log(sf2))
D = 0.5 * np.trace(self.Cpsi1VVpsi1)
return A + B + C + D
A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = 0.5*np.trace(self.Cpsi1VVpsi1)
return A+B+C+D
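For reference (not asserted by this commit), the A/B/C/D grouping matches, up to the scale-factor rescaling, the collapsed variational bound of Titsias & Lawrence (2010): with beta the noise precision and Psi1 stored here as an (M, N) matrix,

\log p(Y) \ge D\left[-\tfrac{N}{2}\log 2\pi + \tfrac{N}{2}\log\beta + \tfrac{1}{2}\log|K_{mm}| - \tfrac{1}{2}\log|K_{mm} + \beta\Psi_2|\right] - \tfrac{\beta}{2}\,\mathrm{tr}(YY^\top) + \tfrac{\beta^2}{2}\,\mathrm{tr}\!\left(Y^\top\Psi_1^\top (K_{mm} + \beta\Psi_2)^{-1}\Psi_1 Y\right) - \tfrac{\beta D}{2}\,\psi_0 + \tfrac{\beta D}{2}\,\mathrm{tr}(K_{mm}^{-1}\Psi_2)

with psi_0 and Psi_2 summed over data points when the inputs are uncertain.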
def _set_params(self, p):
self.Z = p[:self.M * self.Q].reshape(self.M, self.Q)
self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:])
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
self._compute_kernel_matrices()
if self.auto_scale_factor:
self.scale_factor = np.sqrt(self.psi2.sum(0).mean() * self.likelihood.precision)
# if self.auto_scale_factor:
self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
#if self.auto_scale_factor:
# if self.likelihood.is_heteroscedastic:
# self.scale_factor = max(1,np.sqrt(self.psi2_beta_scaled.sum(0).mean()))
# else:
# self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
# self.scale_factor = 1.
#self.scale_factor = 1.
self._computations()
def _get_params(self):
return np.hstack([self.Z.flatten(), GP._get_params(self)])
return np.hstack([self.Z.flatten(),GP._get_params(self)])
def _get_param_names(self):
return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], []) + GP._get_param_names(self)
return sum([['iip_%i_%i'%(i,j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])],[]) + GP._get_param_names(self)
def update_likelihood_approximation(self):
"""
@@ -214,9 +215,9 @@ class sparse_GP(GP):
if self.has_uncertain_inputs:
raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_DTC(self.Kmm, self.psi1)
# self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
self.likelihood.fit_DTC(self.Kmm,self.psi1)
#self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
def _log_likelihood_gradients(self):
@@ -226,13 +227,13 @@
"""
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
"""
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_variance)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_variance)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z,self.X, self.X_variance)
else:
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.Z, self.X)
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
return dL_dtheta
@@ -243,22 +244,22 @@
"""
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_variance)
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
else:
dL_dZ += self.kern.dK_dX(self.dL_dpsi1, self.Z, self.X)
dL_dZ += self.kern.dK_dX(self.dL_dpsi1,self.Z,self.X)
return dL_dZ
def _raw_predict(self, Xnew, which_parts='all', full_cov=False):
"""Internal helper function for making predictions, does not account for normalization"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C / self.scale_factor, self.psi1V)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew, which_parts=which_parts)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C / self.scale_factor ** 2), Kx) # NOTE this won't work for plotting
Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
var = Kxx - np.sum(Kx * np.dot(self.Kmmi - self.C / self.scale_factor ** 2, Kx), 0)
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu, var[:, None]
return mu,var[:,None]

View file

@@ -5,7 +5,7 @@ Created on 26 Apr 2013
'''
import unittest
import numpy
from GPy.inference.conjugate_gradient_descent import CGD
from GPy.inference.conjugate_gradient_descent import CGD, RUNNING
import pylab
import time
from scipy.optimize.optimize import rosen, rosen_der
@@ -14,17 +14,62 @@ from scipy.optimize.optimize import rosen, rosen_der
class Test(unittest.TestCase):
def testMinimizeSquare(self):
f = lambda x: x ** 2 + 2 * x - 2
N = 2
A = numpy.random.rand(N) * numpy.eye(N)
b = numpy.random.rand(N) * 0
f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b)
df = lambda x: numpy.dot(A, x) - b
opt = CGD()
restarts = 10
for _ in range(restarts):
try:
x0 = numpy.random.randn(N) * .5
res = opt.fmin(f, df, x0, messages=0,
maxiter=1000, gtol=1e-10)
assert numpy.allclose(res[0], 0, atol=1e-3)
break
except:
# RESTART
pass
else:
raise AssertionError("Test failed for {} restarts".format(restarts))
def testRosen(self):
N = 2
f = rosen
df = rosen_der
x0 = numpy.random.randn(N) * .5
opt = CGD()
restarts = 10
for _ in range(restarts):
try:
x0 = numpy.random.randn(N) * .5
res = opt.fmin(f, df, x0, messages=0,
maxiter=1000, gtol=1e-10)
assert numpy.allclose(res[0], 1, atol=1e-5)
break
except:
# RESTART
pass
else:
raise AssertionError("Test failed for {} restarts".format(restarts))
if __name__ == "__main__":
# import sys;sys.argv = ['', 'Test.testMinimizeSquare']
# import sys;sys.argv = ['',
# 'Test.testMinimizeSquare',
# 'Test.testRosen',
# ]
# unittest.main()
N = 2
A = numpy.random.rand(N) * numpy.eye(N)
b = numpy.random.rand(N)
# f = lambda x: numpy.dot(x.T.dot(A), x) + numpy.dot(x.T, b)
b = numpy.random.rand(N) * 0
# f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b)
# df = lambda x: numpy.dot(A, x) - b
f = rosen
df = rosen_der
x0 = numpy.random.randn(N) * .5
@@ -48,14 +93,21 @@ if __name__ == "__main__":
optplts, = ax.plot3D([x0[0]], [x0[1]], zs=f(x0), marker='o', color='r')
raw_input("enter to start optimize")
res = [0]
def callback(x, *a, **kw):
xopts.append(x.copy())
def callback(*r):
xopts.append(r[0].copy())
# time.sleep(.3)
optplts._verts3d = [numpy.array(xopts)[:, 0], numpy.array(xopts)[:, 1], [f(xs) for xs in xopts]]
fig.canvas.draw()
if r[-1] != RUNNING:
res[0] = r
p, c = opt.fmin_async(f, df, x0.copy(), callback, messages=True, maxiter=1000,
report_every=20, gtol=1e-12)
res = opt.fmin(f, df, x0, callback, messages=True, maxiter=1000, report_every=1)
pylab.ion()
pylab.show()
pass

View file

@@ -9,21 +9,30 @@ import numpy as np
import pylab
__test__ = False
np.random.seed(0)
def ard(p):
try:
if p.ARD:
return "ARD"
except:
pass
return ""
class Test(unittest.TestCase):
D = 9
M = 5
Nsamples = 3e6
M = 3
Nsamples = 6e6
def setUp(self):
self.kerns = (
GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True),
GPy.kern.linear(self.D), GPy.kern.linear(self.D, ARD=True),
# GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True),
GPy.kern.linear(self.D, ARD=False), GPy.kern.linear(self.D, ARD=True),
GPy.kern.linear(self.D) + GPy.kern.bias(self.D),
GPy.kern.rbf(self.D) + GPy.kern.bias(self.D),
# GPy.kern.rbf(self.D) + GPy.kern.bias(self.D),
GPy.kern.linear(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
GPy.kern.bias(self.D), GPy.kern.white(self.D),
# GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
# GPy.kern.bias(self.D), GPy.kern.white(self.D),
)
self.q_x_mean = np.random.randn(self.D)
self.q_x_variance = np.exp(np.random.randn(self.D))
@@ -66,18 +75,21 @@ class Test(unittest.TestCase):
K_ += K
diffs.append(((psi2 - (K_ / (i + 1))) ** 2).mean())
K_ /= self.Nsamples / Nsamples
msg = "psi2: {}".format("+".join([p.name + ard(p) for p in kern.parts]))
try:
# pylab.figure("+".join([p.name for p in kern.parts]) + "psi2")
# pylab.plot(diffs)
pylab.figure(msg)
pylab.plot(diffs)
self.assertTrue(np.allclose(psi2.squeeze(), K_,
rtol=1e-1, atol=.1),
msg="{}: not matching".format("+".join([p.name for p in kern.parts])))
msg=msg + ": not matching")
except:
print "{}: not matching".format(kern.parts[0].name)
import ipdb;ipdb.set_trace()
kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
print msg + ": not matching"
if __name__ == "__main__":
import sys;sys.argv = ['',
'Test.test_psi0',
'Test.test_psi1',
# 'Test.test_psi0',
# 'Test.test_psi1',
'Test.test_psi2']
unittest.main()
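The test above validates psi2 by Monte Carlo; a self-contained sketch of the estimator it approximates for the linear kernel (an assumption-laden illustration with the hypothetical helper name psi2_mc, for a single uncertain input x ~ N(mu, diag(S))):

import numpy as np

def psi2_mc(Z, mu, S, variances, nsamples=100000):
    """Monte-Carlo estimate of psi2[m, m'] = E[k(z_m, x) k(x, z_m')]."""
    X = mu + np.sqrt(S) * np.random.randn(nsamples, mu.size)  # draws from q(x)
    KzX = np.dot(Z * variances, X.T)                          # k(z_m, x_s), linear kernel, (M, nsamples)
    return np.dot(KzX, KzX.T) / nsamples                      # average outer product, (M, M)

For large nsamples this should approach the closed form z_m^T Lambda (mu mu^T + diag(S)) Lambda z_m'.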

View file

@@ -106,18 +106,18 @@ if __name__ == "__main__":
import sys
interactive = 'i' in sys.argv
if interactive:
N, M, Q, D = 30, 5, 4, 30
X = numpy.random.rand(N, Q)
k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T
Y -= Y.mean(axis=0)
k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M)
m.ensure_default_constraints()
m.randomize()
# self.assertTrue(m.checkgrad())
# N, M, Q, D = 30, 5, 4, 30
# X = numpy.random.rand(N, Q)
# k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
# K = k.K(X)
# Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T
# Y -= Y.mean(axis=0)
# k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
# m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M)
# m.ensure_default_constraints()
# m.randomize()
# # self.assertTrue(m.checkgrad())
numpy.random.seed(0)
Q = 5
N = 50
M = 10
@@ -126,11 +126,11 @@ if __name__ == "__main__":
X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
Z = numpy.random.permutation(X)[:M]
Y = X.dot(numpy.random.randn(Q, D))
kernel = GPy.kern.bias(Q)
kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
GPy.kern.linear(Q) + GPy.kern.bias(Q),
GPy.kern.rbf(Q) + GPy.kern.bias(Q)]
# kernel = GPy.kern.bias(Q)
#
# kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
# GPy.kern.linear(Q) + GPy.kern.bias(Q),
# GPy.kern.rbf(Q) + GPy.kern.bias(Q)]
# for k in kernels:
# m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
@@ -143,11 +143,13 @@ if __name__ == "__main__":
# M=M, kernel=kernel)
# m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=kernel)
m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
M=M, kernel=GPy.kern.rbf(Q))
# m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=GPy.kern.rbf(Q))
m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
M=M, kernel=GPy.kern.linear(Q) + GPy.kern.bias(Q))
m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
M=M, kernel=GPy.kern.rbf(Q) + GPy.kern.bias(Q))
M=M, kernel=GPy.kern.linear(Q))
m3.ensure_default_constraints()
# + GPy.kern.bias(Q))
# m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=GPy.kern.rbf(Q) + GPy.kern.bias(Q))
else:
unittest.main()