merged with devel

This commit is contained in:
Alan Saul 2013-05-14 16:26:57 +01:00
commit cad2c271eb
37 changed files with 1215 additions and 621 deletions

View file

@ -203,7 +203,7 @@ class model(parameterised):
else: else:
self._set_params_transformed(initial_parameters) self._set_params_transformed(initial_parameters)
def ensure_default_constraints(self, warn=False): def ensure_default_constraints(self):
""" """
Ensure that any variables which should clearly be positive have been constrained somehow. Ensure that any variables which should clearly be positive have been constrained somehow.
""" """
@ -214,11 +214,11 @@ class model(parameterised):
for s in positive_strings: for s in positive_strings:
for i in self.grep_param_names(s): for i in self.grep_param_names(s):
if not (i in currently_constrained): if not (i in currently_constrained):
to_make_positive.append(re.escape(param_names[i])) #to_make_positive.append(re.escape(param_names[i]))
if warn: to_make_positive.append(i)
print "Warning! constraining %s positive" % s
if len(to_make_positive): if len(to_make_positive):
self.constrain_positive('(' + '|'.join(to_make_positive) + ')') #self.constrain_positive('(' + '|'.join(to_make_positive) + ')')
self.constrain_positive(np.asarray(to_make_positive))

View file

@ -6,18 +6,18 @@ import numpy as np
class transformation(object): class transformation(object):
def __init__(self): def __init__(self):
#set the domain. Suggest we use 'positive', 'bounded', etc # set the domain. Suggest we use 'positive', 'bounded', etc
self.domain = 'undefined' self.domain = 'undefined'
def f(self, x): def f(self, x):
raise NotImplementedError raise NotImplementedError
def finv(self,x): def finv(self, x):
raise NotImplementedError raise NotImplementedError
def gradfactor(self,f): def gradfactor(self, f):
""" df_dx evaluated at self.f(x)=f""" """ df_dx evaluated at self.f(x)=f"""
raise NotImplementedError raise NotImplementedError
def initialize(self,f): def initialize(self, f):
""" produce a sensible initial values for f(x)""" """ produce a sensible initial values for f(x)"""
raise NotImplementedError raise NotImplementedError
def __str__(self): def __str__(self):
@ -25,61 +25,100 @@ class transformation(object):
class logexp(transformation): class logexp(transformation):
def __init__(self): def __init__(self):
self.domain= 'positive' self.domain = 'positive'
def f(self,x): def f(self, x):
return np.log(1. + np.exp(x)) return np.log(1. + np.exp(x))
def finv(self,f): def finv(self, f):
return np.log(np.exp(f) - 1.) return np.log(np.exp(f) - 1.)
def gradfactor(self,f): def gradfactor(self, f):
ef = np.exp(f) ef = np.exp(f)
return (ef - 1.)/ef return (ef - 1.) / ef
def initialize(self, f):
return np.abs(f)
def __str__(self):
return '(+ve)'
class logexp_clipped(transformation):
def __init__(self):
self.domain = 'positive'
def f(self, x):
f = np.log(1. + np.exp(x))
return f
def finv(self, f):
return np.log(np.exp(f) - 1.)
def gradfactor(self, f):
ef = np.exp(f)
gf = (ef - 1.) / ef
return np.where(f < 1e-6, 0, gf)
def initialize(self,f): def initialize(self,f):
if np.any(f<0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f) return np.abs(f)
def __str__(self): def __str__(self):
return '(+ve)' return '(+ve)'
class exponent(transformation): class exponent(transformation):
def __init__(self): def __init__(self):
self.domain= 'positive' self.domain = 'positive'
def f(self,x): def f(self, x):
return np.exp(x) return np.exp(x)
def finv(self,x): def finv(self, x):
return np.log(x) return np.log(x)
def gradfactor(self,f): def gradfactor(self, f):
return f return f
def initialize(self,f): def initialize(self, f):
if np.any(f<0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f) return np.abs(f)
def __str__(self): def __str__(self):
return '(+ve)' return '(+ve)'
class negative_exponent(transformation): class negative_exponent(transformation):
def __init__(self): def __init__(self):
self.domain= 'negative' self.domain = 'negative'
def f(self,x): def f(self, x):
return -np.exp(x) return -np.exp(x)
def finv(self,x): def finv(self, x):
return np.log(-x) return np.log(-x)
def gradfactor(self,f): def gradfactor(self, f):
return f return f
def initialize(self,f): def initialize(self, f):
if np.any(f>0.):
print "Warning: changing parameters to satisfy constraints"
return -np.abs(f) return -np.abs(f)
def __str__(self): def __str__(self):
return '(-ve)' return '(-ve)'
class square(transformation):
def __init__(self):
self.domain = 'positive'
def f(self, x):
return x ** 2
def finv(self, x):
return np.sqrt(x)
def gradfactor(self, f):
return 2 * np.sqrt(f)
def initialize(self, f):
return np.abs(f)
def __str__(self):
return '(+sq)'
class logistic(transformation): class logistic(transformation):
def __init__(self,lower,upper): def __init__(self, lower, upper):
self.domain= 'bounded' self.domain = 'bounded'
assert lower < upper assert lower < upper
self.lower, self.upper = float(lower), float(upper) self.lower, self.upper = float(lower), float(upper)
self.difference = self.upper - self.lower self.difference = self.upper - self.lower
def f(self,x): def f(self, x):
return self.lower + self.difference/(1.+np.exp(-x)) return self.lower + self.difference / (1. + np.exp(-x))
def finv(self,f): def finv(self, f):
return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf)) return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf))
def gradfactor(self,f): def gradfactor(self, f):
return (f-self.lower)*(self.upper-f)/self.difference return (f-self.lower)*(self.upper-f)/self.difference
def initialize(self,f): def initialize(self,f):
return self.f(f*0.) if np.any(np.logical_or(f<self.lower,f>self.upper)):
print "Warning: changing parameters to satisfy constraints"
return np.where(np.logical_or(f<self.lower,f>self.upper),self.f(f*0.),f)
def __str__(self): def __str__(self):
return '({},{})'.format(self.lower,self.upper) return '({},{})'.format(self.lower, self.upper)

View file

@ -8,6 +8,7 @@ from matplotlib import pyplot as plt, pyplot
import GPy import GPy
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
from GPy.util.datasets import simulation_BGPLVM from GPy.util.datasets import simulation_BGPLVM
from GPy.core.transformations import square, logexp_clipped
default_seed = np.random.seed(123344) default_seed = np.random.seed(123344)
@ -62,33 +63,38 @@ def GPLVM_oil_100(optimize=True):
m.plot_latent(labels=m.data_labels) m.plot_latent(labels=m.data_labels)
return m return m
def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300): def BGPLVM_oil(optimize=True, N=100, Q=10, M=20, max_f_eval=300, plot=False):
data = GPy.util.datasets.oil() data = GPy.util.datasets.oil()
# create simple GP model # create simple GP model
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.001) kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
m = GPy.models.Bayesian_GPLVM(data['X'][:N], Q, kernel=kernel, M=M) Y = data['X'][:N]
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=kernel, M=M)
m.data_labels = data['Y'][:N].argmax(axis=1) m.data_labels = data['Y'][:N].argmax(axis=1)
m.constrain('variance', logexp_clipped())
m.constrain('length', logexp_clipped())
m['lengt'] = 100.
m.ensure_default_constraints()
# optimize # optimize
if optimize: if optimize:
m.constrain_fixed('noise', 0.05) m.unconstrain('noise'); m.constrain_fixed('noise', Y.var() / 100.)
m.ensure_default_constraints() m.optimize('scg', messages=1, max_f_eval=150)
m.optimize('scg', messages=1, max_f_eval=max(80, max_f_eval))
m.unconstrain('noise')
m.constrain_positive('noise')
m.optimize('scg', messages=1, max_f_eval=max(0, max_f_eval - 80))
else:
m.ensure_default_constraints()
y = m.likelihood.Y[0, :] m.unconstrain('noise')
fig, (latent_axes, hist_axes) = plt.subplots(1, 2) m.constrain('noise', logexp_clipped())
plt.sca(latent_axes) m.optimize('scg', messages=1, max_f_eval=max_f_eval)
m.plot_latent()
data_show = GPy.util.visualize.vector_show(y) if plot:
lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) y = m.likelihood.Y[0, :]
raw_input('Press enter to finish') fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
plt.close('all') plt.sca(latent_axes)
m.plot_latent()
data_show = GPy.util.visualize.vector_show(y)
lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes) # , sense_axes=sense_axes)
raw_input('Press enter to finish')
plt.close('all')
# # plot # # plot
# print(m) # print(m)
# m.plot_latent(labels=m.data_labels) # m.plot_latent(labels=m.data_labels)
@ -176,7 +182,7 @@ def bgplvm_simulation_matlab_compare():
Y = sim_data['Y'] Y = sim_data['Y']
S = sim_data['S'] S = sim_data['S']
mu = sim_data['mu'] mu = sim_data['mu']
M, [_, Q] = 20, mu.shape M, [_, Q] = 3, mu.shape
from GPy.models import mrd from GPy.models import mrd
from GPy import kern from GPy import kern
@ -186,7 +192,7 @@ def bgplvm_simulation_matlab_compare():
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k,
# X=mu, # X=mu,
# X_variance=S, # X_variance=S,
_debug=True) _debug=False)
m.ensure_default_constraints() m.ensure_default_constraints()
m.auto_scale_factor = True m.auto_scale_factor = True
m['noise'] = Y.var() / 100. m['noise'] = Y.var() / 100.
@ -207,7 +213,7 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
max_burnin=100, true_X=False, max_burnin=100, true_X=False,
do_opt=True, do_opt=True,
max_f_eval=1000): max_f_eval=1000):
D1, D2, D3, N, M, Q = 10, 8, 8, 250, 10, 6 D1, D2, D3, N, M, Q = 15, 8, 8, 350, 3, 6
slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim) slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
from GPy.models import mrd from GPy.models import mrd
@ -217,10 +223,12 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
Y = Ylist[0] Y = Ylist[0]
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q) k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q)
# k = kern.white(Q, .00001) + kern.bias(Q) # k = kern.white(Q, .00001) + kern.bias(Q)
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True) m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
# m.set('noise',) # m.set('noise',)
m.constrain('variance', logexp_clipped())
m.ensure_default_constraints() m.ensure_default_constraints()
m['noise'] = Y.var() / 100. m['noise'] = Y.var() / 100.
m['linear_variance'] = .001 m['linear_variance'] = .001
@ -304,7 +312,7 @@ def mrd_simulation(plot_sim=False):
# Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T # Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T
# Y2 -= Y2.mean(0) # Y2 -= Y2.mean(0)
# make_params = lambda ard: np.hstack([[1], ard, [1, .3]]) # make_params = lambda ard: np.hstack([[1], ard, [1, .3]])
D1, D2, D3, N, M, Q = 2000, 34, 8, 500, 3, 6 D1, D2, D3, N, M, Q = 150, 250, 300, 700, 3, 7
slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim) slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
from GPy.models import mrd from GPy.models import mrd
@ -316,18 +324,19 @@ def mrd_simulation(plot_sim=False):
# Y2 = np.random.multivariate_normal(np.zeros(N), k.K(S2), D2).T # Y2 = np.random.multivariate_normal(np.zeros(N), k.K(S2), D2).T
# Y3 = np.random.multivariate_normal(np.zeros(N), k.K(S3), D3).T # Y3 = np.random.multivariate_normal(np.zeros(N), k.K(S3), D3).T
Ylist = Ylist[0:2] # Ylist = Ylist[0:2]
# k = kern.rbf(Q, ARD=True) + kern.bias(Q) + kern.white(Q) # k = kern.rbf(Q, ARD=True) + kern.bias(Q) + kern.white(Q)
k = kern.linear(Q, ARD=True) + kern.bias(Q, .01) + kern.white(Q, .001) k = kern.linear(Q, [0.01] * Q, True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
m = mrd.MRD(*Ylist, Q=Q, M=M, kernel=k, initx="concat", initz='permute', _debug=False) m = mrd.MRD(*Ylist, Q=Q, M=M, kernel=k, initx="concat", initz='permute', _debug=False)
for i, Y in enumerate(Ylist): for i, Y in enumerate(Ylist):
m.set('{}_noise'.format(i + 1), Y.var() / 100.) m['{}_noise'.format(i + 1)] = Y.var() / 100.
m.constrain('variance', logexp_clipped())
m.ensure_default_constraints() m.ensure_default_constraints()
m.auto_scale_factor = True # m.auto_scale_factor = True
# cstr = 'variance' # cstr = 'variance'
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-12, 1.) # m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-12, 1.)
@ -335,21 +344,21 @@ def mrd_simulation(plot_sim=False):
# cstr = 'linear_variance' # cstr = 'linear_variance'
# m.unconstrain(cstr), m.constrain_positive(cstr) # m.unconstrain(cstr), m.constrain_positive(cstr)
# print "initializing beta" print "initializing beta"
# cstr = "noise" cstr = "noise"
# m.unconstrain(cstr); m.constrain_fixed(cstr) m.unconstrain(cstr); m.constrain_fixed(cstr)
# m.optimize('scg', messages=1, max_f_eval=100) m.optimize('scg', messages=1, max_f_eval=2e3, gtol=100)
# print "releasing beta" print "releasing beta"
# cstr = "noise" cstr = "noise"
# m.unconstrain(cstr); m.constrain_positive(cstr) m.unconstrain(cstr); m.constrain(cstr, logexp_clipped())
np.seterr(all='call') # np.seterr(all='call')
def ipdbonerr(errtype, flags): # def ipdbonerr(errtype, flags):
import ipdb; ipdb.set_trace() # import ipdb; ipdb.set_trace()
np.seterrcall(ipdbonerr) # np.seterrcall(ipdbonerr)
return m # , mtest return m # , mtest
def mrd_silhouette(): def mrd_silhouette():
@ -367,7 +376,7 @@ def brendan_faces():
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False) data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :], m, data_show, ax) lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
plt.close('all') plt.close('all')
@ -380,11 +389,12 @@ def stick():
# optimize # optimize
m.ensure_default_constraints() m.ensure_default_constraints()
m.optimize(messages=1, max_f_eval=10000) m.optimize(messages=1, max_f_eval=10000)
m._set_params(m._get_params())
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :], m, data_show, ax) lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
plt.close('all') plt.close('all')
@ -406,7 +416,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True):
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel']) data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel'])
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :], m, data_show, ax) lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
plt.close('all') plt.close('all')

View file

@ -1,6 +1,6 @@
#Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012) # Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
#Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman # Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY # HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
@ -25,7 +25,7 @@
import numpy as np import numpy as np
import sys import sys
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=1e-6, ftol=1e-6): def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=None, ftol=None, gtol=None):
""" """
Optimisation through Scaled Conjugate Gradients (SCG) Optimisation through Scaled Conjugate Gradients (SCG)
@ -38,19 +38,25 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
flog : a list of all the objective values flog : a list of all the objective values
""" """
if xtol is None:
xtol = 1e-6
if ftol is None:
ftol = 1e-6
if gtol is None:
gtol = 1e-5
sigma0 = 1.0e-4 sigma0 = 1.0e-4
fold = f(x, *optargs) # Initial function value. fold = f(x, *optargs) # Initial function value.
function_eval = 1 function_eval = 1
fnow = fold fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient. gradnew = gradf(x, *optargs) # Initial gradient.
current_grad = np.dot(gradnew, gradnew)
gradold = gradnew.copy() gradold = gradnew.copy()
d = -gradnew # Initial search direction. d = -gradnew # Initial search direction.
success = True # Force calculation of directional derivs. success = True # Force calculation of directional derivs.
nsuccess = 0 # nsuccess counts number of successes. nsuccess = 0 # nsuccess counts number of successes.
beta = 1.0 # Initial scale parameter. beta = 1.0 # Initial scale parameter.
betamin = 1.0e-15 # Lower bound on scale. betamin = 1.0e-15 # Lower bound on scale.
betamax = 1.0e100 # Upper bound on scale. betamax = 1.0e100 # Upper bound on scale.
status = "Not converged" status = "Not converged"
flog = [fold] flog = [fold]
@ -67,21 +73,21 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
d = -gradnew d = -gradnew
mu = np.dot(d, gradnew) mu = np.dot(d, gradnew)
kappa = np.dot(d, d) kappa = np.dot(d, d)
sigma = sigma0/np.sqrt(kappa) sigma = sigma0 / np.sqrt(kappa)
xplus = x + sigma*d xplus = x + sigma * d
gplus = gradf(xplus, *optargs) gplus = gradf(xplus, *optargs)
theta = np.dot(d, (gplus - gradnew))/sigma theta = np.dot(d, (gplus - gradnew)) / sigma
# Increase effective curvature and evaluate step size alpha. # Increase effective curvature and evaluate step size alpha.
delta = theta + beta*kappa delta = theta + beta * kappa
if delta <= 0: if delta <= 0:
delta = beta*kappa delta = beta * kappa
beta = beta - theta/kappa beta = beta - theta / kappa
alpha = - mu/delta alpha = -mu / delta
# Calculate the comparison ratio. # Calculate the comparison ratio.
xnew = x + alpha*d xnew = x + alpha * d
fnew = f(xnew, *optargs) fnew = f(xnew, *optargs)
function_eval += 1 function_eval += 1
@ -89,8 +95,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
status = "Maximum number of function evaluations exceeded" status = "Maximum number of function evaluations exceeded"
return x, flog, function_eval, status return x, flog, function_eval, status
Delta = 2.*(fnew - fold)/(alpha*mu) Delta = 2.*(fnew - fold) / (alpha * mu)
if Delta >= 0.: if Delta >= 0.:
success = True success = True
nsuccess += 1 nsuccess += 1
x = xnew x = xnew
@ -100,19 +106,19 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
fnow = fold fnow = fold
# Store relevant variables # Store relevant variables
flog.append(fnow) # Current function value flog.append(fnow) # Current function value
iteration += 1 iteration += 1
if display: if display:
print '\r', print '\r',
print 'Iteration: {0:>5g} Objective:{1:> 12e} Scale:{2:> 12e}'.format(iteration, fnow, beta), print 'i: {0:>5g} f:{1:> 12e} b:{2:> 12e} |g|:{3:> 12e}'.format(iteration, fnow, beta, current_grad),
# print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush() sys.stdout.flush()
if success: if success:
# Test for termination # Test for termination
if (np.max(np.abs(alpha*d)) < xtol) or (np.abs(fnew-fold) < ftol): if (np.max(np.abs(alpha * d)) < xtol) or (np.abs(fnew - fold) < ftol):
status='converged' status = 'converged'
return x, flog, function_eval, status return x, flog, function_eval, status
else: else:
@ -120,24 +126,27 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
fold = fnew fold = fnew
gradold = gradnew gradold = gradnew
gradnew = gradf(x, *optargs) gradnew = gradf(x, *optargs)
current_grad = np.dot(gradnew, gradnew)
# If the gradient is zero then we are done. # If the gradient is zero then we are done.
if np.dot(gradnew,gradnew) == 0: if current_grad <= gtol:
status = 'converged'
return x, flog, function_eval, status return x, flog, function_eval, status
# Adjust beta according to comparison ratio. # Adjust beta according to comparison ratio.
if Delta < 0.25: if Delta < 0.25:
beta = min(4.0*beta, betamax) beta = min(4.0 * beta, betamax)
if Delta > 0.75: if Delta > 0.75:
beta = max(0.5*beta, betamin) beta = max(0.5 * beta, betamin)
# Update search direction using Polak-Ribiere formula, or re-start # Update search direction using Polak-Ribiere formula, or re-start
# in direction of negative gradient after nparams steps. # in direction of negative gradient after nparams steps.
if nsuccess == x.size: if nsuccess == x.size:
d = -gradnew d = -gradnew
# beta = 1. # TODO: betareset!!
nsuccess = 0 nsuccess = 0
elif success: elif success:
gamma = np.dot(gradold - gradnew,gradnew)/(mu) gamma = np.dot(gradold - gradnew, gradnew) / (mu)
d = gamma*d - gradnew d = gamma * d - gradnew
# If we get here, then we haven't terminated in the given number of # If we get here, then we haven't terminated in the given number of
# iterations. # iterations.

View file

@ -97,51 +97,66 @@ class opt_SGD(Optimizer):
return subset return subset
def shift_constraints(self, j): def shift_constraints(self, j):
# back them up
bounded_i = copy.deepcopy(self.model.constrained_bounded_indices)
bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers)
bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers)
for b in range(len(bounded_i)): # for each group of constraints constrained_indices = copy.deepcopy(self.model.constrained_indices)
for bc in range(len(bounded_i[b])):
pos = np.where(j == bounded_i[b][bc])[0] for c, constraint in enumerate(constrained_indices):
mask = (np.ones_like(constrained_indices[c]) == 1)
for i in range(len(constrained_indices[c])):
pos = np.where(j == constrained_indices[c][i])[0]
if len(pos) == 1: if len(pos) == 1:
pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0] self.model.constrained_indices[c][i] = pos
self.model.constrained_bounded_indices[b][pos2] = pos[0]
else: else:
if len(self.model.constrained_bounded_indices[b]) == 1: mask[i] = False
# if it's the last index to be removed
# the logic here is just a mess. If we remove the last one, then all the
# b-indices change and we have to iterate through everything to find our
# current index. Can't deal with this right now.
raise NotImplementedError
else: # just remove it from the indices self.model.constrained_indices[c] = self.model.constrained_indices[c][mask]
mask = self.model.constrained_bounded_indices[b] != bc return constrained_indices
self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask] # back them up
# bounded_i = copy.deepcopy(self.model.constrained_bounded_indices)
# bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers)
# bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers)
# for b in range(len(bounded_i)): # for each group of constraints
# for bc in range(len(bounded_i[b])):
# pos = np.where(j == bounded_i[b][bc])[0]
# if len(pos) == 1:
# pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0]
# self.model.constrained_bounded_indices[b][pos2] = pos[0]
# else:
# if len(self.model.constrained_bounded_indices[b]) == 1:
# # if it's the last index to be removed
# # the logic here is just a mess. If we remove the last one, then all the
# # b-indices change and we have to iterate through everything to find our
# # current index. Can't deal with this right now.
# raise NotImplementedError
# else: # just remove it from the indices
# mask = self.model.constrained_bounded_indices[b] != bc
# self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask]
# here we shif the positive constraints. We cycle through each positive # # here we shif the positive constraints. We cycle through each positive
# constraint # # constraint
positive = self.model.constrained_positive_indices.copy() # positive = self.model.constrained_positive_indices.copy()
mask = (np.ones_like(positive) == 1) # mask = (np.ones_like(positive) == 1)
for p in range(len(positive)): # for p in range(len(positive)):
# we now check whether the constrained index appears in the j vector # # we now check whether the constrained index appears in the j vector
# (the vector of the "active" indices) # # (the vector of the "active" indices)
pos = np.where(j == self.model.constrained_positive_indices[p])[0] # pos = np.where(j == self.model.constrained_positive_indices[p])[0]
if len(pos) == 1: # if len(pos) == 1:
self.model.constrained_positive_indices[p] = pos # self.model.constrained_positive_indices[p] = pos
else: # else:
mask[p] = False # mask[p] = False
self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask] # self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask]
return (bounded_i, bounded_l, bounded_u), positive # return (bounded_i, bounded_l, bounded_u), positive
def restore_constraints(self, b, p): def restore_constraints(self, c):#b, p):
self.model.constrained_bounded_indices = b[0] # self.model.constrained_bounded_indices = b[0]
self.model.constrained_bounded_lowers = b[1] # self.model.constrained_bounded_lowers = b[1]
self.model.constrained_bounded_uppers = b[2] # self.model.constrained_bounded_uppers = b[2]
self.model.constrained_positive_indices = p # self.model.constrained_positive_indices = p
self.model.constrained_indices = c
def get_param_shapes(self, N = None, Q = None): def get_param_shapes(self, N = None, Q = None):
model_name = self.model.__class__.__name__ model_name = self.model.__class__.__name__
@ -168,9 +183,15 @@ class opt_SGD(Optimizer):
if self.model.N == 0 or Y.std() == 0.0: if self.model.N == 0 or Y.std() == 0.0:
return 0, step, self.model.N return 0, step, self.model.N
self.model.likelihood._mean = Y.mean() self.model.likelihood._bias = Y.mean()
self.model.likelihood._std = Y.std() self.model.likelihood._scale = Y.std()
self.model.likelihood.set_data(Y) self.model.likelihood.set_data(Y)
# self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision
sigma = self.model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma)
j = self.subset_parameter_vector(self.x_opt, samples, shapes) j = self.subset_parameter_vector(self.x_opt, samples, shapes)
self.model.X = X[samples] self.model.X = X[samples]
@ -181,27 +202,30 @@ class opt_SGD(Optimizer):
self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T) self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T)
self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT) self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT)
b, p = self.shift_constraints(j) ci = self.shift_constraints(j)
f, fp = f_fp(self.x_opt[j]) f, fp = f_fp(self.x_opt[j])
step[j] = self.momentum * step[j] + self.learning_rate[j] * fp step[j] = self.momentum * step[j] + self.learning_rate[j] * fp
self.x_opt[j] -= step[j] self.x_opt[j] -= step[j]
self.restore_constraints(ci)
self.restore_constraints(b, p) self.model.grads[j] = fp
# restore likelihood _mean and _std, otherwise when we call set_data(y) on # restore likelihood _bias and _scale, otherwise when we call set_data(y) on
# the next feature, it will get normalized with the mean and std of this one. # the next feature, it will get normalized with the mean and std of this one.
self.model.likelihood._mean = 0 self.model.likelihood._bias = 0
self.model.likelihood._std = 1 self.model.likelihood._scale = 1
return f, step, self.model.N return f, step, self.model.N
def opt(self, f_fp=None, f=None, fp=None): def opt(self, f_fp=None, f=None, fp=None):
self.x_opt = self.model._get_params_transformed() self.x_opt = self.model._get_params_transformed()
self.model.grads = np.zeros_like(self.x_opt)
X, Y = self.model.X.copy(), self.model.likelihood.Y.copy() X, Y = self.model.X.copy(), self.model.likelihood.Y.copy()
self.model.likelihood.YYT = None self.model.likelihood.YYT = None
self.model.likelihood.trYYT = None self.model.likelihood.trYYT = None
self.model.likelihood._mean = 0.0 self.model.likelihood._bias = 0.0
self.model.likelihood._std = 1.0 self.model.likelihood._scale = 1.0
N, Q = self.model.X.shape N, Q = self.model.X.shape
D = self.model.likelihood.Y.shape[1] D = self.model.likelihood.Y.shape[1]
@ -225,6 +249,11 @@ class opt_SGD(Optimizer):
self.model.D = len(j) self.model.D = len(j)
self.model.likelihood.D = len(j) self.model.likelihood.D = len(j)
self.model.likelihood.set_data(Y[:, j]) self.model.likelihood.set_data(Y[:, j])
# self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision
sigma = self.model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma)
if missing_data: if missing_data:
shapes = self.get_param_shapes(N, Q) shapes = self.get_param_shapes(N, Q)
@ -250,7 +279,6 @@ class opt_SGD(Optimizer):
# plt.clf() # plt.clf()
# plt.plot(self.param_traces['noise']) # plt.plot(self.param_traces['noise'])
# import pdb; pdb.set_trace()
# for k in self.param_traces.keys(): # for k in self.param_traces.keys():
# self.param_traces[k].append(self.model.get(k)[0]) # self.param_traces[k].append(self.model.get(k)[0])
@ -262,6 +290,9 @@ class opt_SGD(Optimizer):
self.model.likelihood.N = N self.model.likelihood.N = N
self.model.likelihood.D = D self.model.likelihood.D = D
self.model.likelihood.Y = Y self.model.likelihood.Y = Y
sigma = self.model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma)
self.trace.append(self.f_opt) self.trace.append(self.f_opt)
if self.iteration_file is not None: if self.iteration_file is not None:

View file

@ -3,7 +3,8 @@ Created on 24 Apr 2013
@author: maxz @author: maxz
''' '''
from GPy.inference.gradient_descent_update_rules import FletcherReeves from GPy.inference.gradient_descent_update_rules import FletcherReeves, \
PolakRibiere
from Queue import Empty from Queue import Empty
from multiprocessing import Value from multiprocessing import Value
from multiprocessing.queues import Queue from multiprocessing.queues import Queue
@ -12,6 +13,7 @@ from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
from threading import Thread from threading import Thread
import numpy import numpy
import sys import sys
import time
RUNNING = "running" RUNNING = "running"
CONVERGED = "converged" CONVERGED = "converged"
@ -71,7 +73,10 @@ class _Async_Optimization(Thread):
def callback_return(self, *a): def callback_return(self, *a):
self.callback(*a) self.callback(*a)
self.callback(self.SENTINEL) if self.outq is not None:
self.outq.put(self.SENTINEL)
if self.messages:
print ""
self.runsignal.clear() self.runsignal.clear()
def run(self, *args, **kwargs): def run(self, *args, **kwargs):
@ -105,7 +110,7 @@ class _CGDAsync(_Async_Optimization):
status = MAX_F_EVAL status = MAX_F_EVAL
gi = -self.df(xi, *a, **kw) gi = -self.df(xi, *a, **kw)
if numpy.dot(gi.T, gi) < self.gtol: if numpy.dot(gi.T, gi) <= self.gtol:
status = CONVERGED status = CONVERGED
break break
if numpy.isnan(numpy.dot(gi.T, gi)): if numpy.isnan(numpy.dot(gi.T, gi)):
@ -123,10 +128,8 @@ class _CGDAsync(_Async_Optimization):
xi, xi,
si, gi, si, gi,
fi, fi_old) fi, fi_old)
if alphai is not None and fi2 < fi: if alphai is None:
fi, fi_old = fi2, fi_old2 alphai, _, _, fi2, fi_old2, gfi = \
else:
alphai, _, _, fi, fi_old, gfi = \
line_search_wolfe2(self.f, self.df, line_search_wolfe2(self.f, self.df,
xi, si, gi, xi, si, gi,
fi, fi_old) fi, fi_old)
@ -134,11 +137,14 @@ class _CGDAsync(_Async_Optimization):
# This line search also failed to find a better solution. # This line search also failed to find a better solution.
status = LINE_SEARCH status = LINE_SEARCH
break break
if fi2 < fi:
fi, fi_old = fi2, fi_old2
if gfi is not None: if gfi is not None:
gi = gfi gi = gfi
if numpy.isnan(fi) or fi_old < fi: if numpy.isnan(fi) or fi_old < fi:
gi, ur, si = self.reset(xi, *a, **kw) gi, ur, si = self.reset(xi, *a, **kw)
else: else:
xi += numpy.dot(alphai, si) xi += numpy.dot(alphai, si)
if self.messages: if self.messages:
@ -164,10 +170,11 @@ class Async_Optimize(object):
try: try:
for ret in iter(lambda: q.get(timeout=1), self.SENTINEL): for ret in iter(lambda: q.get(timeout=1), self.SENTINEL):
self.callback(*ret) self.callback(*ret)
self.runsignal.clear()
except Empty: except Empty:
pass pass
def opt_async(self, f, df, x0, callback, update_rule=FletcherReeves, def opt_async(self, f, df, x0, callback, update_rule=PolakRibiere,
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6, messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, *args, **kwargs): report_every=10, *args, **kwargs):
self.runsignal.set() self.runsignal.set()
@ -193,13 +200,21 @@ class Async_Optimize(object):
while self.runsignal.is_set(): while self.runsignal.is_set():
try: try:
p.join(1) p.join(1)
# c.join(1) if c: c.join(1)
except KeyboardInterrupt: except KeyboardInterrupt:
# print "^C" # print "^C"
self.runsignal.clear() self.runsignal.clear()
p.join() p.join()
c.join() if c: c.join()
if c and c.is_alive(): if c and c.is_alive():
# self.runsignal.set()
# while self.runsignal.is_set():
# try:
# c.join(.1)
# except KeyboardInterrupt:
# # print "^C"
# self.runsignal.clear()
# c.join()
print "WARNING: callback still running, optimisation done!" print "WARNING: callback still running, optimisation done!"
return p.result return p.result

View file

@ -41,3 +41,13 @@ class FletcherReeves(GDUpdateRule):
if tmp: if tmp:
return tmp / numpy.dot(self.gradold.T, self.gradnatold) return tmp / numpy.dot(self.gradold.T, self.gradnatold)
return tmp return tmp
class PolakRibiere(GDUpdateRule):
'''
Fletcher Reeves update rule for gamma
'''
def _gamma(self, *a, **kw):
tmp = numpy.dot((self.grad - self.gradold).T, self.gradnat)
if tmp:
return tmp / numpy.dot(self.gradold.T, self.gradnatold)
return tmp

View file

@ -29,7 +29,8 @@ class Optimizer():
:rtype: optimizer object. :rtype: optimizer object.
""" """
def __init__(self, x_init, messages=False, model = None, max_f_eval=1e4, max_iters = 1e3, ftol=None, gtol=None, xtol=None, callback=None): def __init__(self, x_init, messages=False, model=None, max_f_eval=1e4, max_iters=1e3,
ftol=None, gtol=None, xtol=None):
self.opt_name = None self.opt_name = None
self.x_init = x_init self.x_init = x_init
self.messages = messages self.messages = messages
@ -45,7 +46,6 @@ class Optimizer():
self.gtol = gtol self.gtol = gtol
self.ftol = ftol self.ftol = ftol
self.model = model self.model = model
self.callback = callback
def run(self, **kwargs): def run(self, **kwargs):
start = dt.datetime.now() start = dt.datetime.now()
@ -95,8 +95,6 @@ class opt_tnc(Optimizer):
opt_dict['ftol'] = self.ftol opt_dict['ftol'] = self.ftol
if self.gtol is not None: if self.gtol is not None:
opt_dict['pgtol'] = self.gtol opt_dict['pgtol'] = self.gtol
if self.callback is not None:
opt_dict['callback'] = self.callback
opt_result = optimize.fmin_tnc(f_fp, self.x_init, messages = self.messages, opt_result = optimize.fmin_tnc(f_fp, self.x_init, messages = self.messages,
maxfun = self.max_f_eval, **opt_dict) maxfun = self.max_f_eval, **opt_dict)
@ -131,8 +129,6 @@ class opt_lbfgsb(Optimizer):
print "WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it" print "WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it"
if self.gtol is not None: if self.gtol is not None:
opt_dict['pgtol'] = self.gtol opt_dict['pgtol'] = self.gtol
if self.callback is not None:
opt_dict['callback'] = self.callback
opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint = iprint, opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint = iprint,
maxfun = self.max_f_eval, **opt_dict) maxfun = self.max_f_eval, **opt_dict)
@ -160,8 +156,6 @@ class opt_simplex(Optimizer):
opt_dict['ftol'] = self.ftol opt_dict['ftol'] = self.ftol
if self.gtol is not None: if self.gtol is not None:
print "WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it" print "WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it"
if self.callback is not None:
opt_dict['callback'] = self.callback
opt_result = optimize.fmin(f, self.x_init, (), disp = self.messages, opt_result = optimize.fmin(f, self.x_init, (), disp = self.messages,
maxfun = self.max_f_eval, full_output=True, **opt_dict) maxfun = self.max_f_eval, full_output=True, **opt_dict)
@ -194,8 +188,6 @@ class opt_rasm(Optimizer):
print "WARNING: minimize doesn't have an ftol arg, so I'm going to ignore it" print "WARNING: minimize doesn't have an ftol arg, so I'm going to ignore it"
if self.gtol is not None: if self.gtol is not None:
print "WARNING: minimize doesn't have an gtol arg, so I'm going to ignore it" print "WARNING: minimize doesn't have an gtol arg, so I'm going to ignore it"
if self.callback is not None:
print "WARNING: minimize doesn't have a callback arg, so I'm going to ignore it"
opt_result = rasm.minimize(self.x_init, f_fp, (), messages = self.messages, opt_result = rasm.minimize(self.x_init, f_fp, (), messages = self.messages,
maxnumfuneval = self.max_f_eval) maxnumfuneval = self.max_f_eval)
@ -214,9 +206,11 @@ class opt_SCG(Optimizer):
def opt(self, f_fp = None, f = None, fp = None): def opt(self, f_fp = None, f = None, fp = None):
assert not f is None assert not f is None
assert not fp is None assert not fp is None
if self.callback is not None: opt_result = SCG(f, fp, self.x_init, display=self.messages,
print "WARNING: SCG doesn't have a callback arg, so I'm going to ignore it" maxiters=self.max_iters,
opt_result = SCG(f,fp,self.x_init, display=self.messages, maxiters=self.max_iters, max_f_eval=self.max_f_eval, xtol=self.xtol, ftol=self.ftol) max_f_eval=self.max_f_eval,
xtol=self.xtol, ftol=self.ftol,
gtol=self.gtol)
self.x_opt = opt_result[0] self.x_opt = opt_result[0]
self.trace = opt_result[1] self.trace = opt_result[1]
self.f_opt = self.trace[-1] self.f_opt = self.trace[-1]

View file

@ -36,12 +36,16 @@ class Brownian(kernpart):
return ['variance'] return ['variance']
def K(self,X,X2,target): def K(self,X,X2,target):
if X2 is None:
X2 = X
target += self.variance*np.fmin(X,X2.T) target += self.variance*np.fmin(X,X2.T)
def Kdiag(self,X,target): def Kdiag(self,X,target):
target += self.variance*X.flatten() target += self.variance*X.flatten()
def dK_dtheta(self,dL_dK,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
if X2 is None:
X2 = X
target += np.sum(np.fmin(X,X2.T)*dL_dK) target += np.sum(np.fmin(X,X2.T)*dL_dK)
def dKdiag_dtheta(self,dL_dKdiag,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
try: try:
from constructors import rbf_sympy, sympykern # these depend on sympy from constructors import rbf_sympy, sympykern # these depend on sympy
except: except:

View file

@ -38,7 +38,6 @@ class bias(kernpart):
def dK_dtheta(self,dL_dKdiag,X,X2,target): def dK_dtheta(self,dL_dKdiag,X,X2,target):
target += dL_dKdiag.sum() target += dL_dKdiag.sum()
def dKdiag_dtheta(self,dL_dKdiag,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += dL_dKdiag.sum() target += dL_dKdiag.sum()

View file

@ -20,7 +20,6 @@ from periodic_exponential import periodic_exponential as periodic_exponentialpar
from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part
from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part
from prod import prod as prodpart from prod import prod as prodpart
from prod_orthogonal import prod_orthogonal as prod_orthogonalpart
from symmetric import symmetric as symmetric_part from symmetric import symmetric as symmetric_part
from coregionalise import coregionalise as coregionalise_part from coregionalise import coregionalise as coregionalise_part
from rational_quadratic import rational_quadratic as rational_quadraticpart from rational_quadratic import rational_quadratic as rational_quadraticpart
@ -255,7 +254,7 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,
part = periodic_Matern52part(D,variance, lengthscale, period, n_freq, lower, upper) part = periodic_Matern52part(D,variance, lengthscale, period, n_freq, lower, upper)
return kern(D, [part]) return kern(D, [part])
def prod(k1,k2): def prod(k1,k2,tensor=False):
""" """
Construct a product kernel over D from two kernels over D Construct a product kernel over D from two kernels over D
@ -263,19 +262,8 @@ def prod(k1,k2):
:type k1, k2: kernpart :type k1, k2: kernpart
:rtype: kernel object :rtype: kernel object
""" """
part = prodpart(k1,k2) part = prodpart(k1,k2,tensor)
return kern(k1.D, [part]) return kern(part.D, [part])
def prod_orthogonal(k1,k2):
"""
Construct a product kernel over D1 x D2 from a kernel over D1 and another over D2.
:param k1, k2: the kernels to multiply
:type k1, k2: kernpart
:rtype: kernel object
"""
part = prod_orthogonalpart(k1,k2)
return kern(k1.D+k2.D, [part])
def symmetric(k): def symmetric(k):
""" """

View file

@ -7,7 +7,6 @@ import pylab as pb
from ..core.parameterised import parameterised from ..core.parameterised import parameterised
from kernpart import kernpart from kernpart import kernpart
import itertools import itertools
from prod_orthogonal import prod_orthogonal
from prod import prod from prod import prod
from ..util.linalg import symmetrify from ..util.linalg import symmetrify
@ -84,96 +83,72 @@ class kern(parameterised):
count += p.Nparam count += p.Nparam
def __add__(self, other): def __add__(self, other):
assert self.D == other.D """
newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices) Shortcut for `add`.
# transfer constraints: """
newkern.constrained_indices = self.constrained_indices + [i+self.Nparam for i in other.constrained_indices] return self.add(other)
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
return newkern
def add(self, other): def add(self, other,tensor=False):
""" """
Add another kernel to this one. Both kernels are defined on the same _space_ Add another kernel to this one. Both kernels are defined on the same _space_
:param other: the other kernel to be added :param other: the other kernel to be added
:type other: GPy.kern :type other: GPy.kern
""" """
return self + other if tensor:
D = self.D + other.D
self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]
def add_orthogonal(self, other): newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
"""
Add another kernel to this one. Both kernels are defined on separate spaces
:param other: the other kernel to be added
:type other: GPy.kern
"""
# deal with input slices
D = self.D + other.D
self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices) # transfer constraints:
newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices]
# transfer constraints: newkern.constraints = self.constraints + other.constraints
newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices] newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.constraints = self.constraints + other.constraints newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices] newkern.constraints = self.constraints + other.constraints
newkern.fixed_values = self.fixed_values + other.fixed_values newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
newkern.constraints = self.constraints + other.constraints else:
newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers assert self.D == other.D
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices] newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [i+self.Nparam for i in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
return newkern return newkern
def __mul__(self, other): def __mul__(self, other):
""" """
Shortcut for `prod_orthogonal`. Note that `+` assumes that we sum 2 kernels defines on the same space whereas `*` assumes that the kernels are defined on different subspaces. Shortcut for `prod`.
""" """
return self.prod(other) return self.prod(other)
def prod(self, other): def prod(self, other,tensor=False):
""" """
multiply two kernels defined on the same spaces. multiply two kernels (either on the same space, or on the tensor product of the input space)
:param other: the other kernel to be added :param other: the other kernel to be added
:type other: GPy.kern :type other: GPy.kern
""" """
K1 = self.copy() K1 = self.copy()
K2 = other.copy() K2 = other.copy()
newkernparts = [prod(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
slices = [] slices = []
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices): for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
s1, s2 = [False] * K1.D, [False] * K2.D s1, s2 = [False]*K1.D, [False]*K2.D
s1[sl1], s2[sl2] = [True], [True] s1[sl1], s2[sl2] = [True], [True]
slices += [s1 + s2] slices += [s1+s2]
newkernparts = [prod(k1, k2,tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)]
if tensor:
newkern = kern(K1.D + K2.D, newkernparts, slices)
else:
newkern = kern(K1.D, newkernparts, slices)
newkern = kern(K1.D, newkernparts, slices)
newkern._follow_constrains(K1, K2) newkern._follow_constrains(K1, K2)
return newkern
def prod_orthogonal(self, other):
"""
multiply two kernels. Both kernels are defined on separate spaces.
:param other: the other kernel to be added
:type other: GPy.kern
"""
K1 = self.copy()
K2 = other.copy()
newkernparts = [prod_orthogonal(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
slices = []
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
s1, s2 = [False] * K1.D, [False] * K2.D
s1[sl1], s2[sl2] = [True], [True]
slices += [s1 + s2]
newkern = kern(K1.D + K2.D, newkernparts, slices)
newkern._follow_constrains(K1, K2)
return newkern return newkern
def _follow_constrains(self, K1, K2): def _follow_constrains(self, K1, K2):
@ -277,7 +252,7 @@ class kern(parameterised):
which_parts = [True]*self.Nparts which_parts = [True]*self.Nparts
assert X.shape[1] == self.D assert X.shape[1] == self.D
target = np.zeros(X.shape[0]) target = np.zeros(X.shape[0])
[p.Kdiag(X[:, i_s], target=target) for p, i_s in zip(self.parts, self.input_slices)] [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on]
return target return target
def dKdiag_dtheta(self, dL_dKdiag, X): def dKdiag_dtheta(self, dL_dKdiag, X):
@ -469,9 +444,9 @@ class kern(parameterised):
return target_mu, target_S return target_mu, target_S
def plot(self, x=None, plot_limits=None, which_functions='all', resolution=None, *args, **kwargs): def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs):
if which_functions == 'all': if which_parts == 'all':
which_functions = [True] * self.Nparts which_parts = [True] * self.Nparts
if self.D == 1: if self.D == 1:
if x is None: if x is None:
x = np.zeros((1, 1)) x = np.zeros((1, 1))
@ -488,7 +463,7 @@ class kern(parameterised):
raise ValueError, "Bad limits for plotting" raise ValueError, "Bad limits for plotting"
Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None] Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None]
Kx = self.K(Xnew, x, slices2=which_functions) Kx = self.K(Xnew, x, which_parts)
pb.plot(Xnew, Kx, *args, **kwargs) pb.plot(Xnew, Kx, *args, **kwargs)
pb.xlim(xmin, xmax) pb.xlim(xmin, xmax)
pb.xlabel("x") pb.xlabel("x")
@ -514,7 +489,7 @@ class kern(parameterised):
xg = np.linspace(xmin[0], xmax[0], resolution) xg = np.linspace(xmin[0], xmax[0], resolution)
yg = np.linspace(xmin[1], xmax[1], resolution) yg = np.linspace(xmin[1], xmax[1], resolution)
Xnew = np.vstack((xx.flatten(), yy.flatten())).T Xnew = np.vstack((xx.flatten(), yy.flatten())).T
Kx = self.K(Xnew, x, slices2=which_functions) Kx = self.K(Xnew, x, which_parts)
Kx = Kx.reshape(resolution, resolution).T Kx = Kx.reshape(resolution, resolution).T
pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs) pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs)
pb.xlim(xmin[0], xmax[0]) pb.xlim(xmin[0], xmax[0])

View file

@ -5,6 +5,7 @@
from kernpart import kernpart from kernpart import kernpart
import numpy as np import numpy as np
from ..util.linalg import tdot from ..util.linalg import tdot
from scipy import weave
class linear(kernpart): class linear(kernpart):
""" """
@ -171,33 +172,91 @@ class linear(kernpart):
self._psi_computations(Z, mu, S) self._psi_computations(Z, mu, S)
AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :] AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :]
AZZA = AZZA + AZZA.swapaxes(1, 2) AZZA = AZZA + AZZA.swapaxes(1, 2)
target_S += (dL_dpsi2[:, :, :, None] * self.ZA[None, :, None, :] * self.ZA[None, None, :, :]).sum(1).sum(1) AZZA_2 = AZZA/2.
dpsi2_dmu = (dL_dpsi2[:, :, :, None] * np.tensordot(mu, AZZA, (-1, 0))).sum(1).sum(1) #muAZZA = np.tensordot(mu,AZZA,(-1,0))
target_mu += dpsi2_dmu #target_mu_dummy, target_S_dummy = np.zeros_like(target_mu), np.zeros_like(target_S)
#target_mu_dummy += (dL_dpsi2[:, :, :, None] * muAZZA).sum(1).sum(1)
#target_S_dummy += (dL_dpsi2[:, :, :, None] * self.ZA[None, :, None, :] * self.ZA[None, None, :, :]).sum(1).sum(1)
#Using weave, we can exploiut the symmetry of this problem:
code = """
int n, m, mm,q,qq;
double factor,tmp;
#pragma omp parallel for private(m,mm,q,qq,factor,tmp)
for(n=0;n<N;n++){
for(m=0;m<M;m++){
for(mm=0;mm<=m;mm++){
//add in a factor of 2 for the off-diagonal terms (and then count them only once)
if(m==mm)
factor = dL_dpsi2(n,m,mm);
else
factor = 2.0*dL_dpsi2(n,m,mm);
for(q=0;q<Q;q++){
//take the dot product of mu[n,:] and AZZA[:,m,mm,q] TODO: blas!
tmp = 0.0;
for(qq=0;qq<Q;qq++){
tmp += mu(n,qq)*AZZA(qq,m,mm,q);
}
target_mu(n,q) += factor*tmp;
target_S(n,q) += factor*AZZA_2(q,m,mm,q);
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','M','Q','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S) self._psi_computations(Z, mu, S)
# mu2_S = np.sum(self.mu2_S, 0) # Q, #psi2_dZ = dL_dpsi2[:, :, :, None] * self.variances * self.ZAinner[:, :, None, :]
# import ipdb;ipdb.set_trace() #dummy_target = np.zeros_like(target)
# psi2_dZ_real = np.zeros((mu.shape[0], Z.shape[0], Z.shape[1])) #dummy_target += psi2_dZ.sum(0).sum(0)
# for n in range(mu.shape[0]):
# for m in range(Z.shape[0]): AZA = self.variances*self.ZAinner
# tmp = self.variances * (tdot(self._mu[n:n + 1].T) + np.diag(S[n])) code="""
# psi2_dZ_real[n, m, :] = np.dot(tmp, ( int n,m,mm,q;
# self._Z[m:m + 1] * self.variances).T).T #pragma omp parallel for private(n,mm,q)
# tmp = self._Z[m:m + 1] * self.variances for(m=0;m<M;m++){
# tmp = np.dot(tmp, (tdot(self._mu[n:n + 1].T) + np.diag(S[n]))) for(q=0;q<Q;q++){
# psi2_dZ_real[n, m, :] = tmp * self.variances for(mm=0;mm<M;mm++){
# for m_prime in range(Z.shape[0]): for(n=0;n<N;n++){
# if m == m_prime: target(m,q) += dL_dpsi2(n,m,mm)*AZA(n,mm,q);
# psi2_dZ_real[n, m, :] *= 2 }
# prod = (dL_dpsi2[:, :, :, None] * np.eye(Z.shape[0])[None, :, :, None] * (self.ZAinner * self.variances).swapaxes(0, 1)[:, :, None, :]) }
# psi2_dZ = prod.swapaxes(1, 2) + prod }
psi2_dZ = dL_dpsi2[:, :, :, None] * self.variances * self.ZAinner[:, :, None, :] }
target += psi2_dZ.sum(0).sum(0) """
# import ipdb;ipdb.set_trace() support_code = """
# psi2_dZ_old = (dL_dpsi2[:, :, :, None] * (self.mu2_S[:, None, None, :] * (Z * np.square(self.variances)[None, :])[None, None, :, :])).sum(0).sum(1) #include <omp.h>
# target += (dL_dpsi2[:, :, :, None] * psi2_dZ_real[:, :, None, :]).sum(0).sum(0) * 2 # (self.variances * np.dot(self.inner, self.ZA.T)).sum(1) #include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','M','Q','AZA','target','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
#---------------------------------------# #---------------------------------------#
# Precomputations # # Precomputations #

View file

@ -4,108 +4,108 @@
from kernpart import kernpart from kernpart import kernpart
import numpy as np import numpy as np
import hashlib import hashlib
#from scipy import integrate # This may not be necessary (Nicolas, 20th Feb)
class prod(kernpart): class prod(kernpart):
""" """
Computes the product of 2 kernels that are defined on the same space Computes the product of 2 kernels
:param k1, k2: the kernels to multiply :param k1, k2: the kernels to multiply
:type k1, k2: kernpart :type k1, k2: kernpart
:param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean
:rtype: kernel object :rtype: kernel object
""" """
def __init__(self,k1,k2): def __init__(self,k1,k2,tensor=False):
assert k1.D == k2.D, "Error: The input spaces of the kernels to multiply must have the same dimension"
self.D = k1.D
self.Nparam = k1.Nparam + k2.Nparam self.Nparam = k1.Nparam + k2.Nparam
self.name = k1.name + '<times>' + k2.name self.name = k1.name + '<times>' + k2.name
self.k1 = k1 self.k1 = k1
self.k2 = k2 self.k2 = k2
if tensor:
self.D = k1.D + k2.D
self.slice1 = slice(0,self.k1.D)
self.slice2 = slice(self.k1.D,self.k1.D+self.k2.D)
else:
assert k1.D == k2.D, "Error: The input spaces of the kernels to sum don't have the same dimension."
self.D = k1.D
self.slice1 = slice(0,self.D)
self.slice2 = slice(0,self.D)
self._X, self._X2, self._params = np.empty(shape=(3,1))
self._set_params(np.hstack((k1._get_params(),k2._get_params()))) self._set_params(np.hstack((k1._get_params(),k2._get_params())))
def _get_params(self): def _get_params(self):
"""return the value of the parameters.""" """return the value of the parameters."""
return self.params return np.hstack((self.k1._get_params(), self.k2._get_params()))
def _set_params(self,x): def _set_params(self,x):
"""set the value of the parameters.""" """set the value of the parameters."""
self.k1._set_params(x[:self.k1.Nparam]) self.k1._set_params(x[:self.k1.Nparam])
self.k2._set_params(x[self.k1.Nparam:]) self.k2._set_params(x[self.k1.Nparam:])
self.params = x
def _get_param_names(self): def _get_param_names(self):
"""return parameter names.""" """return parameter names."""
return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()] return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]
def K(self,X,X2,target): def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2.""" self._K_computations(X,X2)
if X2 is None: target += self._K1 * self._K2
target1 = np.zeros((X.shape[0],X2.shape[0]))
target2 = np.zeros((X.shape[0],X2.shape[0]))
else:
target1 = np.zeros((X.shape[0],X.shape[0]))
target2 = np.zeros((X.shape[0],X.shape[0]))
self.k1.K(X,X2,target1)
self.k2.K(X,X2,target2)
target += target1 * target2
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros((X.shape[0],))
target2 = np.zeros((X.shape[0],))
self.k1.Kdiag(X,target1)
self.k2.Kdiag(X,target2)
target += target1 * target2
def dK_dtheta(self,dL_dK,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X self._K_computations(X,X2)
K1 = np.zeros((X.shape[0],X2.shape[0])) if X2 is None:
K2 = np.zeros((X.shape[0],X2.shape[0])) self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.Nparam])
self.k1.K(X,X2,K1) self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.Nparam:])
self.k2.K(X,X2,K2) else:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.Nparam])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.Nparam:])
k1_target = np.zeros(self.k1.Nparam) def Kdiag(self,X,target):
k2_target = np.zeros(self.k2.Nparam) """Compute the diagonal of the covariance matrix associated to X."""
self.k1.dK_dtheta(dL_dK*K2, X, X2, k1_target) target1 = np.zeros(X.shape[0])
self.k2.dK_dtheta(dL_dK*K1, X, X2, k2_target) target2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],target1)
self.k2.Kdiag(X[:,self.slice2],target2)
target += target1 * target2
target[:self.k1.Nparam] += k1_target def dKdiag_dtheta(self,dL_dKdiag,X,target):
target[self.k1.Nparam:] += k2_target K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.Nparam])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.Nparam:])
def dK_dX(self,dL_dK,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X self._K_computations(X,X2)
K1 = np.zeros((X.shape[0],X2.shape[0])) self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target)
K2 = np.zeros((X.shape[0],X2.shape[0])) self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target)
self.k1.K(X,X2,K1)
self.k2.K(X,X2,K2)
self.k1.dK_dX(dL_dK*K2, X, X2, target) def dKdiag_dX(self, dL_dKdiag, X, target):
self.k2.dK_dX(dL_dK*K1, X, X2, target) K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
def dKdiag_dX(self,dL_dKdiag,X,target): self.k1.dK_dX(dL_dKdiag*K2, X[:,self.slice1], target)
target1 = np.zeros((X.shape[0],)) self.k2.dK_dX(dL_dKdiag*K1, X[:,self.slice2], target)
target2 = np.zeros((X.shape[0],))
self.k1.Kdiag(X,target1)
self.k2.Kdiag(X,target2)
self.k1.dKdiag_dX(dL_dKdiag*target2, X, target) def _K_computations(self,X,X2):
self.k2.dKdiag_dX(dL_dKdiag*target1, X, target) if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
self._X = X.copy()
def dKdiag_dtheta(self,dL_dKdiag,X,target): self._params == self._get_params().copy()
"""Compute the diagonal of the covariance matrix associated to X.""" if X2 is None:
target1 = np.zeros((X.shape[0],)) self._X2 = None
target2 = np.zeros((X.shape[0],)) self._K1 = np.zeros((X.shape[0],X.shape[0]))
self.k1.Kdiag(X,target1) self._K2 = np.zeros((X.shape[0],X.shape[0]))
self.k2.Kdiag(X,target2) self.k1.K(X[:,self.slice1],None,self._K1)
self.k2.K(X[:,self.slice2],None,self._K2)
k1_target = np.zeros(self.k1.Nparam) else:
k2_target = np.zeros(self.k2.Nparam) self._X2 = X2.copy()
self.k1.dKdiag_dtheta(dL_dKdiag*target2, X, k1_target) self._K1 = np.zeros((X.shape[0],X2.shape[0]))
self.k2.dKdiag_dtheta(dL_dKdiag*target1, X, k2_target) self._K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1)
target[:self.k1.Nparam] += k1_target self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2)
target[self.k1.Nparam:] += k2_target

View file

@ -1,6 +1,6 @@
import numpy as np import numpy as np
from scipy import stats, linalg from scipy import stats, linalg
from ..util.linalg import pdinv,mdot,jitchol from ..util.linalg import pdinv,mdot,jitchol,DSYR
from likelihood import likelihood from likelihood import likelihood
class EP(likelihood): class EP(likelihood):
@ -32,6 +32,7 @@ class EP(likelihood):
self.precision = np.ones(self.N)[:,None] self.precision = np.ones(self.N)[:,None]
self.Z = 0 self.Z = 0
self.YYT = None self.YYT = None
self.V = self.precision * self.Y
def restart(self): def restart(self):
self.tau_tilde = np.zeros(self.N) self.tau_tilde = np.zeros(self.N)
@ -41,6 +42,7 @@ class EP(likelihood):
self.precision = np.ones(self.N)[:,None] self.precision = np.ones(self.N)[:,None]
self.Z = 0 self.Z = 0
self.YYT = None self.YYT = None
self.V = self.precision * self.Y
def predictive_values(self,mu,var,full_cov): def predictive_values(self,mu,var,full_cov):
if full_cov: if full_cov:
@ -67,6 +69,7 @@ class EP(likelihood):
self.YYT = np.dot(self.Y,self.Y.T) self.YYT = np.dot(self.Y,self.Y.T)
self.covariance_matrix = np.diag(1./self.tau_tilde) self.covariance_matrix = np.diag(1./self.tau_tilde)
self.precision = self.tau_tilde[:,None] self.precision = self.tau_tilde[:,None]
self.V = self.precision * self.Y
def fit_full(self,K): def fit_full(self,K):
""" """
@ -110,11 +113,12 @@ class EP(likelihood):
#Site parameters update #Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau self.tau_tilde[i] += Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v self.v_tilde[i] += Delta_v
#Posterior distribution parameters update #Posterior distribution parameters update
si=Sigma[:,i].reshape(self.N,1) DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
Sigma = Sigma - Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T) #si=Sigma[:,i:i+1]
#Sigma -= Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T)#DSYR
mu = np.dot(Sigma,self.v_tilde) mu = np.dot(Sigma,self.v_tilde)
self.iterations += 1 self.iterations += 1
#Sigma recomptutation with Cholesky decompositon #Sigma recomptutation with Cholesky decompositon
@ -196,9 +200,9 @@ class EP(likelihood):
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update #Posterior distribution parameters update
#LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
#L = jitchol(LLT) L = jitchol(LLT)
cholupdate(L,Kmn[:,i]*np.sqrt(Delta_tau)) #cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1) V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.sum(V*V,-2) Sigma_diag = np.sum(V*V,-2)
si = np.sum(V.T*V[:,i],-1) si = np.sum(V.T*V[:,i],-1)
@ -251,6 +255,7 @@ class EP(likelihood):
R = R0.copy() R = R0.copy()
Diag = Diag0.copy() Diag = Diag0.copy()
Sigma_diag = Knn_diag Sigma_diag = Knn_diag
RPT0 = np.dot(R0,P0.T)
""" """
Initial values - Cavity distribution parameters: Initial values - Cavity distribution parameters:
@ -306,13 +311,7 @@ class EP(likelihood):
Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde) Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
Diag = Diag0 * Iplus_Dprod_i Diag = Diag0 * Iplus_Dprod_i
P = Iplus_Dprod_i[:,None] * P0 P = Iplus_Dprod_i[:,None] * P0
#Diag = Diag0/(1.+ Diag0 * self.tau_tilde)
#P = (Diag / Diag0)[:,None] * P0
RPT0 = np.dot(R0,P0.T)
L = jitchol(np.eye(M) + np.dot(RPT0,((1. - Iplus_Dprod_i)/Diag0)[:,None]*RPT0.T)) L = jitchol(np.eye(M) + np.dot(RPT0,((1. - Iplus_Dprod_i)/Diag0)[:,None]*RPT0.T))
#L = jitchol(np.eye(M) + np.dot(RPT0,(1./Diag0 - Iplus_Dprod_i/Diag0)[:,None]*RPT0.T))
#L = jitchol(np.eye(M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T))
R,info = linalg.lapack.flapack.dtrtrs(L,R0,lower=1) R,info = linalg.lapack.flapack.dtrtrs(L,R0,lower=1)
RPT = np.dot(R,P.T) RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)

View file

@ -11,33 +11,34 @@ class Gaussian(likelihood):
:param normalize: whether to normalize the data before computing (predictions will be in original scales) :param normalize: whether to normalize the data before computing (predictions will be in original scales)
:type normalize: False|True :type normalize: False|True
""" """
def __init__(self,data,variance=1.,normalize=False): def __init__(self, data, variance=1., normalize=False):
self.is_heteroscedastic = False self.is_heteroscedastic = False
self.Nparams = 1 self.Nparams = 1
self.Z = 0. # a correction factor which accounts for the approximation made self.Z = 0. # a correction factor which accounts for the approximation made
N, self.D = data.shape N, self.D = data.shape
#normalization # normalization
if normalize: if normalize:
self._bias = data.mean(0)[None,:] self._bias = data.mean(0)[None, :]
self._scale = data.std(0)[None,:] self._scale = data.std(0)[None, :]
# Don't scale outputs which have zero variance to zero. # Don't scale outputs which have zero variance to zero.
self._scale[np.nonzero(self._scale==0.)] = 1.0e-3 self._scale[np.nonzero(self._scale == 0.)] = 1.0e-3
else: else:
self._bias = np.zeros((1,self.D)) self._bias = np.zeros((1, self.D))
self._scale = np.ones((1,self.D)) self._scale = np.ones((1, self.D))
self.set_data(data) self.set_data(data)
self._variance = np.asarray(variance) + 1.
self._set_params(np.asarray(variance)) self._set_params(np.asarray(variance))
def set_data(self,data): def set_data(self, data):
self.data = data self.data = data
self.N,D = data.shape self.N, D = data.shape
assert D == self.D assert D == self.D
self.Y = (self.data - self._bias)/self._scale self.Y = (self.data - self._bias) / self._scale
if D > self.N: if D > self.N:
self.YYT = np.dot(self.Y,self.Y.T) self.YYT = np.dot(self.Y, self.Y.T)
self.trYYT = np.trace(self.YYT) self.trYYT = np.trace(self.YYT)
else: else:
self.YYT = None self.YYT = None
@ -49,27 +50,30 @@ class Gaussian(likelihood):
def _get_param_names(self): def _get_param_names(self):
return ["noise_variance"] return ["noise_variance"]
def _set_params(self,x): def _set_params(self, x):
self._variance = float(x) x = float(x)
self.covariance_matrix = np.eye(self.N)*self._variance if self._variance != x:
self.precision = 1./self._variance self._variance = x
self.covariance_matrix = np.eye(self.N) * self._variance
self.precision = 1. / self._variance
self.V = (self.precision) * self.Y
def predictive_values(self,mu,var, full_cov): def predictive_values(self, mu, var, full_cov):
""" """
Un-normalize the prediction and add the likelihood variance, then return the 5%, 95% interval Un-normalize the prediction and add the likelihood variance, then return the 5%, 95% interval
""" """
mean = mu*self._scale + self._bias mean = mu * self._scale + self._bias
if full_cov: if full_cov:
if self.D >1: if self.D > 1:
raise NotImplementedError, "TODO" raise NotImplementedError, "TODO"
#Note. for D>1, we need to re-normalise all the outputs independently. # Note. for D>1, we need to re-normalise all the outputs independently.
# This will mess up computations of diag(true_var), below. # This will mess up computations of diag(true_var), below.
#note that the upper, lower quantiles should be the same shape as mean # note that the upper, lower quantiles should be the same shape as mean
true_var = (var + np.eye(var.shape[0])*self._variance)*self._scale**2 true_var = (var + np.eye(var.shape[0]) * self._variance) * self._scale ** 2
_5pc = mean - 2.*np.sqrt(np.diag(true_var)) _5pc = mean - 2.*np.sqrt(np.diag(true_var))
_95pc = mean + 2.*np.sqrt(np.diag(true_var)) _95pc = mean + 2.*np.sqrt(np.diag(true_var))
else: else:
true_var = (var + self._variance)*self._scale**2 true_var = (var + self._variance) * self._scale ** 2
_5pc = mean - 2.*np.sqrt(true_var) _5pc = mean - 2.*np.sqrt(true_var)
_95pc = mean + 2.*np.sqrt(true_var) _95pc = mean + 2.*np.sqrt(true_var)
return mean, true_var, _5pc, _95pc return mean, true_var, _5pc, _95pc
@ -80,5 +84,5 @@ class Gaussian(likelihood):
""" """
pass pass
def _gradients(self,partial): def _gradients(self, partial):
return np.sum(partial) return np.sum(partial)

View file

@ -16,6 +16,7 @@ class likelihood:
self.is_heteroscedastic : enables significant computational savings in GP self.is_heteroscedastic : enables significant computational savings in GP
self.precision : a scalar or vector representation of the effective target precision self.precision : a scalar or vector representation of the effective target precision
self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N
self.V : self.precision * self.Y
""" """
def __init__(self,data): def __init__(self,data):
raise ValueError, "this class is not to be instantiated" raise ValueError, "this class is not to be instantiated"

View file

@ -7,8 +7,7 @@ import scipy as sp
import pylab as pb import pylab as pb
from ..util.plot import gpplot from ..util.plot import gpplot
from scipy.special import gammaln, gamma from scipy.special import gammaln, gamma
#from GPy.likelihoods.likelihood_functions import likelihood_function from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
class likelihood_function: class likelihood_function:
""" """
@ -49,11 +48,11 @@ class probit(likelihood_function):
:param tau_i: precision of the cavity distribution (float) :param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float) :param v_i: mean/variance of the cavity distribution (float)
""" """
if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}. #if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}.
# TODO: some version of assert # TODO: some version of assert
z = data_i*v_i/np.sqrt(tau_i**2 + tau_i) z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = stats.norm.cdf(z) Z_hat = std_norm_cdf(z)
phi = stats.norm.pdf(z) phi = std_norm_pdf(z)
mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i)) mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
return Z_hat, mu_hat, sigma2_hat return Z_hat, mu_hat, sigma2_hat

View file

@ -54,6 +54,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
self._savedgradients = [] self._savedgradients = []
self._savederrors = [] self._savederrors = []
self._savedpsiKmm = [] self._savedpsiKmm = []
self._savedABCD = []
sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs) sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)
@ -135,6 +136,19 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
self._savedparams.append([self.f_call, self._get_params()]) self._savedparams.append([self.f_call, self._get_params()])
self._savedgradients.append([self.f_call, self._log_likelihood_gradients()]) self._savedgradients.append([self.f_call, self._log_likelihood_gradients()])
self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]]) self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]])
# sf2 = self.scale_factor ** 2
if self.likelihood.is_heteroscedastic:
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y)
# B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else:
A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
# B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
self._savedABCD.append([self.f_call, A, B, C, D])
# print "\nkl:", kl, "ll:", ll # print "\nkl:", kl, "ll:", ll
return ll - kl return ll - kl
@ -169,7 +183,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w') ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
return ax return ax
def plot_X_1d(self, fig=None, axes=None, fig_num="MRD X 1d", colors=None): def plot_X_1d(self, fig=None, axes=None, fig_num="LVM mu S 1d", colors=None):
""" """
Plot latent space X in 1D: Plot latent space X in 1D:
@ -196,7 +210,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
else: else:
ax = axes[i] ax = axes[i]
ax.plot(self.X, c='k', alpha=.3) ax.plot(self.X, c='k', alpha=.3)
plots.extend(ax.plot(self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{}}}$".format(i))) plots.extend(ax.plot(self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
ax.fill_between(np.arange(self.X.shape[0]), ax.fill_between(np.arange(self.X.shape[0]),
self.X.T[i] - 2 * np.sqrt(self.X_variance.T[i]), self.X.T[i] - 2 * np.sqrt(self.X_variance.T[i]),
self.X.T[i] + 2 * np.sqrt(self.X_variance.T[i]), self.X.T[i] + 2 * np.sqrt(self.X_variance.T[i]),
@ -251,6 +265,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
gradient_dict = dict(self._savedgradients) gradient_dict = dict(self._savedgradients)
kmm_dict = dict(self._savedpsiKmm) kmm_dict = dict(self._savedpsiKmm)
iters = np.array(param_dict.keys()) iters = np.array(param_dict.keys())
ABCD_dict = np.array(self._savedABCD)
self.showing = 0 self.showing = 0
# ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4) # ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4)
@ -281,14 +296,26 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
ha='center', va='center') ha='center', va='center')
figs[-1].canvas.draw() figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(.15, 0, 1, .86)) figs[-1].tight_layout(rect=(.15, 0, 1, .86))
# figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6)))
# fig = figs[-1]
# ax6 = fig.add_subplot(121)
# ax6.text(.5, .5, r"${\mathbf{K}_{mm}}$", color='magenta', alpha=.5, transform=ax6.transAxes,
# ha='center', va='center')
# ax7 = fig.add_subplot(122)
# ax7.text(.5, .5, r"${\frac{dL}{dK_{mm}}}$", color='magenta', alpha=.5, transform=ax7.transAxes,
# ha='center', va='center')
figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6))) figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6)))
fig = figs[-1] fig = figs[-1]
ax6 = fig.add_subplot(121) ax8 = fig.add_subplot(121)
ax6.text(.5, .5, r"${\mathbf{K}_{mm}}$", color='magenta', alpha=.5, transform=ax6.transAxes, ax8.text(.5, .5, r"${\mathbf{A,B,C,D}}$", color='k', alpha=.5, transform=ax8.transAxes,
ha='center', va='center')
ax7 = fig.add_subplot(122)
ax7.text(.5, .5, r"${\frac{dL}{dK_{mm}}}$", color='magenta', alpha=.5, transform=ax7.transAxes,
ha='center', va='center') ha='center', va='center')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 1], label='A')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 2], label='B')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 3], label='C')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 4], label='D')
ax8.legend()
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(.15, 0, 1, .86))
X, S, Z, theta = self._debug_filter_params(param_dict[self.showing]) X, S, Z, theta = self._debug_filter_params(param_dict[self.showing])
Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing]) Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing])
@ -330,16 +357,16 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
ax5.set_xticks(np.arange(len(theta))) ax5.set_xticks(np.arange(len(theta)))
ax5.set_xticklabels(self._get_param_names()[-len(theta):], rotation=17) ax5.set_xticklabels(self._get_param_names()[-len(theta):], rotation=17)
imkmm = ax6.imshow(kmm_dict[self.showing][0]) # imkmm = ax6.imshow(kmm_dict[self.showing][0])
from mpl_toolkits.axes_grid1 import make_axes_locatable # from mpl_toolkits.axes_grid1 import make_axes_locatable
divider = make_axes_locatable(ax6) # divider = make_axes_locatable(ax6)
caxkmm = divider.append_axes("right", "5%", pad="1%") # caxkmm = divider.append_axes("right", "5%", pad="1%")
cbarkmm = pylab.colorbar(imkmm, cax=caxkmm) # cbarkmm = pylab.colorbar(imkmm, cax=caxkmm)
#
imkmmdl = ax7.imshow(kmm_dict[self.showing][1]) # imkmmdl = ax7.imshow(kmm_dict[self.showing][1])
divider = make_axes_locatable(ax7) # divider = make_axes_locatable(ax7)
caxkmmdl = divider.append_axes("right", "5%", pad="1%") # caxkmmdl = divider.append_axes("right", "5%", pad="1%")
cbarkmmdl = pylab.colorbar(imkmmdl, cax=caxkmmdl) # cbarkmmdl = pylab.colorbar(imkmmdl, cax=caxkmmdl)
# Qleg = ax1.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)], # Qleg = ax1.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
# loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.15, 1, 1.15), # loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.15, 1, 1.15),
@ -420,13 +447,13 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
thetagrads.set_offsets(np.array([xtheta.ravel(), theta.ravel()]).T) thetagrads.set_offsets(np.array([xtheta.ravel(), theta.ravel()]).T)
thetagrads.set_UVC(Utheta, thetag) thetagrads.set_UVC(Utheta, thetag)
imkmm.set_data(kmm_dict[self.showing][0]) # imkmm.set_data(kmm_dict[self.showing][0])
imkmm.autoscale() # imkmm.autoscale()
cbarkmm.update_normal(imkmm) # cbarkmm.update_normal(imkmm)
#
imkmmdl.set_data(kmm_dict[self.showing][1]) # imkmmdl.set_data(kmm_dict[self.showing][1])
imkmmdl.autoscale() # imkmmdl.autoscale()
cbarkmmdl.update_normal(imkmmdl) # cbarkmmdl.update_normal(imkmmdl)
ax2.relim() ax2.relim()
# ax3.relim() # ax3.relim()
@ -452,4 +479,4 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
cidp = figs[0].canvas.mpl_connect('button_press_event', onclick) cidp = figs[0].canvas.mpl_connect('button_press_event', onclick)
cidd = figs[0].canvas.mpl_connect('motion_notify_event', motion) cidd = figs[0].canvas.mpl_connect('motion_notify_event', motion)
return ax1, ax2, ax3, ax4, ax5, ax6, ax7 return ax1, ax2, ax3, ax4, ax5 # , ax6, ax7

314
GPy/models/FITC.py Normal file
View file

@ -0,0 +1,314 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, tdot, symmetrify,pdinv
#from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
from ..util.plot import gpplot
from .. import kern
from scipy import stats, linalg
from sparse_GP import sparse_GP
def backsub_both_sides(L,X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1)
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
class FITC(sparse_GP):
def update_likelihood_approximation(self):
"""
Approximates a non-gaussian likelihood using Expectation Propagation
For a Gaussian (or direct: TODO) likelihood, no iteration is required:
this function does nothing
Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in sparse_GP.
The true precison is now 'true_precision' not 'precision'.
"""
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
#@profile
def _computations(self):
#factor Kmm
self.Lm = jitchol(self.Kmm)
self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy()
self.Diag0 = self.psi0 - np.diag(self.Qnn)
self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #Includes Diag0 in the precision
self.V_star = self.beta_star * self.likelihood.Y
# The rather complex computations of self.A
if self.has_uncertain_inputs:
raise NotImplementedError
else:
if self.likelihood.is_heteroscedastic:
assert self.likelihood.D == 1
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N)))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
# factor B
self.B = np.eye(self.M) + self.A
self.LB = jitchol(self.B)
self.LBi,info = linalg.lapack.flapack.dtrtrs(self.LB,np.eye(self.M),lower=1)
self.psi1V = np.dot(self.psi1, self.V_star)
# back substutue C into psi1V
tmp, info1 = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
b_psi1_Ki = self.beta_star * Kmmipsi1.T
Ki_pbp_Ki = np.dot(Kmmipsi1,b_psi1_Ki)
Kmmi = np.dot(self.Lmi.T,self.Lmi)
LBiLmi = np.dot(self.LBi,self.Lmi)
LBL_inv = np.dot(LBiLmi.T,LBiLmi)
VVT = np.outer(self.V_star,self.V_star)
VV_p_Ki = np.dot(VVT,Kmmipsi1.T)
Ki_pVVp_Ki = np.dot(Kmmipsi1,VV_p_Ki)
psi1beta = self.psi1*self.beta_star.T
H = self.Kmm + mdot(self.psi1,self.beta_star*self.psi1.T)
Hi, LH, LHi, logdetH = pdinv(H)
betapsi1TLmiLBi = np.dot(psi1beta.T,LBiLmi.T)
alpha = np.array([np.dot(a.T,a) for a in betapsi1TLmiLBi])[:,None]
gamma_1 = mdot(VVT,self.psi1.T,Hi)
pHip = mdot(self.psi1.T,Hi,self.psi1)
gamma_2 = mdot(self.beta_star*pHip,self.V_star)
gamma_3 = self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T
dL_dpsi0 = -0.5 * self.beta_star#dA_dpsi0
dL_dpsi0 += .5 *alpha #dC_dpsi0
dL_dpsi0 += 0.5*mdot(self.beta_star*pHip,self.V_star)**2 - self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T #dD_dpsi0
dA_dpsi1 = b_psi1_Ki
dC_dpsi1 = -np.dot(psi1beta.T,LBL_inv)
dD_dpsi1 = gamma_1
dD_dpsi1 += -mdot(psi1beta.T,Hi,self.psi1,gamma_1)
#dL_dpsi1 = b_psi1_Ki #dA_dpsi1
#dL_dpsi1 += -np.dot(psi1beta.T,LBL_inv) #dC_dpsi1
#dL_dpsi1 += gamma_1 - mdot(psi1beta.T,Hi,self.psi1,gamma_1) #dD_dpsi1
dL_dKmm = -0.5 * np.dot(Kmmipsi1,b_psi1_Ki) #dA_dKmm
dL_dKmm += -.5*Kmmi + .5*LBL_inv + mdot(LBL_inv,psi1beta,Kmmipsi1.T) #dC_dKmm
dL_dKmm += -.5 * mdot(Hi,self.psi1,gamma_1) #dD_dKmm
dA_dpsi0 = .5 * self.V_star**2
dA_dpsi0_theta = self.kern.dKdiag_dtheta(dA_dpsi0,X=self.X)
dA_dpsi1_theta = 0
dA_dpsi1_X = 0
dA_dKmm_theta = 0
dA_dKmm_X = 0
_dC_dpsi1_dtheta = 0
_dC_dpsi1_dX = 0
_dC_dKmm_dtheta = 0
_dC_dKmm_dX = 0
_dD_dpsi1_dtheta_1 = 0
_dD_dpsi1_dX_1 = 0
_dD_dKmm_dtheta_1 = 0
_dD_dKmm_dX_1 = 0
_dD_dpsi1_dtheta_2 = 0
_dD_dpsi1_dX_2 = 0
_dD_dKmm_dtheta_2 = 0
_dD_dKmm_dX_2 = 0
for psi1_n,V_n,X_n,alpha_n,gamma_n,gamma_k in zip(self.psi1.T,self.V_star,self.X,alpha,gamma_2,gamma_3):
psin_K = np.dot(psi1_n[None,:],Kmmi)
_dA_dpsi1 = -V_n**2 * np.dot(psi1_n[None,:],Kmmi)
_dC_dpsi1 = - alpha_n * np.dot(psi1_n[None,:],Kmmi)
_dD_dpsi1_1 = - gamma_n**2 * np.dot(psi1_n[None,:],Kmmi)
_dD_dpsi1_2 = 2. * gamma_k * np.dot(psi1_n[None,:],Kmmi)
_dA_dKmm = .5*V_n**2 * np.dot(psin_K.T,psin_K)
_dC_dKmm = .5 * alpha_n * np.dot(psin_K.T,psin_K)
_dD_dKmm_1 = .5*gamma_n**2 * np.dot(psin_K.T,psin_K)
_dD_dKmm_2 = - gamma_n * np.dot(psin_K.T,psin_K)
_dKmm = .5*V_n**2 * np.dot(psin_K.T,psin_K) #Diag_dA_dKmm
_dKmm += .5 * alpha_n * np.dot(psin_K.T,psin_K) #Diag_dC_dKmm
_dKmm += .5*gamma_n**2 * np.dot(psin_K.T,psin_K) - gamma_n * np.dot(psin_K.T,psin_K) #Diag_dD_dKmm
_dKmm_dtheta = self.kern.dK_dtheta(_dKmm,self.Z)
dA_dpsi1_theta += self.kern.dK_dtheta(_dA_dpsi1,X_n[None,:],self.Z)
_dC_dpsi1_dtheta += self.kern.dK_dtheta(_dC_dpsi1,X_n[None,:],self.Z)
_dD_dpsi1_dtheta_1 += self.kern.dK_dtheta(_dD_dpsi1_1,X_n[None,:],self.Z)
_dD_dpsi1_dtheta_2 += self.kern.dK_dtheta(_dD_dpsi1_2,X_n[None,:],self.Z)
dA_dKmm_theta += self.kern.dK_dtheta(_dA_dKmm,self.Z)
_dC_dKmm_dtheta += self.kern.dK_dtheta(_dC_dKmm,self.Z)
_dD_dKmm_dtheta_1 += self.kern.dK_dtheta(_dD_dKmm_1,self.Z)
_dD_dKmm_dtheta_2 += self.kern.dK_dtheta(_dD_dKmm_2,self.Z)
dA_dpsi1_X += self.kern.dK_dX(_dA_dpsi1.T,self.Z,X_n[None,:])
_dC_dpsi1_dX += self.kern.dK_dX(_dC_dpsi1.T,self.Z,X_n[None,:])
_dD_dpsi1_dX_1 += self.kern.dK_dX(_dD_dpsi1_1.T,self.Z,X_n[None,:])
_dD_dpsi1_dX_2 += self.kern.dK_dX(_dD_dpsi1_2.T,self.Z,X_n[None,:])
dA_dKmm_X += 2.*self.kern.dK_dX(_dA_dKmm,self.Z)
_dC_dKmm_dX += 2.*self.kern.dK_dX(_dC_dKmm,self.Z)
_dD_dKmm_dX_1 += 2.*self.kern.dK_dX(_dD_dKmm_1,self.Z)
_dD_dKmm_dX_2 += 2.*self.kern.dK_dX(_dD_dKmm_2,self.Z)
self._dL_dtheta = self.kern.dKdiag_dtheta(dL_dpsi0,self.X)
self._dL_dtheta += self.kern.dK_dtheta(dA_dpsi1 + dC_dpsi1 + dD_dpsi1,self.X,self.Z)
self._dL_dtheta += self.kern.dK_dtheta(dL_dKmm,X=self.Z)
self._dL_dtheta += dA_dpsi0_theta
self._dL_dtheta += dA_dKmm_theta + _dC_dKmm_dtheta + _dD_dKmm_dtheta_1 + _dD_dKmm_dtheta_2
self._dL_dtheta += dA_dpsi1_theta + _dC_dpsi1_dtheta + _dD_dpsi1_dtheta_2 + _dD_dpsi1_dtheta_1
self._dL_dX = self.kern.dK_dX(dA_dpsi1.T,self.Z,self.X) + self.kern.dK_dX(dC_dpsi1.T,self.Z,self.X) + self.kern.dK_dX(dD_dpsi1.T,self.Z,self.X)
self._dL_dX += 2. * self.kern.dK_dX(dL_dKmm,X=self.Z)
self._dL_dX += dA_dpsi1_X + dA_dKmm_X + _dC_dpsi1_dX + _dC_dKmm_dX + _dD_dpsi1_dX_2 + _dD_dKmm_dX_2 + _dD_dpsi1_dX_1 + _dD_dKmm_dX_1
# the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedatic derivates not implemented"
else:
# likelihood is not heterscedatic
dbstar_dnoise = self.likelihood.precision * (self.beta_star**2 * self.Diag0[:,None] - self.beta_star)
Lmi_psi1 = mdot(self.Lmi,self.psi1)
LBiLmipsi1 = np.dot(self.LBi,Lmi_psi1)
aux_0 = np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V)
dA_dnoise = 0.5 * self.D * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.D * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
alpha = mdot(LBiLmipsi1,self.V_star)
alpha_ = mdot(LBiLmipsi1.T,alpha)
dD_dnoise_2 = -0.5 * self.D * np.sum(alpha_**2 * dbstar_dnoise )
dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y)
dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star)
dD_dnoise = dD_dnoise_1 + dD_dnoise_2
self.partial_for_likelihood = dA_dnoise + dC_dnoise + dD_dnoise
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
C = -self.D * (np.sum(np.log(np.diag(self.LB))))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + C + D
def _log_likelihood_gradients(self):
pass
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self):
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
#dL_dtheta = self.dA_dtheta + self.dlogB_dtheta + self.dD_dtheta
dL_dtheta = self._dL_dtheta #FIXME
return dL_dtheta
def dL_dZ(self):
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
#dL_dZ = self.dA_dX + self.dlogB_dX + self.dD_dX
dL_dZ = self._dL_dX #FIXME
return dL_dZ
def _raw_predict(self, Xnew, which_parts, full_cov=False):
if self.likelihood.is_heteroscedastic:
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.likelihood.precision.flatten())
self.Diag = self.Diag0 * Iplus_Dprod_i
self.P = Iplus_Dprod_i[:,None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi,self.psi1)
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
self.w = self.Diag * self.likelihood.v_tilde
self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde))
self.mu = self.w + np.dot(self.P,self.gamma)
"""
Make a prediction for the generalized FITC model
Arguments
---------
X : Input prediction data - Nx1 numpy array (floats)
"""
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1)
C = np.eye(self.M) - np.dot(V.T,V)
mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
#self.C = C
#self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
#self.mu_u = mu_u
#self.U = U
# q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
mu_H = np.dot(mu_u,self.mu)
self.mu_H = mu_H
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
KR0T = np.dot(Kx.T,self.Lmi.T)
mu_star = np.dot(KR0T,mu_H)
if full_cov:
Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed?
var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) # TODO: RA, is this line needed?
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
return mu_star[:,None],var
else:
raise NotImplementedError, "homoscedastic fitc not implemented"
"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]
"""

View file

@ -3,10 +3,11 @@
import numpy as np import numpy as np
from scipy import linalg
import pylab as pb import pylab as pb
from .. import kern from .. import kern
from ..core import model from ..core import model
from ..util.linalg import pdinv, mdot from ..util.linalg import pdinv, mdot, tdot
from ..util.plot import gpplot, x_frame1D, x_frame2D, Tango from ..util.plot import gpplot, x_frame1D, x_frame2D, Tango
from ..likelihoods import EP, Laplace from ..likelihoods import EP, Laplace
@ -59,6 +60,7 @@ class GP(model):
""" """
TODO: one day we might like to learn Z by gradient methods? TODO: one day we might like to learn Z by gradient methods?
""" """
#FIXME: this doesn;t live here.
return np.zeros_like(self.Z) return np.zeros_like(self.Z)
def _set_params(self, p): def _set_params(self, p):
@ -78,10 +80,14 @@ class GP(model):
# the gradient of the likelihood wrt the covariance matrix # the gradient of the likelihood wrt the covariance matrix
if self.likelihood.YYT is None: if self.likelihood.YYT is None:
alpha = np.dot(self.Ki, self.likelihood.Y) #alpha = np.dot(self.Ki, self.likelihood.Y)
self.dL_dK = 0.5 * (np.dot(alpha, alpha.T) - self.D * self.Ki) alpha,_ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y,lower=1)
self.dL_dK = 0.5 * (tdot(alpha) - self.D * self.Ki)
else: else:
tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki) #tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
self.dL_dK = 0.5 * (tmp - self.D * self.Ki) self.dL_dK = 0.5 * (tmp - self.D * self.Ki)
def _get_params(self): def _get_params(self):
@ -113,7 +119,9 @@ class GP(model):
Computes the model fit using YYT if it's available Computes the model fit using YYT if it's available
""" """
if self.likelihood.YYT is None: if self.likelihood.YYT is None:
return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y))) tmp, _ = linalg.lapack.flapack.dtrtrs(self.L, np.asfortranarray(self.likelihood.Y), lower=1)
return -0.5 * np.sum(np.square(tmp))
#return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
else: else:
return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT)) return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT))
@ -157,18 +165,15 @@ class GP(model):
return np.hstack((dL_dthetaK, dL_dthetaL)) return np.hstack((dL_dthetaK, dL_dthetaL))
#return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK)))) #return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False): def _raw_predict(self, _Xnew, which_parts='all', full_cov=False,stop=False):
""" """
Internal helper function for making predictions, does not account Internal helper function for making predictions, does not account
for normalization or likelihood for normalization or likelihood
#TODO: which_parts does nothing
""" """
Kx = self.kern.K(self.X, _Xnew,which_parts=which_parts) Kx = self.kern.K(_Xnew,self.X,which_parts=which_parts).T
mu = np.dot(np.dot(Kx.T, self.Ki), self.likelihood.Y) #KiKx = np.dot(self.Ki, Kx)
KiKx = np.dot(self.Ki, Kx) KiKx, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(Kx), lower=1)
mu = np.dot(KiKx.T, self.likelihood.Y)
if full_cov: if full_cov:
Kxx = self.kern.K(_Xnew, which_parts=which_parts) Kxx = self.kern.K(_Xnew, which_parts=which_parts)
var = Kxx - np.dot(KiKx.T, Kx) var = Kxx - np.dot(KiKx.T, Kx)
@ -176,6 +181,8 @@ class GP(model):
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts) Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
var = Kxx - np.sum(np.multiply(KiKx, Kx), 0) var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
var = var[:, None] var = var[:, None]
if stop:
debug_this
return mu, var return mu, var
@ -212,7 +219,8 @@ class GP(model):
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False): def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False):
""" """
Plot the GP's view of the world, where the data is normalized and the likelihood is Gaussian Plot the GP's view of the world, where the data is normalized and the
likelihood is Gaussian.
:param samples: the number of a posteriori samples to plot :param samples: the number of a posteriori samples to plot
:param which_data: which if the training data to plot (default all) :param which_data: which if the training data to plot (default all)
@ -227,8 +235,8 @@ class GP(model):
- In two dimsensions, a contour-plot shows the mean predicted function - In two dimsensions, a contour-plot shows the mean predicted function
- In higher dimensions, we've no implemented this yet !TODO! - In higher dimensions, we've no implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions using which_data and which_functions Can plot only part of the data and part of the posterior functions
Plot the data's view of the world, with non-normalized values and GP predictions passed through the likelihood using which_data and which_functions
""" """
if which_data == 'all': if which_data == 'all':
which_data = slice(None) which_data = slice(None)

View file

@ -45,7 +45,7 @@ class GPLVM(GP):
return np.hstack((self.X.flatten(), GP._get_params(self))) return np.hstack((self.X.flatten(), GP._get_params(self)))
def _set_params(self,x): def _set_params(self,x):
self.X = x[:self.X.size].reshape(self.N,self.Q).copy() self.X = x[:self.N*self.Q].reshape(self.N,self.Q).copy()
GP._set_params(self, x[self.X.size:]) GP._set_params(self, x[self.X.size:])
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):

View file

@ -12,3 +12,4 @@ from sparse_GPLVM import sparse_GPLVM
from Bayesian_GPLVM import Bayesian_GPLVM from Bayesian_GPLVM import Bayesian_GPLVM
from mrd import MRD from mrd import MRD
from generalized_FITC import generalized_FITC from generalized_FITC import generalized_FITC
from FITC import FITC

View file

@ -9,6 +9,12 @@ from .. import kern
from scipy import stats, linalg from scipy import stats, linalg
from sparse_GP import sparse_GP from sparse_GP import sparse_GP
def backsub_both_sides(L,X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1)
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
class generalized_FITC(sparse_GP): class generalized_FITC(sparse_GP):
""" """
Naish-Guzman, A. and Holden, S. (2008) implemantation of EP with FITC. Naish-Guzman, A. and Holden, S. (2008) implemantation of EP with FITC.
@ -33,7 +39,7 @@ class generalized_FITC(sparse_GP):
self.Z = Z self.Z = Z
self.M = self.Z.shape[0] self.M = self.Z.shape[0]
self._precision = likelihood.precision self.true_precision = likelihood.precision
sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False) sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False)
@ -51,13 +57,16 @@ class generalized_FITC(sparse_GP):
For a Gaussian (or direct: TODO) likelihood, no iteration is required: For a Gaussian (or direct: TODO) likelihood, no iteration is required:
this function does nothing this function does nothing
Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in sparse_GP.
The true precison is now 'true_precision' not 'precision'.
""" """
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs" raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else: else:
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._precision = self.likelihood.precision # Save the true precision self.true_precision = self.likelihood.precision # Save the true precision
self.likelihood.precision = self._precision/(1. + self._precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation self.likelihood.precision = self.true_precision/(1. + self.true_precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation
self._set_params(self._get_params()) # update the GP self._set_params(self._get_params()) # update the GP
def _FITC_computations(self): def _FITC_computations(self):
@ -69,23 +78,23 @@ class generalized_FITC(sparse_GP):
- removes the extra terms computed in the sparse_GP approximation - removes the extra terms computed in the sparse_GP approximation
- computes the likelihood gradients wrt the true precision. - computes the likelihood gradients wrt the true precision.
""" """
#NOTE the true precison is now '_precison' not 'precision' #NOTE the true precison is now 'true_precision' not 'precision'
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
# Compute generalized FITC's diagonal term of the covariance # Compute generalized FITC's diagonal term of the covariance
self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1) self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1)
#self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm)
#self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1)
#a = kj
self.Diag0 = self.psi0 - np.diag(self.Qnn) self.Diag0 = self.psi0 - np.diag(self.Qnn)
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self._precision.flatten()) Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.true_precision.flatten())
self.Diag = self.Diag0 * Iplus_Dprod_i self.Diag = self.Diag0 * Iplus_Dprod_i
#self.Diag = self.Diag0/(1.+ self.Diag0 * self._precision.flatten())
self.P = Iplus_Dprod_i[:,None] * self.psi1.T self.P = Iplus_Dprod_i[:,None] * self.psi1.T
#self.P = (self.Diag / self.Diag0)[:,None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi,self.psi1) self.RPT0 = np.dot(self.Lmi,self.psi1)
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T)) self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
#self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - Iplus_Dprod_i/self.Diag0)[:,None]*self.RPT0.T))
#self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1) self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
self.RPT = np.dot(self.R,self.P.T) self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT) self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
@ -94,7 +103,16 @@ class generalized_FITC(sparse_GP):
self.mu = self.w + np.dot(self.P,self.gamma) self.mu = self.w + np.dot(self.P,self.gamma)
# Remove extra term from dL_dpsi1 # Remove extra term from dL_dpsi1
self.dL_dpsi1 -= mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB self.dL_dpsi1 -= mdot(self.Lmi.T,Lmipsi1*self.likelihood.precision.flatten().reshape(1,self.N))
#self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm)
#self.dL_dpsi1 -= mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
#########333333
#self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
#########333333
else: else:
raise NotImplementedError, "homoscedastic fitc not implemented" raise NotImplementedError, "homoscedastic fitc not implemented"
# Remove extra term from dL_dpsi1 # Remove extra term from dL_dpsi1
@ -140,8 +158,11 @@ class generalized_FITC(sparse_GP):
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y) A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
else: else:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2)) C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.M*np.log(sf2))
D = 0.5*np.trace(self.Cpsi1VVpsi1) #C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V))
#self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
#D_ = 0.5*np.trace(self.Cpsi1VVpsi1)
return A+C+D return A+C+D
def _raw_predict(self, Xnew, which_parts, full_cov=False): def _raw_predict(self, Xnew, which_parts, full_cov=False):

View file

@ -273,7 +273,7 @@ class MRD(model):
def _handle_plotting(self, fig_num, axes, plotf): def _handle_plotting(self, fig_num, axes, plotf):
if axes is None: if axes is None:
fig = pylab.figure(num=fig_num, figsize=(4 * len(self.bgplvms), 3 * len(self.bgplvms))) fig = pylab.figure(num=fig_num, figsize=(4 * len(self.bgplvms), 3))
for i, g in enumerate(self.bgplvms): for i, g in enumerate(self.bgplvms):
if axes is None: if axes is None:
ax = fig.add_subplot(1, len(self.bgplvms), i + 1) ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
@ -287,6 +287,9 @@ class MRD(model):
else: else:
return pylab.gcf() return pylab.gcf()
def plot_X_1d(self):
return self.gref.plot_X_1d()
def plot_X(self, fig_num="MRD Predictions", axes=None): def plot_X(self, fig_num="MRD Predictions", axes=None):
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.X)) fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.X))
return fig return fig

View file

@ -3,17 +3,12 @@
import numpy as np import numpy as np
import pylab as pb import pylab as pb
from ..util.linalg import mdot, jitchol, tdot, symmetrify from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides
from ..util.plot import gpplot from ..util.plot import gpplot
from .. import kern from .. import kern
from GP import GP from GP import GP
from scipy import linalg from scipy import linalg
def backsub_both_sides(L,X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1)
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
class sparse_GP(GP): class sparse_GP(GP):
""" """
Variational sparse GP model Variational sparse GP model
@ -35,22 +30,22 @@ class sparse_GP(GP):
""" """
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False): def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
self.scale_factor = 100.0# a scaling factor to help keep the algorithm stable # self.scale_factor = 100.0 # a scaling factor to help keep the algorithm stable
self.auto_scale_factor = False # self.auto_scale_factor = False
self.Z = Z self.Z = Z
self.M = Z.shape[0] self.M = Z.shape[0]
self.likelihood = likelihood self.likelihood = likelihood
if X_variance is None: if X_variance is None:
self.has_uncertain_inputs=False self.has_uncertain_inputs = False
else: else:
assert X_variance.shape==X.shape assert X_variance.shape == X.shape
self.has_uncertain_inputs=True self.has_uncertain_inputs = True
self.X_variance = X_variance self.X_variance = X_variance
GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X) GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X)
#normalize X uncertainty also # normalize X uncertainty also
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.X_variance /= np.square(self._Xstd) self.X_variance /= np.square(self._Xstd)
@ -59,141 +54,152 @@ class sparse_GP(GP):
# kernel computations, using BGPLVM notation # kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z) self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_variance) self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance)
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_variance).T self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_variance) self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance)
else: else:
self.psi0 = self.kern.Kdiag(self.X) self.psi0 = self.kern.Kdiag(self.X)
self.psi1 = self.kern.K(self.Z,self.X) self.psi1 = self.kern.K(self.Z, self.X)
self.psi2 = None self.psi2 = None
def _computations(self): def _computations(self):
sf = self.scale_factor # sf = self.scale_factor
sf2 = sf**2 # sf2 = sf ** 2
#factor Kmm # factor Kmm
self.Lm = jitchol(self.Kmm) self.Lm = jitchol(self.Kmm)
#The rather complex computations of self.A # The rather complex computations of self.A
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
assert self.likelihood.D == 1 #TODO: what if the likelihood is heterscedatic and there are multiple independent outputs? assert self.likelihood.D == 1 # TODO: what if the likelihood is heterscedatic and there are multiple independent outputs?
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0) # psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1) / sf2)).sum(0)
psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1))).sum(0)
evals, evecs = linalg.eigh(psi2_beta_scaled) evals, evecs = linalg.eigh(psi2_beta_scaled)
clipped_evals = np.clip(evals,0.,1e6) # TODO: make clipping configurable clipped_evals = np.clip(evals, 0., 1e15) # TODO: make clipping configurable
if not np.allclose(evals, clipped_evals): if not np.allclose(evals, clipped_evals):
print "Warning: clipping posterior eigenvalues" print "Warning: clipping posterior eigenvalues"
tmp = evecs*np.sqrt(clipped_evals) tmp = evecs * np.sqrt(clipped_evals)
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1) tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp) self.A = tdot(tmp)
else: else:
tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf) # tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)) / sf)
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1) tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp) self.A = tdot(tmp)
else: else:
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0) # psi2_beta_scaled = (self.psi2 * (self.likelihood.precision / sf2)).sum(0)
psi2_beta_scaled = (self.psi2 * (self.likelihood.precision)).sum(0)
evals, evecs = linalg.eigh(psi2_beta_scaled) evals, evecs = linalg.eigh(psi2_beta_scaled)
clipped_evals = np.clip(evals,0.,1e6) # TODO: make clipping configurable clipped_evals = np.clip(evals, 0., 1e15) # TODO: make clipping configurable
if not np.allclose(evals, clipped_evals): if not np.allclose(evals, clipped_evals):
print "Warning: clipping posterior eigenvalues" print "Warning: clipping posterior eigenvalues"
tmp = evecs*np.sqrt(clipped_evals) tmp = evecs * np.sqrt(clipped_evals)
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1) tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp) self.A = tdot(tmp)
else: else:
tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf) # tmp = self.psi1 * (np.sqrt(self.likelihood.precision) / sf)
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1) tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp) self.A = tdot(tmp)
#factor B # factor B
self.B = np.eye(self.M)/sf2 + self.A # self.B = np.eye(self.M) / sf2 + self.A
self.B = np.eye(self.M) + self.A
self.LB = jitchol(self.B) self.LB = jitchol(self.B)
self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y # TODO: make a switch for either first compute psi1V, or VV.T
self.psi1V = np.dot(self.psi1, self.V) self.psi1V = np.dot(self.psi1, self.likelihood.V)
#back substutue C into psi1V # back substutue C into psi1V
tmp,info1 = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.psi1V),lower=1,trans=0) tmp, info1 = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V,_ = linalg.lapack.flapack.dtrtrs(self.LB,np.asfortranarray(tmp),lower=1,trans=0) self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
tmp,info2 = linalg.lapack.flapack.dpotrs(self.LB,tmp,lower=1) tmp, info2 = linalg.lapack.flapack.dpotrs(self.LB, tmp, lower=1)
self.Cpsi1V,info3 = linalg.lapack.flapack.dtrtrs(self.Lm,tmp,lower=1,trans=1) self.Cpsi1V, info3 = linalg.lapack.flapack.dtrtrs(self.Lm, tmp, lower=1, trans=1)
# Compute dL_dKmm # Compute dL_dKmm
tmp = tdot(self._LBi_Lmi_psi1V) tmp = tdot(self._LBi_Lmi_psi1V)
self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.D*np.eye(self.M) + tmp) self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.D * np.eye(self.M) + tmp)
tmp = -0.5*self.DBi_plus_BiPBi/sf2 # tmp = -0.5 * self.DBi_plus_BiPBi / sf2
tmp += -0.5*self.B*sf2*self.D # tmp += -0.5 * self.B * sf2 * self.D
tmp += self.D*np.eye(self.M) tmp = -0.5 * self.DBi_plus_BiPBi
self.dL_dKmm = backsub_both_sides(self.Lm,tmp) tmp += -0.5 * self.B * self.D
tmp += self.D * np.eye(self.M)
self.dL_dKmm = backsub_both_sides(self.Lm, tmp)
# Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten() self.dL_dpsi0 = -0.5 * self.D * (self.likelihood.precision * np.ones([self.N, 1])).flatten()
self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T) self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T)
dL_dpsi2_beta = 0.5*backsub_both_sides(self.Lm,self.D*np.eye(self.M) - self.DBi_plus_BiPBi) dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.D * np.eye(self.M) - self.DBi_plus_BiPBi)
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.dL_dpsi2 = self.likelihood.precision[:,None,None]*dL_dpsi2_beta[None,:,:] self.dL_dpsi2 = self.likelihood.precision[:, None, None] * dL_dpsi2_beta[None, :, :]
else: else:
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta,self.psi1*self.likelihood.precision.reshape(1,self.N)) self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.N))
self.dL_dpsi2 = None self.dL_dpsi2 = None
else: else:
dL_dpsi2 = self.likelihood.precision*dL_dpsi2_beta dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
#repeat for each of the N psi_2 matrices # repeat for each of the N psi_2 matrices
self.dL_dpsi2 = np.repeat(dL_dpsi2[None,:,:],self.N,axis=0) self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.N, axis=0)
else: else:
#subsume back into psi1 (==Kmn) # subsume back into psi1 (==Kmn)
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2,self.psi1) self.dL_dpsi1 += 2.*np.dot(dL_dpsi2, self.psi1)
self.dL_dpsi2 = None self.dL_dpsi2 = None
#the partial derivative vector for the likelihood # the partial derivative vector for the likelihood
if self.likelihood.Nparams ==0: if self.likelihood.Nparams == 0:
#save computation here. # save computation here.
self.partial_for_likelihood = None self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic: elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedatic derivates not implemented" raise NotImplementedError, "heteroscedatic derivates not implemented"
else: else:
#likelihood is not heterscedatic # likelihood is not heterscedatic
self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2 self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2) # self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision * sf2)
self.partial_for_likelihood += self.likelihood.precision*(0.5*np.sum(self.A*self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V))) self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
def log_likelihood(self): def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """ """ Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2 # sf2 = self.scale_factor ** 2
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y) A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
B = -0.5*self.D*(np.sum(self.likelihood.precision.flatten()*self.psi0) - np.trace(self.A)*sf2) # B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else: else:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT A = -0.5 * self.N * self.D * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2) # B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.M*np.log(sf2)) B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V)) # C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5 * self.M * np.log(sf2))
return A+B+C+D C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + B + C + D
def _set_params(self, p): def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q) self.Z = p[:self.M * self.Q].reshape(self.M, self.Q)
self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam]) self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:]) self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:])
self._compute_kernel_matrices() self._compute_kernel_matrices()
#if self.auto_scale_factor: # if self.auto_scale_factor:
# self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision) # self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
#if self.auto_scale_factor: # if self.auto_scale_factor:
#if self.likelihood.is_heteroscedastic: # if self.likelihood.is_heteroscedastic:
#self.scale_factor = max(100,np.sqrt(self.psi2_beta_scaled.sum(0).mean())) # self.scale_factor = max(100,np.sqrt(self.psi2_beta_scaled.sum(0).mean()))
#else: # else:
#self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision) # self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
self.scale_factor = 1. # self.scale_factor = 100.
self._computations() self._computations()
def _get_params(self): def _get_params(self):
return np.hstack([self.Z.flatten(),GP._get_params(self)]) return np.hstack([self.Z.flatten(), GP._get_params(self)])
def _get_param_names(self): def _get_param_names(self):
return sum([['iip_%i_%i'%(i,j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])],[]) + GP._get_param_names(self) return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], []) + GP._get_param_names(self)
def update_likelihood_approximation(self): def update_likelihood_approximation(self):
""" """
@ -205,9 +211,9 @@ class sparse_GP(GP):
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
raise NotImplementedError, "EP approximation not implemented for uncertain inputs" raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
else: else:
self.likelihood.fit_DTC(self.Kmm,self.psi1) self.likelihood.fit_DTC(self.Kmm, self.psi1)
#self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) # self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP self._set_params(self._get_params()) # update the GP
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
@ -217,13 +223,13 @@ class sparse_GP(GP):
""" """
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
""" """
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z) dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_variance) dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_variance) dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z,self.X, self.X_variance) dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance)
else: else:
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X) dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.Z, self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X) dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
return dL_dtheta return dL_dtheta
@ -243,17 +249,17 @@ class sparse_GP(GP):
def _raw_predict(self, Xnew, which_parts='all', full_cov=False): def _raw_predict(self, Xnew, which_parts='all', full_cov=False):
"""Internal helper function for making predictions, does not account for normalization""" """Internal helper function for making predictions, does not account for normalization"""
Bi,_ = linalg.lapack.flapack.dpotri(self.LB,lower=0) # WTH? this lower switch should be 1, but that doesn't work! Bi, _ = linalg.lapack.flapack.dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work!
symmetrify(Bi) symmetrify(Bi)
Kmmi_LmiBLmi = backsub_both_sides(self.Lm,np.eye(self.M) - Bi) Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.M) - Bi)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts) Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
mu = np.dot(Kx.T, self.Cpsi1V/self.scale_factor) mu = np.dot(Kx.T, self.Cpsi1V) # / self.scale_factor)
if full_cov: if full_cov:
Kxx = self.kern.K(Xnew,which_parts=which_parts) Kxx = self.kern.K(Xnew, which_parts=which_parts)
var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) #NOTE this won't work for plotting var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting
else: else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts) Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
var = Kxx - np.sum(Kx*np.dot(Kmmi_LmiBLmi, Kx),0) var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
return mu,var[:,None] return mu, var[:, None]

View file

@ -23,6 +23,7 @@ class warpedGP(GP):
self.warping_function = TanhWarpingFunction_d(warping_terms) self.warping_function = TanhWarpingFunction_d(warping_terms)
self.warping_params = (np.random.randn(self.warping_function.n_terms*3+1,) * 1) self.warping_params = (np.random.randn(self.warping_function.n_terms*3+1,) * 1)
Y = self._scale_data(Y)
self.has_uncertain_inputs = False self.has_uncertain_inputs = False
self.Y_untransformed = Y.copy() self.Y_untransformed = Y.copy()
self.predict_in_warped_space = False self.predict_in_warped_space = False
@ -30,6 +31,14 @@ class warpedGP(GP):
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
def _scale_data(self, Y):
self._Ymax = Y.max()
self._Ymin = Y.min()
return (Y-self._Ymin)/(self._Ymax-self._Ymin) - 0.5
def _unscale_data(self, Y):
return (Y + 0.5)*(self._Ymax - self._Ymin) + self._Ymin
def _set_params(self, x): def _set_params(self, x):
self.warping_params = x[:self.warping_function.num_parameters] self.warping_params = x[:self.warping_function.num_parameters]
Y = self.transform_data() Y = self.transform_data()
@ -79,5 +88,5 @@ class warpedGP(GP):
if self.predict_in_warped_space: if self.predict_in_warped_space:
mu = self.warping_function.f_inv(mu, self.warping_params) mu = self.warping_function.f_inv(mu, self.warping_params)
var = self.warping_function.f_inv(var, self.warping_params) var = self.warping_function.f_inv(var, self.warping_params)
mu = self._unscale_data(mu)
return mu, var return mu, var

View file

@ -9,6 +9,7 @@ from GPy.inference.conjugate_gradient_descent import CGD, RUNNING
import pylab import pylab
import time import time
from scipy.optimize.optimize import rosen, rosen_der from scipy.optimize.optimize import rosen, rosen_der
from GPy.inference.gradient_descent_update_rules import PolakRibiere
class Test(unittest.TestCase): class Test(unittest.TestCase):
@ -25,12 +26,12 @@ class Test(unittest.TestCase):
restarts = 10 restarts = 10
for _ in range(restarts): for _ in range(restarts):
try: try:
x0 = numpy.random.randn(N) * 300 x0 = numpy.random.randn(N) * 10
res = opt.opt(f, df, x0, messages=0, res = opt.opt(f, df, x0, messages=0, maxiter=1000, gtol=1e-15)
maxiter=1000, gtol=1e-10) assert numpy.allclose(res[0], 0, atol=1e-5)
assert numpy.allclose(res[0], 0, atol=1e-3)
break break
except: except AssertionError:
import ipdb;ipdb.set_trace()
# RESTART # RESTART
pass pass
else: else:
@ -46,9 +47,9 @@ class Test(unittest.TestCase):
restarts = 10 restarts = 10
for _ in range(restarts): for _ in range(restarts):
try: try:
x0 = numpy.random.randn(N) * .5 x0 = (numpy.random.randn(N) * .5) + numpy.ones(N)
res = opt.opt(f, df, x0, messages=0, res = opt.opt(f, df, x0, messages=0,
maxiter=5e2, gtol=1e-2) maxiter=1e3, gtol=1e-12)
assert numpy.allclose(res[0], 1, atol=.1) assert numpy.allclose(res[0], 1, atol=.1)
break break
except: except:
@ -67,14 +68,16 @@ if __name__ == "__main__":
N = 2 N = 2
A = numpy.random.rand(N) * numpy.eye(N) A = numpy.random.rand(N) * numpy.eye(N)
b = numpy.random.rand(N) * 0 b = numpy.random.rand(N) * 0
# f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b) f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b)
# df = lambda x: numpy.dot(A, x) - b df = lambda x: numpy.dot(A, x) - b
f = rosen # f = rosen
df = rosen_der # df = rosen_der
x0 = numpy.random.randn(N) * .5 x0 = (numpy.random.randn(N) * .5) + numpy.ones(N)
print x0
opt = CGD() opt = CGD()
pylab.ion()
fig = pylab.figure("cgd optimize") fig = pylab.figure("cgd optimize")
if fig.axes: if fig.axes:
ax = fig.axes[0] ax = fig.axes[0]
@ -83,13 +86,14 @@ if __name__ == "__main__":
ax = fig.add_subplot(111, projection='3d') ax = fig.add_subplot(111, projection='3d')
interpolation = 40 interpolation = 40
# x, y = numpy.linspace(.5, 1.5, interpolation)[:, None], numpy.linspace(.5, 1.5, interpolation)[:, None]
x, y = numpy.linspace(-1, 1, interpolation)[:, None], numpy.linspace(-1, 1, interpolation)[:, None] x, y = numpy.linspace(-1, 1, interpolation)[:, None], numpy.linspace(-1, 1, interpolation)[:, None]
X, Y = numpy.meshgrid(x, y) X, Y = numpy.meshgrid(x, y)
fXY = numpy.array([f(numpy.array([x, y])) for x, y in zip(X.flatten(), Y.flatten())]).reshape(interpolation, interpolation) fXY = numpy.array([f(numpy.array([x, y])) for x, y in zip(X.flatten(), Y.flatten())]).reshape(interpolation, interpolation)
ax.plot_wireframe(X, Y, fXY) ax.plot_wireframe(X, Y, fXY)
xopts = [x0.copy()] xopts = [x0.copy()]
optplts, = ax.plot3D([x0[0]], [x0[1]], zs=f(x0), marker='o', color='r') optplts, = ax.plot3D([x0[0]], [x0[1]], zs=f(x0), marker='', color='r')
raw_input("enter to start optimize") raw_input("enter to start optimize")
res = [0] res = [0]
@ -102,11 +106,7 @@ if __name__ == "__main__":
if r[-1] != RUNNING: if r[-1] != RUNNING:
res[0] = r res[0] = r
p, c = opt.opt_async(f, df, x0.copy(), callback, messages=True, maxiter=1000, res[0] = opt.opt(f, df, x0.copy(), callback, messages=True, maxiter=1000,
report_every=20, gtol=1e-12) report_every=7, gtol=1e-12, update_rule=PolakRibiere)
pylab.ion()
pylab.show()
pass pass

View file

@ -16,7 +16,7 @@ import cPickle
import types import types
import ctypes import ctypes
from ctypes import byref, c_char, c_int, c_double # TODO from ctypes import byref, c_char, c_int, c_double # TODO
#import scipy.lib.lapack.flapack #import scipy.lib.lapack
import scipy as sp import scipy as sp
try: try:
@ -139,8 +139,9 @@ def pdinv(A, *args):
L = jitchol(A, *args) L = jitchol(A, *args)
logdet = 2.*np.sum(np.log(np.diag(L))) logdet = 2.*np.sum(np.log(np.diag(L)))
Li = chol_inv(L) Li = chol_inv(L)
Ai = linalg.lapack.flapack.dpotri(L)[0] Ai, _ = linalg.lapack.flapack.dpotri(L)
Ai = np.tril(Ai) + np.tril(Ai,-1).T #Ai = np.tril(Ai) + np.tril(Ai,-1).T
symmetrify(Ai)
return Ai, L, Li, logdet return Ai, L, Li, logdet
@ -250,6 +251,18 @@ def tdot(*args, **kwargs):
else: else:
return tdot_numpy(*args,**kwargs) return tdot_numpy(*args,**kwargs)
def DSYR(A,x,alpha=1.):
N = c_int(A.shape[0])
LDA = c_int(A.shape[0])
UPLO = c_char('l')
ALPHA = c_double(alpha)
A_ = A.ctypes.data_as(ctypes.c_void_p)
x_ = x.ctypes.data_as(ctypes.c_void_p)
INCX = c_int(1)
_blaslib.dsyr_(byref(UPLO), byref(N), byref(ALPHA),
x_, byref(INCX), A_, byref(LDA))
symmetrify(A,upper=True)
def symmetrify(A,upper=False): def symmetrify(A,upper=False):
""" """
Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper
@ -259,33 +272,38 @@ def symmetrify(A,upper=False):
N,M = A.shape N,M = A.shape
assert N==M assert N==M
c_contig_code = """ c_contig_code = """
int iN;
for (int i=1; i<N; i++){ for (int i=1; i<N; i++){
iN = i*N;
for (int j=0; j<i; j++){ for (int j=0; j<i; j++){
A[i+j*N] = A[i*N+j]; A[i+j*N] = A[iN+j];
} }
} }
""" """
f_contig_code = """ f_contig_code = """
int iN;
for (int i=1; i<N; i++){ for (int i=1; i<N; i++){
iN = i*N;
for (int j=0; j<i; j++){ for (int j=0; j<i; j++){
A[i*N+j] = A[i+j*N]; A[iN+j] = A[i+j*N];
} }
} }
""" """
if A.flags['C_CONTIGUOUS'] and upper: if A.flags['C_CONTIGUOUS'] and upper:
weave.inline(f_contig_code,['A','N']) weave.inline(f_contig_code,['A','N'], extra_compile_args=['-O3'])
elif A.flags['C_CONTIGUOUS'] and not upper: elif A.flags['C_CONTIGUOUS'] and not upper:
weave.inline(c_contig_code,['A','N']) weave.inline(c_contig_code,['A','N'], extra_compile_args=['-O3'])
elif A.flags['F_CONTIGUOUS'] and upper: elif A.flags['F_CONTIGUOUS'] and upper:
weave.inline(c_contig_code,['A','N']) weave.inline(c_contig_code,['A','N'], extra_compile_args=['-O3'])
elif A.flags['F_CONTIGUOUS'] and not upper: elif A.flags['F_CONTIGUOUS'] and not upper:
weave.inline(f_contig_code,['A','N']) weave.inline(f_contig_code,['A','N'], extra_compile_args=['-O3'])
else: else:
tmp = np.tril(A) tmp = np.tril(A)
A[:] = 0.0 A[:] = 0.0
A += tmp A += tmp
A += np.tril(tmp,-1).T A += np.tril(tmp,-1).T
def symmetrify_murray(A): def symmetrify_murray(A):
A += A.T A += A.T
nn = A.shape[0] nn = A.shape[0]
@ -318,3 +336,13 @@ def cholupdate(L,x):
x = x.copy() x = x.copy()
N = x.size N = x.size
weave.inline(code, support_code=support_code, arg_names=['N','L','x'], type_converters=weave.converters.blitz) weave.inline(code, support_code=support_code, arg_names=['N','L','x'], type_converters=weave.converters.blitz)
def backsub_both_sides(L, X,transpose='left'):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
if transpose=='left':
tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
else:
tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=0)
return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=0)[0].T

View file

@ -0,0 +1,35 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import weave
def std_norm_pdf(x):
"""Standard Gaussian density function"""
return 1./np.sqrt(2.*np.pi)*np.exp(-.5*x**2)
def std_norm_cdf(x):
"""
Cumulative standard Gaussian distribution
Based on Abramowitz, M. and Stegun, I. (1970)
"""
support_code = "#include <math.h>"
code = """
double sign = 1.0;
if (x < 0.0){
sign = -1.0;
x = -x;
}
x = x/sqrt(2.0);
double t = 1.0/(1.0 + 0.3275911*x);
double erf = 1. - exp(-x*x)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429)))));
return_val = 0.5*(1.0 + sign*erf);
"""
x = float(x)
return weave.inline(code,arg_names=['x'],support_code=support_code)

View file

@ -14,7 +14,7 @@ class data_show:
""" """
def __init__(self, vals, axes=None): def __init__(self, vals, axes=None):
self.vals = vals self.vals = vals.copy()
# If no axes are defined, create some. # If no axes are defined, create some.
if axes==None: if axes==None:
fig = plt.figure() fig = plt.figure()
@ -32,12 +32,12 @@ class vector_show(data_show):
""" """
def __init__(self, vals, axes=None): def __init__(self, vals, axes=None):
data_show.__init__(self, vals, axes) data_show.__init__(self, vals, axes)
self.vals = vals.T self.vals = vals.T.copy()
self.handle = self.axes.plot(np.arange(0, len(vals))[:, None], self.vals)[0] self.handle = self.axes.plot(np.arange(0, len(vals))[:, None], self.vals)[0]
def modify(self, vals): def modify(self, vals):
xdata, ydata = self.handle.get_data() xdata, ydata = self.handle.get_data()
self.vals = vals.T self.vals = vals.T.copy()
self.handle.set_data(xdata, self.vals) self.handle.set_data(xdata, self.vals)
self.axes.figure.canvas.draw() self.axes.figure.canvas.draw()
@ -52,7 +52,7 @@ class lvm(data_show):
:param latent_axes: the axes where the latent visualization should be plotted. :param latent_axes: the axes where the latent visualization should be plotted.
""" """
if vals == None: if vals == None:
vals = model.X[0] vals = model.X[0].copy()
data_show.__init__(self, vals, axes=latent_axes) data_show.__init__(self, vals, axes=latent_axes)
@ -83,7 +83,6 @@ class lvm(data_show):
def modify(self, vals): def modify(self, vals):
"""When latent values are modified update the latent representation and ulso update the output visualization.""" """When latent values are modified update the latent representation and ulso update the output visualization."""
y = self.model.predict(vals)[0] y = self.model.predict(vals)[0]
self.data_visualize.modify(y) self.data_visualize.modify(y)
self.latent_handle.set_data(vals[self.latent_index[0]], vals[self.latent_index[1]]) self.latent_handle.set_data(vals[self.latent_index[0]], vals[self.latent_index[1]])
@ -216,11 +215,11 @@ class image_show(data_show):
plt.show() plt.show()
def set_image(self, vals): def set_image(self, vals):
self.vals = np.reshape(vals, self.dimensions, order='F') self.vals = np.reshape(vals, self.dimensions, order='F').copy()
if self.transpose: if self.transpose:
self.vals = self.vals.T self.vals = self.vals.T.copy()
if not self.scale: if not self.scale:
self.vals = self.vals self.vals = self.vals.copy()
#if self.invert: #if self.invert:
# self.vals = -self.vals # self.vals = -self.vals
@ -304,7 +303,7 @@ class stick_show(mocap_data_show):
mocap_data_show.__init__(self, vals, axes, connect) mocap_data_show.__init__(self, vals, axes, connect)
def process_values(self, vals): def process_values(self, vals):
self.vals = vals.reshape((3, vals.shape[1]/3)).T self.vals = vals.reshape((3, vals.shape[1]/3)).T.copy()
class skeleton_show(mocap_data_show): class skeleton_show(mocap_data_show):
"""data_show class for visualizing motion capture data encoded as a skeleton with angles.""" """data_show class for visualizing motion capture data encoded as a skeleton with angles."""

View file

@ -185,7 +185,7 @@ class TanhWarpingFunction_d(WarpingFunction):
return z return z
def f_inv(self, y, psi, iterations = 30): def f_inv(self, z, psi, max_iterations = 1000):
""" """
calculate the numerical inverse of f calculate the numerical inverse of f
@ -194,13 +194,19 @@ class TanhWarpingFunction_d(WarpingFunction):
""" """
y = y.copy() z = z.copy()
z = np.ones_like(y) y = np.ones_like(z)
it = 0
update = np.inf
for i in range(iterations): while it == 0 or (np.abs(update).sum() > 1e-10 and it < max_iterations):
z -= (self.f(z, psi) - y)/self.fgrad_y(z,psi) update = (self.f(y, psi) - z)/self.fgrad_y(y, psi)
y -= update
it += 1
if it == max_iterations:
print "WARNING!!! Maximum number of iterations reached in f_inv "
return z return y
def fgrad_y(self, y, psi, return_precalc = False): def fgrad_y(self, y, psi, return_precalc = False):

Binary file not shown.

Before

Width:  |  Height:  |  Size: 37 KiB

After

Width:  |  Height:  |  Size: 75 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 78 KiB

After

Width:  |  Height:  |  Size: 130 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 61 KiB

After

Width:  |  Height:  |  Size: 62 KiB

Before After
Before After

View file

@ -55,18 +55,18 @@ In ``GPy``, kernel objects can be added or multiplied. In both cases, two kinds
* a kernel over :math:`\mathbb{R} \times \mathbb{R}`: :math:`k(x,y) = k_1(x,y) \times k_2(x,y)` * a kernel over :math:`\mathbb{R} \times \mathbb{R}`: :math:`k(x,y) = k_1(x,y) \times k_2(x,y)`
* a kernel over :math:`\mathbb{R}^2 \times \mathbb{R}^2`: :math:`k(\mathbf{x},\mathbf{y}) = k_1(x_1,y_1) \times k_2(x_2,y_2)` * a kernel over :math:`\mathbb{R}^2 \times \mathbb{R}^2`: :math:`k(\mathbf{x},\mathbf{y}) = k_1(x_1,y_1) \times k_2(x_2,y_2)`
These two options are available in GPy under the name ``prod`` and ``prod_orthogonal`` (resp. ``add`` and ``add_orthogonal`` for the addition). Here is a quick example :: These two options are available in GPy using the flag ``tensor`` in the ``add`` and ``prod`` functions. Here is a quick example ::
k1 = GPy.kern.rbf(1,1.,2.) k1 = GPy.kern.rbf(1,1.,2.)
k2 = GPy.kern.Matern32(1, 0.5, 0.2) k2 = GPy.kern.Matern32(1, 0.5, 0.2)
# Product of kernels # Product of kernels
k_prod = k1.prod(k2) k_prod = k1.prod(k2) # By default, tensor=False
k_prodorth = k1.prod_orthogonal(k2) k_prodtens = k1.prod(k2,tensor=True)
# Sum of kernels # Sum of kernels
k_add = k1.add(k2) k_add = k1.add(k2) # By default, tensor=False
k_addorth = k1.add_orthogonal(k2) k_addtens = k1.add(k2,tensor=True)
.. # plots .. # plots
pb.figure(figsize=(8,8)) pb.figure(figsize=(8,8))
@ -74,21 +74,21 @@ These two options are available in GPy under the name ``prod`` and ``prod_orthog
k_prod.plot() k_prod.plot()
pb.title('prod') pb.title('prod')
pb.subplot(2,2,2) pb.subplot(2,2,2)
k_prodorth.plot() k_prodtens.plot()
pb.title('prod_orthogonal') pb.title('tensor prod')
pb.subplot(2,2,3) pb.subplot(2,2,3)
k_add.plot() k_add.plot()
pb.title('add') pb.title('sum')
pb.subplot(2,2,4) pb.subplot(2,2,4)
k_addorth.plot() k_addtens.plot()
pb.title('add_orthogonal') pb.title('tensor sum')
pb.subplots_adjust(wspace=0.3, hspace=0.3) pb.subplots_adjust(wspace=0.3, hspace=0.3)
.. figure:: Figures/tuto_kern_overview_multadd.png .. figure:: Figures/tuto_kern_overview_multadd.png
:align: center :align: center
:height: 500px :height: 500px
A shortcut for ``add`` and ``prod`` is provided by the usual ``+`` and ``*`` operators. Here is another example where we create a periodic kernel with some decay :: A shortcut for ``add`` and ``prod`` (with default flag ``tensor=False``) is provided by the usual ``+`` and ``*`` operators. Here is another example where we create a periodic kernel with some decay ::
k1 = GPy.kern.rbf(1,1.,2) k1 = GPy.kern.rbf(1,1.,2)
k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5) k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5)
@ -113,7 +113,7 @@ A shortcut for ``add`` and ``prod`` is provided by the usual ``+`` and ``*`` ope
:align: center :align: center
:height: 300px :height: 300px
In general, ``kern`` objects can be seen as a sum of ``kernparts`` objects, where the later are covariance functions denied on the same space. For example, the following code :: In general, ``kern`` objects can be seen as a sum of ``kernparts`` objects, where the later are covariance functions defined on the same space. For example, the following code ::
k = (k1+k2)*(k1+k2) k = (k1+k2)*(k1+k2)
print k.parts[0].name, '\n', k.parts[1].name, '\n', k.parts[2].name, '\n', k.parts[3].name print k.parts[0].name, '\n', k.parts[1].name, '\n', k.parts[2].name, '\n', k.parts[3].name
@ -184,7 +184,7 @@ Let us assume that we want to define an ANOVA kernel with a Matern 3/2 kernel fo
k_cst = GPy.kern.bias(1,variance=1.) k_cst = GPy.kern.bias(1,variance=1.)
k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3) k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
Kanova = (k_cst + k_mat).prod_orthogonal(k_cst + k_mat) Kanova = (k_cst + k_mat).prod(k_cst + k_mat,tensor=True)
print Kanova print Kanova
Printing the resulting kernel outputs the following :: Printing the resulting kernel outputs the following ::
@ -236,14 +236,14 @@ The submodels can be represented with the option ``which_function`` of ``plot``:
pb.subplot(1,5,2) pb.subplot(1,5,2)
pb.ylabel("= ",rotation='horizontal',fontsize='30') pb.ylabel("= ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,3) pb.subplot(1,5,3)
m.plot(which_functions=[False,True,False,False]) m.plot(which_parts=[False,True,False,False])
pb.ylabel("cst +",rotation='horizontal',fontsize='30') pb.ylabel("cst +",rotation='horizontal',fontsize='30')
pb.subplot(1,5,4) pb.subplot(1,5,4)
m.plot(which_functions=[False,False,True,False]) m.plot(which_parts=[False,False,True,False])
pb.ylabel("+ ",rotation='horizontal',fontsize='30') pb.ylabel("+ ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,5) pb.subplot(1,5,5)
pb.ylabel("+ ",rotation='horizontal',fontsize='30') pb.ylabel("+ ",rotation='horizontal',fontsize='30')
m.plot(which_functions=[False,False,False,True]) m.plot(which_parts=[False,False,False,True])
.. pb.savefig('tuto_kern_overview_mANOVAdec.png',bbox_inches='tight') .. pb.savefig('tuto_kern_overview_mANOVAdec.png',bbox_inches='tight')
@ -252,7 +252,8 @@ The submodels can be represented with the option ``which_function`` of ``plot``:
:height: 250px :height: 250px
.. import pylab as pb .. # code
import pylab as pb
import numpy as np import numpy as np
import GPy import GPy
pb.ion() pb.ion()