Merge branch 'devel' of github.com:SheffieldML/GPy into devel

Commit ee5b05cd2d — Nicolò Fusi, 2013-05-10 11:45:16 +02:00
16 changed files with 645 additions and 272 deletions

View file

@@ -6,18 +6,18 @@ import numpy as np
class transformation(object):
    def __init__(self):
        # set the domain. Suggest we use 'positive', 'bounded', etc
        self.domain = 'undefined'
    def f(self, x):
        raise NotImplementedError
    def finv(self, x):
        raise NotImplementedError
    def gradfactor(self, f):
        """ df_dx evaluated at self.f(x)=f"""
        raise NotImplementedError
    def initialize(self, f):
        """ produce a sensible initial values for f(x)"""
        raise NotImplementedError
    def __str__(self):
@@ -25,61 +25,92 @@ class transformation(object):
class logexp(transformation):
    def __init__(self):
        self.domain = 'positive'
    def f(self, x):
        return np.log(1. + np.exp(x))
    def finv(self, f):
        return np.log(np.exp(f) - 1.)
    def gradfactor(self, f):
        ef = np.exp(f)
        return (ef - 1.) / ef
    def initialize(self, f):
        return np.abs(f)
    def __str__(self):
        return '(+ve)'

+class logexp_clipped(transformation):
+    def __init__(self):
+        self.domain = 'positive'
+    def f(self, x):
+        f = np.log(1. + np.exp(x))
+        return f
+    def finv(self, f):
+        return np.log(np.exp(f) - 1.)
+    def gradfactor(self, f):
+        ef = np.exp(f)
+        gf = (ef - 1.) / ef
+        return np.where(f < 1e-6, 0, gf)
+    def initialize(self, f):
+        return np.abs(f)
+    def __str__(self):
+        return '(+ve)'

class exponent(transformation):
    def __init__(self):
        self.domain = 'positive'
    def f(self, x):
        return np.exp(x)
    def finv(self, x):
        return np.log(x)
    def gradfactor(self, f):
        return f
    def initialize(self, f):
        return np.abs(f)
    def __str__(self):
        return '(+ve)'

class negative_exponent(transformation):
    def __init__(self):
        self.domain = 'negative'
    def f(self, x):
        return -np.exp(x)
    def finv(self, x):
        return np.log(-x)
    def gradfactor(self, f):
        return f
    def initialize(self, f):
        return -np.abs(f)
    def __str__(self):
        return '(-ve)'

+class square(transformation):
+    def __init__(self):
+        self.domain = 'positive'
+    def f(self, x):
+        return x ** 2
+    def finv(self, x):
+        return np.sqrt(x)
+    def gradfactor(self, f):
+        return 2 * np.sqrt(f)
+    def initialize(self, f):
+        return np.abs(f)
+    def __str__(self):
+        return '(+sq)'

class logistic(transformation):
    def __init__(self, lower, upper):
        self.domain = 'bounded'
        assert lower < upper
        self.lower, self.upper = float(lower), float(upper)
        self.difference = self.upper - self.lower
    def f(self, x):
        return self.lower + self.difference / (1. + np.exp(-x))
    def finv(self, f):
        return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf))
    def gradfactor(self, f):
        return (f - self.lower) * (self.upper - f) / self.difference
    def initialize(self, f):
        return self.f(f * 0.)
    def __str__(self):
        return '({},{})'.format(self.lower, self.upper)
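
For reference, the gradfactor contract above ("df_dx evaluated at self.f(x)=f") can be checked numerically. A minimal sketch using only numpy and the logexp class committed here (an editorial illustration, not part of the diff):

    import numpy as np

    trans = logexp()
    x = np.array([-2., 0.5, 3.])
    f = trans.f(x)                     # map unconstrained x into the positive domain
    eps = 1e-6
    numeric = (trans.f(x + eps) - trans.f(x - eps)) / (2 * eps)   # finite-difference df/dx
    print np.allclose(numeric, trans.gradfactor(f))               # expect True

The new logexp_clipped behaves identically except that it zeroes the gradient factor when f falls below 1e-6, and square simply maps x to x**2 on the positive domain.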

View file

@@ -8,6 +8,7 @@ from matplotlib import pyplot as plt, pyplot
import GPy
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
from GPy.util.datasets import simulation_BGPLVM
+from GPy.core.transformations import square, logexp_clipped

default_seed = np.random.seed(123344)
@@ -62,17 +63,21 @@ def GPLVM_oil_100(optimize=True):
    m.plot_latent(labels=m.data_labels)
    return m

-def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300):
+def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300, plot=False):
    data = GPy.util.datasets.oil()
    # create simple GP model
-    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.001)
-    m = GPy.models.Bayesian_GPLVM(data['X'][:N], Q, kernel=kernel, M=M)
+    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
+    Y = data['X'][:N]
+    m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=kernel, M=M)
    m.data_labels = data['Y'][:N].argmax(axis=1)

    # optimize
    if optimize:
-        m.constrain_fixed('noise', 0.05)
+        m.constrain_fixed('noise', 1. / Y.var())
+        m.constrain('variance', logexp_clipped())
+        m['lengt'] = 1000
        m.ensure_default_constraints()
        m.optimize('scg', messages=1, max_f_eval=max(80, max_f_eval))
        m.unconstrain('noise')
@@ -81,14 +86,15 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300):
    else:
        m.ensure_default_constraints()

-    y = m.likelihood.Y[0, :]
-    fig, (latent_axes, hist_axes) = plt.subplots(1, 2)
-    plt.sca(latent_axes)
-    m.plot_latent()
-    data_show = GPy.util.visualize.vector_show(y)
-    lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
-    raw_input('Press enter to finish')
-    plt.close('all')
+    if plot:
+        y = m.likelihood.Y[0, :]
+        fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
+        plt.sca(latent_axes)
+        m.plot_latent()
+        data_show = GPy.util.visualize.vector_show(y)
+        lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], m, data_show, latent_axes=latent_axes)  # , sense_axes=sense_axes)
+        raw_input('Press enter to finish')
+        plt.close('all')
#     # plot
#     print(m)
#     m.plot_latent(labels=m.data_labels)
@@ -207,7 +213,7 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
                      max_burnin=100, true_X=False,
                      do_opt=True,
                      max_f_eval=1000):
-    D1, D2, D3, N, M, Q = 10, 8, 8, 250, 10, 6
+    D1, D2, D3, N, M, Q = 15, 8, 8, 350, 3, 6
    slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)

    from GPy.models import mrd
@@ -221,6 +227,8 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
#     k = kern.white(Q, .00001) + kern.bias(Q)
    m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
#     m.set('noise',)
+    m.constrain('variance', logexp_clipped())
    m.ensure_default_constraints()
    m['noise'] = Y.var() / 100.
    m['linear_variance'] = .001
@@ -304,7 +312,7 @@ def mrd_simulation(plot_sim=False):
#     Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T
#     Y2 -= Y2.mean(0)
#     make_params = lambda ard: np.hstack([[1], ard, [1, .3]])
-    D1, D2, D3, N, M, Q = 2000, 34, 8, 500, 3, 6
+    D1, D2, D3, N, M, Q = 150, 250, 300, 700, 3, 7
    slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)

    from GPy.models import mrd
@@ -316,18 +324,19 @@ def mrd_simulation(plot_sim=False):
#     Y2 = np.random.multivariate_normal(np.zeros(N), k.K(S2), D2).T
#     Y3 = np.random.multivariate_normal(np.zeros(N), k.K(S3), D3).T

-    Ylist = Ylist[0:2]
+    # Ylist = Ylist[0:2]
#     k = kern.rbf(Q, ARD=True) + kern.bias(Q) + kern.white(Q)
-    k = kern.linear(Q, ARD=True) + kern.bias(Q, .01) + kern.white(Q, .001)
+    k = kern.linear(Q, [0.01] * Q, True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))

    m = mrd.MRD(*Ylist, Q=Q, M=M, kernel=k, initx="concat", initz='permute', _debug=False)

    for i, Y in enumerate(Ylist):
-        m.set('{}_noise'.format(i + 1), Y.var() / 100.)
+        m['{}_noise'.format(i + 1)] = Y.var() / 100.
+    m.constrain('variance', logexp_clipped())
    m.ensure_default_constraints()
-    m.auto_scale_factor = True
+    # m.auto_scale_factor = True
#     cstr = 'variance'
#     m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-12, 1.)
@@ -335,19 +344,19 @@ def mrd_simulation(plot_sim=False):
#     cstr = 'linear_variance'
#     m.unconstrain(cstr), m.constrain_positive(cstr)

-#     print "initializing beta"
-#     cstr = "noise"
-#     m.unconstrain(cstr); m.constrain_fixed(cstr)
-#     m.optimize('scg', messages=1, max_f_eval=100)
-#     print "releasing beta"
-#     cstr = "noise"
-#     m.unconstrain(cstr); m.constrain_positive(cstr)
+    print "initializing beta"
+    cstr = "noise"
+    m.unconstrain(cstr); m.constrain_fixed(cstr)
+    m.optimize('scg', messages=1, max_f_eval=2e3, gtol=100)
+    print "releasing beta"
+    cstr = "noise"
+    m.unconstrain(cstr); m.constrain(cstr, logexp_clipped())

-    np.seterr(all='call')
-    def ipdbonerr(errtype, flags):
-        import ipdb; ipdb.set_trace()
-    np.seterrcall(ipdbonerr)
+    # np.seterr(all='call')
+    # def ipdbonerr(errtype, flags):
+    #     import ipdb; ipdb.set_trace()
+    # np.seterrcall(ipdbonerr)
    return m  # , mtest
@@ -362,7 +371,7 @@ def brendan_faces():
    # optimize
    m.ensure_default_constraints()
-    m.optimize(messages=1, max_f_eval=10000)
+    # m.optimize(messages=1, max_f_eval=10000)

    ax = m.plot_latent()
    y = m.likelihood.Y[0, :]

View file

@@ -1,6 +1,6 @@
# Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
# Scaled Conjugate Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
@@ -25,7 +25,7 @@
import numpy as np
import sys

-def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=1e-6, ftol=1e-6):
+def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=None, ftol=None, gtol=None):
    """
    Optimisation through Scaled Conjugate Gradients (SCG)

@@ -38,19 +38,24 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
    flog : a list of all the objective values
    """

+    if xtol is None:
+        xtol = 1e-6
+    if ftol is None:
+        ftol = 1e-6
+    if gtol is None:
+        gtol = 1e-5
    sigma0 = 1.0e-4
    fold = f(x, *optargs)  # Initial function value.
    function_eval = 1
    fnow = fold
    gradnew = gradf(x, *optargs)  # Initial gradient.
    gradold = gradnew.copy()
    d = -gradnew  # Initial search direction.
    success = True  # Force calculation of directional derivs.
    nsuccess = 0  # nsuccess counts number of successes.
    beta = 1.0  # Initial scale parameter.
    betamin = 1.0e-15  # Lower bound on scale.
    betamax = 1.0e100  # Upper bound on scale.
    status = "Not converged"

    flog = [fold]
@@ -67,21 +72,21 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
            d = -gradnew
            mu = np.dot(d, gradnew)
        kappa = np.dot(d, d)
        sigma = sigma0 / np.sqrt(kappa)
        xplus = x + sigma * d
        gplus = gradf(xplus, *optargs)
        theta = np.dot(d, (gplus - gradnew)) / sigma

        # Increase effective curvature and evaluate step size alpha.
        delta = theta + beta * kappa
        if delta <= 0:
            delta = beta * kappa
            beta = beta - theta / kappa
        alpha = -mu / delta

        # Calculate the comparison ratio.
        xnew = x + alpha * d
        fnew = f(xnew, *optargs)
        function_eval += 1
@@ -89,8 +94,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
            status = "Maximum number of function evaluations exceeded"
            return x, flog, function_eval, status

        Delta = 2.*(fnew - fold) / (alpha * mu)
        if Delta >= 0.:
            success = True
            nsuccess += 1
            x = xnew
@@ -100,19 +105,19 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
            fnow = fold

        # Store relevant variables
        flog.append(fnow)  # Current function value

        iteration += 1
        if display:
            print '\r',
-            print 'Iteration: {0:>5g} Objective:{1:> 12e} Scale:{2:> 12e}'.format(iteration, fnow, beta),
+            print 'i: {0:>5g} f:{1:> 12e} b:{2:> 12e} |g|:{3:> 12e}'.format(iteration, fnow, beta, np.dot(gradnew, gradnew)),
            # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
            sys.stdout.flush()

        if success:
            # Test for termination
            if (np.max(np.abs(alpha * d)) < xtol) or (np.abs(fnew - fold) < ftol):
                status = 'converged'
                return x, flog, function_eval, status

            else:
@@ -121,23 +126,25 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
            gradold = gradnew
            gradnew = gradf(x, *optargs)

            # If the gradient is zero then we are done.
-            if np.dot(gradnew,gradnew) == 0:
+            if (np.dot(gradnew, gradnew)) <= gtol:
+                status = 'converged'
                return x, flog, function_eval, status

        # Adjust beta according to comparison ratio.
        if Delta < 0.25:
            beta = min(4.0 * beta, betamax)
        if Delta > 0.75:
            beta = max(0.5 * beta, betamin)

        # Update search direction using Polak-Ribiere formula, or re-start
        # in direction of negative gradient after nparams steps.
        if nsuccess == x.size:
            d = -gradnew
+            # beta = 1. # TODO: betareset!!
            nsuccess = 0
        elif success:
            gamma = np.dot(gradold - gradnew, gradnew) / (mu)
            d = gamma * d - gradnew

    # If we get here, then we haven't terminated in the given number of
    # iterations.
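
The hunks above add a third stopping criterion to SCG: besides xtol and ftol, the loop now exits with status 'converged' once the squared gradient norm np.dot(gradnew, gradnew) falls below gtol, and the opt_SCG wrapper in a later file passes its gtol through. A minimal usage sketch for the signature shown here; the import path is an assumption, not taken from the diff:

    import numpy as np
    from GPy.inference.scg import SCG   # assumed module location

    f = lambda x: 0.5 * np.sum(x ** 2)   # simple quadratic objective
    fp = lambda x: x                     # its gradient
    x_opt, flog, n_evals, status = SCG(f, fp, np.ones(5), display=False,
                                       xtol=1e-6, ftol=1e-6, gtol=1e-5)
    print status, x_opt                  # expect 'converged' with x_opt near the origin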

View file

@@ -3,7 +3,8 @@ Created on 24 Apr 2013
@author: maxz
'''
-from GPy.inference.gradient_descent_update_rules import FletcherReeves
+from GPy.inference.gradient_descent_update_rules import FletcherReeves, \
+    PolakRibiere
from Queue import Empty
from multiprocessing import Value
from multiprocessing.queues import Queue
@@ -12,6 +13,7 @@ from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
from threading import Thread
import numpy
import sys
+import time

RUNNING = "running"
CONVERGED = "converged"
@@ -71,7 +73,10 @@ class _Async_Optimization(Thread):

    def callback_return(self, *a):
        self.callback(*a)
-        self.callback(self.SENTINEL)
+        if self.outq is not None:
+            self.outq.put(self.SENTINEL)
+        if self.messages:
+            print ""
        self.runsignal.clear()

    def run(self, *args, **kwargs):
@@ -105,7 +110,7 @@ class _CGDAsync(_Async_Optimization):
                status = MAX_F_EVAL

            gi = -self.df(xi, *a, **kw)
-            if numpy.dot(gi.T, gi) < self.gtol:
+            if numpy.dot(gi.T, gi) <= self.gtol:
                status = CONVERGED
                break
            if numpy.isnan(numpy.dot(gi.T, gi)):
@@ -123,10 +128,8 @@ class _CGDAsync(_Async_Optimization):
                               xi,
                               si, gi,
                               fi, fi_old)
-            if alphai is not None and fi2 < fi:
-                fi, fi_old = fi2, fi_old2
-            else:
-                alphai, _, _, fi, fi_old, gfi = \
+            if alphai is None:
+                alphai, _, _, fi2, fi_old2, gfi = \
                    line_search_wolfe2(self.f, self.df,
                                       xi, si, gi,
                                       fi, fi_old)
@@ -134,11 +137,14 @@ class _CGDAsync(_Async_Optimization):
                # This line search also failed to find a better solution.
                    status = LINE_SEARCH
                    break
+            if fi2 < fi:
+                fi, fi_old = fi2, fi_old2
            if gfi is not None:
                gi = gfi
            if numpy.isnan(fi) or fi_old < fi:
                gi, ur, si = self.reset(xi, *a, **kw)
            else:
                xi += numpy.dot(alphai, si)

            if self.messages:
@@ -164,10 +170,11 @@ class Async_Optimize(object):
        try:
            for ret in iter(lambda: q.get(timeout=1), self.SENTINEL):
                self.callback(*ret)
+            self.runsignal.clear()
        except Empty:
            pass

-    def opt_async(self, f, df, x0, callback, update_rule=FletcherReeves,
+    def opt_async(self, f, df, x0, callback, update_rule=PolakRibiere,
                  messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
                  report_every=10, *args, **kwargs):
        self.runsignal.set()
@@ -193,13 +200,21 @@ class Async_Optimize(object):
        while self.runsignal.is_set():
            try:
                p.join(1)
-#                 c.join(1)
+                if c: c.join(1)
            except KeyboardInterrupt:
                # print "^C"
                self.runsignal.clear()
                p.join()
-                c.join()
+                if c: c.join()

        if c and c.is_alive():
+#             self.runsignal.set()
+#             while self.runsignal.is_set():
+#                 try:
+#                     c.join(.1)
+#                 except KeyboardInterrupt:
+#                     # print "^C"
+#                     self.runsignal.clear()
+#             c.join()
            print "WARNING: callback still running, optimisation done!"
        return p.result

View file

@@ -41,3 +41,13 @@ class FletcherReeves(GDUpdateRule):
        if tmp:
            return tmp / numpy.dot(self.gradold.T, self.gradnatold)
        return tmp
+
+class PolakRibiere(GDUpdateRule):
+    '''
+    Polak-Ribiere update rule for gamma
+    '''
+    def _gamma(self, *a, **kw):
+        tmp = numpy.dot((self.grad - self.gradold).T, self.gradnat)
+        if tmp:
+            return tmp / numpy.dot(self.gradold.T, self.gradnatold)
+        return tmp
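
The new PolakRibiere rule weights the previous search direction by the change in gradient rather than by the current gradient alone, which tends to recover better after poor steps. A toy evaluation of the expression used in _gamma above, taking the natural gradient equal to the ordinary gradient purely for illustration:

    import numpy
    grad, gradold = numpy.array([0.2, -0.1]), numpy.array([0.5, 0.3])
    gradnat, gradnatold = grad, gradold   # identity metric, illustration only
    gamma = numpy.dot((grad - gradold).T, gradnat) / numpy.dot(gradold.T, gradnatold)
    print gamma   # weight applied to the previous direction when building the next one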

View file

@@ -29,7 +29,8 @@ class Optimizer():
    :rtype: optimizer object.
    """

-    def __init__(self, x_init, messages=False, model = None, max_f_eval=1e4, max_iters = 1e3, ftol=None, gtol=None, xtol=None):
+    def __init__(self, x_init, messages=False, model=None, max_f_eval=1e4, max_iters=1e3,
+                 ftol=None, gtol=None, xtol=None):
        self.opt_name = None
        self.x_init = x_init
        self.messages = messages
@@ -205,7 +206,11 @@ class opt_SCG(Optimizer):
    def opt(self, f_fp = None, f = None, fp = None):
        assert not f is None
        assert not fp is None
-        opt_result = SCG(f,fp,self.x_init, display=self.messages, maxiters=self.max_iters, max_f_eval=self.max_f_eval, xtol=self.xtol, ftol=self.ftol)
+        opt_result = SCG(f, fp, self.x_init, display=self.messages,
+                         maxiters=self.max_iters,
+                         max_f_eval=self.max_f_eval,
+                         xtol=self.xtol, ftol=self.ftol,
+                         gtol=self.gtol)
        self.x_opt = opt_result[0]
        self.trace = opt_result[1]
        self.f_opt = self.trace[-1]

View file

@@ -32,6 +32,7 @@ class EP(likelihood):
        self.precision = np.ones(self.N)[:,None]
        self.Z = 0
        self.YYT = None
+        self.V = self.precision * self.Y

    def restart(self):
        self.tau_tilde = np.zeros(self.N)
@@ -41,6 +42,7 @@ class EP(likelihood):
        self.precision = np.ones(self.N)[:,None]
        self.Z = 0
        self.YYT = None
+        self.V = self.precision * self.Y

    def predictive_values(self,mu,var,full_cov):
        if full_cov:
@@ -67,6 +69,7 @@ class EP(likelihood):
            self.YYT = np.dot(self.Y,self.Y.T)
        self.covariance_matrix = np.diag(1./self.tau_tilde)
        self.precision = self.tau_tilde[:,None]
+        self.V = self.precision * self.Y

    def fit_full(self,K):
        """

View file

@@ -11,33 +11,34 @@ class Gaussian(likelihood):
    :param normalize: whether to normalize the data before computing (predictions will be in original scales)
    :type normalize: False|True
    """
    def __init__(self, data, variance=1., normalize=False):
        self.is_heteroscedastic = False
        self.Nparams = 1
        self.Z = 0.  # a correction factor which accounts for the approximation made
        N, self.D = data.shape

        # normalization
        if normalize:
            self._bias = data.mean(0)[None, :]
            self._scale = data.std(0)[None, :]
            # Don't scale outputs which have zero variance to zero.
            self._scale[np.nonzero(self._scale == 0.)] = 1.0e-3
        else:
            self._bias = np.zeros((1, self.D))
            self._scale = np.ones((1, self.D))

        self.set_data(data)
+        self._variance = np.asarray(variance) + 1.
        self._set_params(np.asarray(variance))

    def set_data(self, data):
        self.data = data
        self.N, D = data.shape
        assert D == self.D
        self.Y = (self.data - self._bias) / self._scale
        if D > self.N:
            self.YYT = np.dot(self.Y, self.Y.T)
            self.trYYT = np.trace(self.YYT)
        else:
            self.YYT = None
@@ -49,27 +50,30 @@ class Gaussian(likelihood):

    def _get_param_names(self):
        return ["noise_variance"]

    def _set_params(self, x):
-        self._variance = float(x)
-        self.covariance_matrix = np.eye(self.N)*self._variance
-        self.precision = 1./self._variance
+        x = float(x)
+        if self._variance != x:
+            self._variance = x
+            self.covariance_matrix = np.eye(self.N) * self._variance
+            self.precision = 1. / self._variance
+            self.V = (self.precision) * self.Y

    def predictive_values(self, mu, var, full_cov):
        """
        Un-normalize the prediction and add the likelihood variance, then return the 5%, 95% interval
        """
        mean = mu * self._scale + self._bias
        if full_cov:
            if self.D > 1:
                raise NotImplementedError, "TODO"
                # Note. for D>1, we need to re-normalise all the outputs independently.
                # This will mess up computations of diag(true_var), below.
                # note that the upper, lower quantiles should be the same shape as mean
            true_var = (var + np.eye(var.shape[0]) * self._variance) * self._scale ** 2
            _5pc = mean - 2.*np.sqrt(np.diag(true_var))
            _95pc = mean + 2.*np.sqrt(np.diag(true_var))
        else:
            true_var = (var + self._variance) * self._scale ** 2
            _5pc = mean - 2.*np.sqrt(true_var)
            _95pc = mean + 2.*np.sqrt(true_var)
        return mean, true_var, _5pc, _95pc
@@ -80,5 +84,5 @@ class Gaussian(likelihood):
        """
        pass

    def _gradients(self, partial):
        return np.sum(partial)

View file

@@ -16,6 +16,7 @@ class likelihood:
    self.is_heteroscedastic : enables significant computational savings in GP
    self.precision : a scalar or vector representation of the effective target precision
    self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N
+    self.V : self.precision * self.Y
    """
    def __init__(self,data):
        raise ValueError, "this class is not to be instantiated"

View file

@@ -54,6 +54,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
        self._savedgradients = []
        self._savederrors = []
        self._savedpsiKmm = []
+        self._savedABCD = []

        sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)
@@ -135,6 +136,17 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
            self._savedparams.append([self.f_call, self._get_params()])
            self._savedgradients.append([self.f_call, self._log_likelihood_gradients()])
            self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]])
+            sf2 = self.scale_factor ** 2
+            if self.likelihood.is_heteroscedastic:
+                A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y)
+                B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
+            else:
+                A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
+                B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
+            C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5 * self.M * np.log(sf2))
+            D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
+            self._savedABCD.append([self.f_call, A, B, C, D])

#         print "\nkl:", kl, "ll:", ll
        return ll - kl
@@ -169,7 +181,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
            ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
        return ax

-    def plot_X_1d(self, fig=None, axes=None, fig_num="MRD X 1d", colors=None):
+    def plot_X_1d(self, fig=None, axes=None, fig_num="LVM mu S 1d", colors=None):
        """
        Plot latent space X in 1D:

@@ -196,7 +208,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
            else:
                ax = axes[i]
            ax.plot(self.X, c='k', alpha=.3)
-            plots.extend(ax.plot(self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{}}}$".format(i)))
+            plots.extend(ax.plot(self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
            ax.fill_between(np.arange(self.X.shape[0]),
                            self.X.T[i] - 2 * np.sqrt(self.X_variance.T[i]),
                            self.X.T[i] + 2 * np.sqrt(self.X_variance.T[i]),
@@ -251,6 +263,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
        gradient_dict = dict(self._savedgradients)
        kmm_dict = dict(self._savedpsiKmm)
        iters = np.array(param_dict.keys())
+        ABCD_dict = np.array(self._savedABCD)
        self.showing = 0

#         ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4)
@@ -281,14 +294,26 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
                     ha='center', va='center')
        figs[-1].canvas.draw()
        figs[-1].tight_layout(rect=(.15, 0, 1, .86))

+#         figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6)))
+#         fig = figs[-1]
+#         ax6 = fig.add_subplot(121)
+#         ax6.text(.5, .5, r"${\mathbf{K}_{mm}}$", color='magenta', alpha=.5, transform=ax6.transAxes,
+#                  ha='center', va='center')
+#         ax7 = fig.add_subplot(122)
+#         ax7.text(.5, .5, r"${\frac{dL}{dK_{mm}}}$", color='magenta', alpha=.5, transform=ax7.transAxes,
+#                  ha='center', va='center')
        figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6)))
        fig = figs[-1]
-        ax6 = fig.add_subplot(121)
-        ax6.text(.5, .5, r"${\mathbf{K}_{mm}}$", color='magenta', alpha=.5, transform=ax6.transAxes,
-                 ha='center', va='center')
-        ax7 = fig.add_subplot(122)
-        ax7.text(.5, .5, r"${\frac{dL}{dK_{mm}}}$", color='magenta', alpha=.5, transform=ax7.transAxes,
-                 ha='center', va='center')
+        ax8 = fig.add_subplot(121)
+        ax8.text(.5, .5, r"${\mathbf{A,B,C,D}}$", color='k', alpha=.5, transform=ax8.transAxes,
+                 ha='center', va='center')
+        ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 1], label='A')
+        ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 2], label='B')
+        ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 3], label='C')
+        ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 4], label='D')
+        ax8.legend()
+        figs[-1].canvas.draw()
+        figs[-1].tight_layout(rect=(.15, 0, 1, .86))

        X, S, Z, theta = self._debug_filter_params(param_dict[self.showing])
        Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing])
@@ -330,16 +355,16 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
        ax5.set_xticks(np.arange(len(theta)))
        ax5.set_xticklabels(self._get_param_names()[-len(theta):], rotation=17)

-        imkmm = ax6.imshow(kmm_dict[self.showing][0])
-        from mpl_toolkits.axes_grid1 import make_axes_locatable
-        divider = make_axes_locatable(ax6)
-        caxkmm = divider.append_axes("right", "5%", pad="1%")
-        cbarkmm = pylab.colorbar(imkmm, cax=caxkmm)
-
-        imkmmdl = ax7.imshow(kmm_dict[self.showing][1])
-        divider = make_axes_locatable(ax7)
-        caxkmmdl = divider.append_axes("right", "5%", pad="1%")
-        cbarkmmdl = pylab.colorbar(imkmmdl, cax=caxkmmdl)
+#         imkmm = ax6.imshow(kmm_dict[self.showing][0])
+#         from mpl_toolkits.axes_grid1 import make_axes_locatable
+#         divider = make_axes_locatable(ax6)
+#         caxkmm = divider.append_axes("right", "5%", pad="1%")
+#         cbarkmm = pylab.colorbar(imkmm, cax=caxkmm)
+#
+#         imkmmdl = ax7.imshow(kmm_dict[self.showing][1])
+#         divider = make_axes_locatable(ax7)
+#         caxkmmdl = divider.append_axes("right", "5%", pad="1%")
+#         cbarkmmdl = pylab.colorbar(imkmmdl, cax=caxkmmdl)

#         Qleg = ax1.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
#                           loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.15, 1, 1.15),
@@ -420,13 +445,13 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
            thetagrads.set_offsets(np.array([xtheta.ravel(), theta.ravel()]).T)
            thetagrads.set_UVC(Utheta, thetag)

-            imkmm.set_data(kmm_dict[self.showing][0])
-            imkmm.autoscale()
-            cbarkmm.update_normal(imkmm)
-
-            imkmmdl.set_data(kmm_dict[self.showing][1])
-            imkmmdl.autoscale()
-            cbarkmmdl.update_normal(imkmmdl)
+#             imkmm.set_data(kmm_dict[self.showing][0])
+#             imkmm.autoscale()
+#             cbarkmm.update_normal(imkmm)
+#
+#             imkmmdl.set_data(kmm_dict[self.showing][1])
+#             imkmmdl.autoscale()
+#             cbarkmmdl.update_normal(imkmmdl)

            ax2.relim()
#             ax3.relim()
@@ -452,4 +477,4 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
        cidp = figs[0].canvas.mpl_connect('button_press_event', onclick)
        cidd = figs[0].canvas.mpl_connect('motion_notify_event', motion)

-        return ax1, ax2, ax3, ax4, ax5, ax6, ax7
+        return ax1, ax2, ax3, ax4, ax5  # , ax6, ax7
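
For orientation, the four quantities now collected in _savedABCD (and plotted in the new debug panel) are the additive pieces of the variational lower bound that sparse_GP.log_likelihood evaluates in a later file. Reading off the non-heteroscedastic branch of that code, with beta the noise precision, sigma^2 = 1/beta and V = beta * Y (an editorial gloss on the committed code, not text from the commit):

    A = -\tfrac{ND}{2}\log(2\pi\sigma^{2}) - \tfrac{\beta}{2}\,\mathrm{tr}(YY^{\top})
    B = -\tfrac{D}{2}\Big(\beta\sum_{n}\psi_{0}^{(n)} - \mathrm{tr}(\mathbf{A})\Big)
    C = -D\sum_{m}\log (L_{B})_{mm}
    D = \tfrac{1}{2}\,\big\lVert L_{B}^{-1}L_{m}^{-1}\Psi_{1}V \big\rVert^{2}

Here \mathbf{A} denotes the matrix self.A (not the scalar term A), L_B is the Cholesky factor of B = I + \mathbf{A}, and the debug figure traces each term against the function-call counter f_call.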

GPy/models/FITC.py  (new file, 248 lines)
View file

@@ -0,0 +1,248 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, tdot, symmetrify, pdinv
#from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
from ..util.plot import gpplot
from .. import kern
from scipy import stats, linalg
from sparse_GP import sparse_GP

def backsub_both_sides(L, X):
    """ Return L^-T * X * L^-1, assuming X is symmetrical and L is lower cholesky"""
    tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
    return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T

class FITC(sparse_GP):

    def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
        self.Z = Z
        self.M = self.Z.shape[0]
        self.true_precision = likelihood.precision
        #ERASEME
        #N = likelihood.Y.size
        #self.beta_star = np.random.rand(N,1)
        #self.Kmm_ = kernel.K(self.Z).copy()
        #self.Kmmi_,a,b,c = pdinv(self.Kmm_)
        #self.psi1_ = kernel.K(self.Z,X).copy()
        sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False)

    def _set_params(self, p):
        self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
        self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
        self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
        self._compute_kernel_matrices()
        self.scale_factor = 1.
        self._computations()

    def update_likelihood_approximation(self):
        """
        Approximates a non-gaussian likelihood using Expectation Propagation

        For a Gaussian (or direct: TODO) likelihood, no iteration is required:
        this function does nothing

        Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in sparse_GP.
        The true precision is now 'true_precision' not 'precision'.
        """
        if self.has_uncertain_inputs:
            raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
        else:
            self.likelihood.fit_FITC(self.Kmm, self.psi1, self.psi0)
            self._set_params(self._get_params())  # update the GP

    def _computations(self):
        # factor Kmm
        self.Lm = jitchol(self.Kmm)
        self.Lmi, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.eye(self.M), lower=1)
        Lmipsi1 = np.dot(self.Lmi, self.psi1)
        self.Qnn = np.dot(Lmipsi1.T, Lmipsi1)
        self.Diag0 = self.psi0 - np.diag(self.Qnn)

        #TODO eraseme
        """
        self.psi1 = self.psi1_
        self.Lm = jitchol(self.Kmm_)
        self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
        Lmipsi1 = np.dot(self.Lmi,self.psi1)
        #self.true_psi1 = self.kern.K(self.Z,self.X)
        #self.Qnn = mdot(self.true_psi1.T,self.Lmi.T,self.Lmi,self.true_psi1)
        self.Kmmi, a,b,c = pdinv(self.Kmm)
        self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1)
        #self.Diag0 = self.psi0 #- np.diag(self.Qnn)
        self.Diag0 = - np.diag(self.Qnn)

        #Kmmi,Lm,Lmi,logdetK = pdinv(self.Kmm)
        #self.Lambda = self.Kmmi_ + mdot(self.Kmmi_,self.psi1_,self.beta_star*self.psi1_.T,self.Kmmi_) + np.eye(self.M)*100
        #self.Lambdai, LLm, LLmi, self.logdetLambda = pdinv(self.Lambda)
        """

        #TODO uncomment
        self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None])  # Includes Diag0 in the precision
        self.V_star = self.beta_star * self.likelihood.Y

        # The rather complex computations of self.A
        if self.has_uncertain_inputs:
            raise NotImplementedError
        else:
            if self.likelihood.is_heteroscedastic:
                assert self.likelihood.D == 1  # TODO: what if the likelihood is heteroscedastic and there are multiple independent outputs?
            tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N)))
            tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
            self.A = tdot(tmp)

        # factor B
        self.B = np.eye(self.M) + self.A
        self.LB = jitchol(self.B)
        self.LBi, info = linalg.lapack.flapack.dtrtrs(self.LB, np.eye(self.M), lower=1)
        self.psi1V = np.dot(self.psi1, self.V_star)

        # back substitute C into psi1V
        tmp, info1 = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
        self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)

        # dlogbeta_dtheta
        Kmmipsi1 = np.dot(self.Lmi.T, Lmipsi1)
        b_psi1_Ki = self.beta_star * Kmmipsi1.T
        Ki_pbp_Ki = np.dot(Kmmipsi1, b_psi1_Ki)
        dlogB_dpsi0 = -.5*self.kern.dKdiag_dtheta(self.beta_star, X=self.X)
        dlogB_dpsi1 = self.kern.dK_dtheta(b_psi1_Ki, self.X, self.Z)
        dlogB_dKmm = -.5*self.kern.dK_dtheta(Ki_pbp_Ki, X=self.Z)
        self.dlogbeta_dtheta = dlogB_dpsi0 + dlogB_dpsi1 + dlogB_dKmm

        # dyby_dtheta
        Kmmi = np.dot(self.Lmi.T, self.Lmi)
        VVT = np.outer(self.V_star, self.V_star)
        VV_p_Ki = np.dot(VVT, Kmmipsi1.T)
        Ki_pVVp_Ki = np.dot(Kmmipsi1, VV_p_Ki)
        dyby_dpsi0 = .5 * self.kern.dKdiag_dtheta(self.V_star**2, self.X)
        dyby_dpsi1 = 0
        dyby_dKmm = 0
        dyby_dtheta = dyby_dpsi0

        for psi1_n, V_n, X_n in zip(self.psi1.T, self.V_star, self.X):
            dyby_dpsi1 = -V_n**2 * np.dot(psi1_n[None,:], Kmmi)
            dyby_dtheta += self.kern.dK_dtheta(dyby_dpsi1, X_n[:,None], self.Z)

        for psi1_n, V_n, X_n in zip(self.psi1.T, self.V_star, self.X):
            psin_K = np.dot(psi1_n[None,:], Kmmi)
            tmp = np.dot(psin_K.T, psin_K)
            dyby_dKmm = .5*V_n**2 * tmp
            dyby_dtheta += self.kern.dK_dtheta(dyby_dKmm, self.Z)

        self.dyby_dtheta = dyby_dtheta

        # dlogB_dtheta : C
        #C_B
        dC_B = -.5*Kmmi
        C_B = self.kern.dK_dtheta(dC_B, self.Z)  #check
        #C_A
        LBiLmi = np.dot(self.LBi, self.Lmi)
        LBL_inv = np.dot(LBiLmi.T, LBiLmi)
        dC_AA = .5*LBL_inv
        C_AA = self.kern.dK_dtheta(dC_AA, self.Z)  #check
        #C_AB
        psi1beta = self.psi1*self.beta_star.T
        dC_ABA = mdot(LBL_inv, psi1beta, Kmmipsi1.T)
        C_ABA = self.kern.dK_dtheta(dC_ABA, self.Z)
        dC_ABB = -np.dot(psi1beta.T, LBL_inv)  #check
        C_ABB = self.kern.dK_dtheta(dC_ABB, self.X, self.Z)  #check
        # C_ABC
        betapsi1TLmiLBi = np.dot(psi1beta.T, LBiLmi.T)
        alpha = np.array([np.dot(a.T, a) for a in betapsi1TLmiLBi])[:,None]
        dC_ABCA = .5 * alpha
        C_ABCA = self.kern.dKdiag_dtheta(dC_ABCA, self.X)  #check

        C_ABCB = 0
        for psi1_n, alpha_n, X_n in zip(self.psi1.T, alpha, self.X):
            dC_ABCB_n = - alpha_n * np.dot(psi1_n[None,:], Kmmi)
            C_ABCB += self.kern.dK_dtheta(dC_ABCB_n, X_n[:,None], self.Z)  #check

        C_ABCC = 0
        for psi1_n, alpha_n, X_n in zip(self.psi1.T, alpha, self.X):
            psin_K = np.dot(psi1_n[None,:], Kmmi)
            tmp = np.dot(psin_K.T, psin_K)
            dC_ABCC = .5 * alpha_n * tmp
            C_ABCC += self.kern.dK_dtheta(dC_ABCC, self.Z)  #check

        self.dlogB_dtheta = C_B + C_AA + C_ABA + C_ABB + C_ABCA + C_ABCB + C_ABCC

        # the partial derivative vector for the likelihood
        if self.likelihood.Nparams == 0:
            # save computation here.
            self.partial_for_likelihood = None
        elif self.likelihood.is_heteroscedastic:
            raise NotImplementedError, "heteroscedastic derivatives not implemented"
        else:
            # likelihood is not heteroscedastic
            self.partial_for_likelihood = 0  #FIXME
            #self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
            #self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
            #self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))

    def log_likelihood(self):
        """ Compute the (lower bound on the) log marginal likelihood """
        A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
        C = -self.D * (np.sum(np.log(np.diag(self.LB))))
        """
        A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
        #B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
        C = -self.D * (np.sum(np.log(np.diag(self.LB))))
        D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
        return A + C + D # +B
        """
        return A + C

    def _log_likelihood_gradients(self):
        pass
        return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))

    def dL_dtheta(self):
        #dL_dtheta = self.dlogB_dtheta
        #dL_dtheta = self.dyby_dtheta
        #dL_dtheta = self.dlogbeta_dtheta + self.dyby_dtheta
        dL_dtheta = self.dlogB_dtheta
        dL_dtheta = self.dlogbeta_dtheta + self.dyby_dtheta + self.dlogB_dtheta
        """
        dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
        if self.has_uncertain_inputs:
            dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance)
            dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T, self.Z, self.X, self.X_variance)
            dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance)
        else:
            dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.Z, self.X)
            dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
        """
        return dL_dtheta

    def dL_dZ(self):
        dL_dZ = np.zeros(self.M)
        """
        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z)  # factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
        if self.has_uncertain_inputs:
            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
            dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
        else:
            dL_dZ += self.kern.dK_dX(self.dL_dpsi1, self.Z, self.X)
        """
        return dL_dZ
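
The helper backsub_both_sides defined at the top of this file (and again in sparse_GP.py) applies a lower Cholesky factor from both sides, returning L^-T * X * L^-1 via two triangular solves. A quick numerical check using only numpy and scipy (an illustration, not part of the commit):

    import numpy as np
    from scipy import linalg

    A = np.random.randn(4, 4)
    K = np.dot(A, A.T) + 4 * np.eye(4)         # a positive definite matrix
    L = np.linalg.cholesky(K)                  # lower Cholesky factor
    X = np.random.randn(4, 4); X = X + X.T     # symmetric X

    tmp = linalg.solve_triangular(L, X, lower=True, trans=1)        # L^-T X
    res = linalg.solve_triangular(L, tmp.T, lower=True, trans=1).T  # (L^-T X) L^-1

    Li = np.linalg.inv(L)
    print np.allclose(res, np.dot(np.dot(Li.T, X), Li))             # expect True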

View file

@@ -12,3 +12,4 @@ from sparse_GPLVM import sparse_GPLVM
from Bayesian_GPLVM import Bayesian_GPLVM
from mrd import MRD
from generalized_FITC import generalized_FITC
+#from FITC import FITC

View file

@@ -287,6 +287,9 @@ class MRD(model):
        else:
            return pylab.gcf()

+    def plot_X_1d(self):
+        return self.gref.plot_X_1d()
+
    def plot_X(self, fig_num="MRD Predictions", axes=None):
        fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.X))
        return fig

View file

@ -9,10 +9,10 @@ from .. import kern
from GP import GP from GP import GP
from scipy import linalg from scipy import linalg
def backsub_both_sides(L,X): def backsub_both_sides(L, X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky""" """ Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1) tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
class sparse_GP(GP): class sparse_GP(GP):
""" """
@ -35,22 +35,22 @@ class sparse_GP(GP):
""" """
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False): def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
self.scale_factor = 100.0# a scaling factor to help keep the algorithm stable # self.scale_factor = 100.0 # a scaling factor to help keep the algorithm stable
self.auto_scale_factor = False # self.auto_scale_factor = False
self.Z = Z self.Z = Z
self.M = Z.shape[0] self.M = Z.shape[0]
self.likelihood = likelihood self.likelihood = likelihood
if X_variance is None: if X_variance is None:
self.has_uncertain_inputs=False self.has_uncertain_inputs = False
else: else:
assert X_variance.shape==X.shape assert X_variance.shape == X.shape
self.has_uncertain_inputs=True self.has_uncertain_inputs = True
self.X_variance = X_variance self.X_variance = X_variance
GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X) GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X)
#normalize X uncertainty also # normalize X uncertainty also
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.X_variance /= np.square(self._Xstd) self.X_variance /= np.square(self._Xstd)
@ -59,141 +59,152 @@ class sparse_GP(GP):
# kernel computations, using BGPLVM notation # kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z) self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_variance) self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance)
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_variance).T self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_variance) self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance)
else: else:
self.psi0 = self.kern.Kdiag(self.X) self.psi0 = self.kern.Kdiag(self.X)
self.psi1 = self.kern.K(self.Z,self.X) self.psi1 = self.kern.K(self.Z, self.X)
self.psi2 = None self.psi2 = None
def _computations(self): def _computations(self):
sf = self.scale_factor # sf = self.scale_factor
sf2 = sf**2 # sf2 = sf ** 2
#factor Kmm # factor Kmm
self.Lm = jitchol(self.Kmm) self.Lm = jitchol(self.Kmm)
#The rather complex computations of self.A # The rather complex computations of self.A
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
assert self.likelihood.D == 1 #TODO: what if the likelihood is heterscedatic and there are multiple independent outputs? assert self.likelihood.D == 1 # TODO: what if the likelihood is heterscedatic and there are multiple independent outputs?
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0) # psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1) / sf2)).sum(0)
psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1))).sum(0)
evals, evecs = linalg.eigh(psi2_beta_scaled) evals, evecs = linalg.eigh(psi2_beta_scaled)
clipped_evals = np.clip(evals,0.,1e6) # TODO: make clipping configurable clipped_evals = np.clip(evals, 0., 1e15) # TODO: make clipping configurable
if not np.allclose(evals, clipped_evals): if not np.allclose(evals, clipped_evals):
print "Warning: clipping posterior eigenvalues" print "Warning: clipping posterior eigenvalues"
tmp = evecs*np.sqrt(clipped_evals) tmp = evecs * np.sqrt(clipped_evals)
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1) tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp) self.A = tdot(tmp)
else: else:
tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf) # tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)) / sf)
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1) tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp) self.A = tdot(tmp)
else: else:
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0) # psi2_beta_scaled = (self.psi2 * (self.likelihood.precision / sf2)).sum(0)
psi2_beta_scaled = (self.psi2 * (self.likelihood.precision)).sum(0)
evals, evecs = linalg.eigh(psi2_beta_scaled) evals, evecs = linalg.eigh(psi2_beta_scaled)
clipped_evals = np.clip(evals,0.,1e6) # TODO: make clipping configurable clipped_evals = np.clip(evals, 0., 1e15) # TODO: make clipping configurable
if not np.allclose(evals, clipped_evals): if not np.allclose(evals, clipped_evals):
print "Warning: clipping posterior eigenvalues" print "Warning: clipping posterior eigenvalues"
tmp = evecs*np.sqrt(clipped_evals) tmp = evecs * np.sqrt(clipped_evals)
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1) tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp) self.A = tdot(tmp)
else: else:
tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf) # tmp = self.psi1 * (np.sqrt(self.likelihood.precision) / sf)
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1) tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp) self.A = tdot(tmp)
#factor B # factor B
self.B = np.eye(self.M)/sf2 + self.A # self.B = np.eye(self.M) / sf2 + self.A
self.B = np.eye(self.M) + self.A
self.LB = jitchol(self.B) self.LB = jitchol(self.B)
self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y # TODO: make a switch for either first compute psi1V, or VV.T
self.psi1V = np.dot(self.psi1, self.V) self.psi1V = np.dot(self.psi1, self.likelihood.V)
#back substutue C into psi1V # back substutue C into psi1V
tmp,info1 = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.psi1V),lower=1,trans=0) tmp, info1 = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V,_ = linalg.lapack.flapack.dtrtrs(self.LB,np.asfortranarray(tmp),lower=1,trans=0) self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
tmp,info2 = linalg.lapack.flapack.dpotrs(self.LB,tmp,lower=1) tmp, info2 = linalg.lapack.flapack.dpotrs(self.LB, tmp, lower=1)
self.Cpsi1V,info3 = linalg.lapack.flapack.dtrtrs(self.Lm,tmp,lower=1,trans=1) self.Cpsi1V, info3 = linalg.lapack.flapack.dtrtrs(self.Lm, tmp, lower=1, trans=1)
# Compute dL_dKmm # Compute dL_dKmm
tmp = tdot(self._LBi_Lmi_psi1V) tmp = tdot(self._LBi_Lmi_psi1V)
self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.D*np.eye(self.M) + tmp) self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.D * np.eye(self.M) + tmp)
tmp = -0.5*self.DBi_plus_BiPBi/sf2 # tmp = -0.5 * self.DBi_plus_BiPBi / sf2
tmp += -0.5*self.B*sf2*self.D # tmp += -0.5 * self.B * sf2 * self.D
tmp += self.D*np.eye(self.M) tmp = -0.5 * self.DBi_plus_BiPBi
self.dL_dKmm = backsub_both_sides(self.Lm,tmp) tmp += -0.5 * self.B * self.D
tmp += self.D * np.eye(self.M)
self.dL_dKmm = backsub_both_sides(self.Lm, tmp)
# Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten() self.dL_dpsi0 = -0.5 * self.D * (self.likelihood.precision * np.ones([self.N, 1])).flatten()
self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T) self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T)
dL_dpsi2_beta = 0.5*backsub_both_sides(self.Lm,self.D*np.eye(self.M) - self.DBi_plus_BiPBi) dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.D * np.eye(self.M) - self.DBi_plus_BiPBi)
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.dL_dpsi2 = self.likelihood.precision[:,None,None]*dL_dpsi2_beta[None,:,:] self.dL_dpsi2 = self.likelihood.precision[:, None, None] * dL_dpsi2_beta[None, :, :]
else: else:
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta,self.psi1*self.likelihood.precision.reshape(1,self.N)) self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.N))
self.dL_dpsi2 = None self.dL_dpsi2 = None
else: else:
dL_dpsi2 = self.likelihood.precision*dL_dpsi2_beta dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
#repeat for each of the N psi_2 matrices # repeat for each of the N psi_2 matrices
self.dL_dpsi2 = np.repeat(dL_dpsi2[None,:,:],self.N,axis=0) self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.N, axis=0)
else: else:
#subsume back into psi1 (==Kmn) # subsume back into psi1 (==Kmn)
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2,self.psi1) self.dL_dpsi1 += 2.*np.dot(dL_dpsi2, self.psi1)
self.dL_dpsi2 = None self.dL_dpsi2 = None
#the partial derivative vector for the likelihood # the partial derivative vector for the likelihood
if self.likelihood.Nparams ==0: if self.likelihood.Nparams == 0:
#save computation here. # save computation here.
self.partial_for_likelihood = None self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic: elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedatic derivates not implemented" raise NotImplementedError, "heteroscedatic derivates not implemented"
else: else:
#likelihood is not heterscedatic # likelihood is not heterscedatic
self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2 self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2) # self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision * sf2)
self.partial_for_likelihood += self.likelihood.precision*(0.5*np.sum(self.A*self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V))) self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
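(Transcribing the homoscedastic branch above, with β = self.likelihood.precision and A, DBi_plus_BiPBi the cached matrices of the same names, the collected partial is

    \mathtt{partial\_for\_likelihood} \;=\; -\tfrac12 N D \beta \;+\; \tfrac12 \beta^{2}\,\mathrm{tr}(YY^\top)
        \;+\; \tfrac12 D\big(\beta^{2}\textstyle\sum_n \psi_0^{(n)} - \beta\,\mathrm{tr}(A)\big)
        \;+\; \beta\big(\tfrac12 \textstyle\sum_{ij}[A \circ \mathtt{DBi\_plus\_BiPBi}]_{ij} - \lVert L_B^{-1}L_m^{-1}\Psi_1 V\rVert^{2}\big),

which, under the reading β = 1/σ², appears to be ∂L/∂σ², left for the likelihood object to chain with its own parameter gradients.)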
def log_likelihood(self): def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """ """ Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2 # sf2 = self.scale_factor ** 2
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y) A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
B = -0.5*self.D*(np.sum(self.likelihood.precision.flatten()*self.psi0) - np.trace(self.A)*sf2) # B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else: else:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2) # B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.M*np.log(sf2)) B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V)) # C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5 * self.M * np.log(sf2))
return A+B+C+D C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + B + C + D
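(Reading the homoscedastic branch of the new code directly, with β the noise precision, σ² = 1/β, L_m and L_B the Cholesky factors in self.Lm and self.LB, and V the precision-scaled targets held by the likelihood, the four terms of the bound are

    A = -\tfrac{ND}{2}\log 2\pi \;-\; \tfrac{ND}{2}\log\sigma^{2} \;-\; \tfrac{\beta}{2}\,\mathrm{tr}(YY^\top),
    \qquad
    B = -\tfrac{D}{2}\Big(\beta\textstyle\sum_n \psi_0^{(n)} - \mathrm{tr}(\mathtt{self.A})\Big),
    \qquad
    C = -D\textstyle\sum_m \log\,[L_B]_{mm},
    \qquad
    D = \tfrac12\,\lVert L_B^{-1} L_m^{-1}\Psi_1 V\rVert_F^{2};

note that the scalars A–D here are unrelated to the cached matrix self.A despite the name clash, and that all scale_factor (sf2) contributions are now commented out.)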
def _set_params(self, p): def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q) self.Z = p[:self.M * self.Q].reshape(self.M, self.Q)
self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam]) self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:]) self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:])
self._compute_kernel_matrices() self._compute_kernel_matrices()
#if self.auto_scale_factor: # if self.auto_scale_factor:
# self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision) # self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
#if self.auto_scale_factor: # if self.auto_scale_factor:
#if self.likelihood.is_heteroscedastic: # if self.likelihood.is_heteroscedastic:
#self.scale_factor = max(100,np.sqrt(self.psi2_beta_scaled.sum(0).mean())) # self.scale_factor = max(100,np.sqrt(self.psi2_beta_scaled.sum(0).mean()))
#else: # else:
#self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision) # self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
self.scale_factor = 1. # self.scale_factor = 100.
self._computations() self._computations()
def _get_params(self): def _get_params(self):
return np.hstack([self.Z.flatten(),GP._get_params(self)]) return np.hstack([self.Z.flatten(), GP._get_params(self)])
def _get_param_names(self): def _get_param_names(self):
return sum([['iip_%i_%i'%(i,j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])],[]) + GP._get_param_names(self) return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], []) + GP._get_param_names(self)
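(For orientation, the flat parameter vector handled by _set_params/_get_params is laid out as [Z.flatten(), kernel parameters, likelihood parameters]; a minimal sketch of that packing, with made-up shapes, is

    import numpy as np

    M, Q = 10, 2                        # hypothetical numbers of inducing points and latent dims
    Z = np.random.randn(M, Q)
    kern_params = np.ones(3)            # stand-in for self.kern._get_params()
    lik_params = np.array([0.1])        # stand-in for self.likelihood._get_params()

    p = np.hstack([Z.flatten(), kern_params, lik_params])
    Z_back = p[:M * Q].reshape(M, Q)    # mirrors the slicing in _set_params
    assert np.allclose(Z_back, Z)
)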
def update_likelihood_approximation(self): def update_likelihood_approximation(self):
""" """
@@ -205,9 +216,9 @@ class sparse_GP(GP):
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
raise NotImplementedError, "EP approximation not implemented for uncertain inputs" raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
else: else:
self.likelihood.fit_DTC(self.Kmm,self.psi1) self.likelihood.fit_DTC(self.Kmm, self.psi1)
#self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) # self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP self._set_params(self._get_params()) # update the GP
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
@@ -217,13 +228,13 @@ class sparse_GP(GP):
""" """
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
""" """
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z) dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_variance) dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_variance) dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z,self.X, self.X_variance) dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance)
else: else:
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X) dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.Z, self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X) dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
return dL_dtheta return dL_dtheta
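(In other words, the kernel gradient is assembled by the chain rule over the cached arrays dL_dKmm, dL_dpsi0, dL_dpsi1 and dL_dpsi2,

    \frac{dL}{d\theta} \;=\; \Big\langle \frac{\partial L}{\partial K_{mm}}, \frac{\partial K_{mm}}{\partial\theta}\Big\rangle
      + \Big\langle \frac{\partial L}{\partial \psi_0}, \frac{\partial \psi_0}{\partial\theta}\Big\rangle
      + \Big\langle \frac{\partial L}{\partial \Psi_1}, \frac{\partial \Psi_1}{\partial\theta}\Big\rangle
      + \Big\langle \frac{\partial L}{\partial \Psi_2}, \frac{\partial \Psi_2}{\partial\theta}\Big\rangle,

with the Ψ₁/Ψ₂ terms collapsing onto K_{mn} and the kernel diagonal in the certain-inputs branch, where dL_dpsi2 has already been folded into dL_dpsi1.)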
@@ -243,17 +254,17 @@ class sparse_GP(GP):
def _raw_predict(self, Xnew, which_parts='all', full_cov=False): def _raw_predict(self, Xnew, which_parts='all', full_cov=False):
"""Internal helper function for making predictions, does not account for normalization""" """Internal helper function for making predictions, does not account for normalization"""
Bi,_ = linalg.lapack.flapack.dpotri(self.LB,lower=0) # WTH? this lower switch should be 1, but that doesn't work! Bi, _ = linalg.lapack.flapack.dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work!
symmetrify(Bi) symmetrify(Bi)
Kmmi_LmiBLmi = backsub_both_sides(self.Lm,np.eye(self.M) - Bi) Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.M) - Bi)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts) Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
mu = np.dot(Kx.T, self.Cpsi1V/self.scale_factor) mu = np.dot(Kx.T, self.Cpsi1V) # / self.scale_factor)
if full_cov: if full_cov:
Kxx = self.kern.K(Xnew,which_parts=which_parts) Kxx = self.kern.K(Xnew, which_parts=which_parts)
var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) #NOTE this won't work for plotting var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting
else: else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts) Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
var = Kxx - np.sum(Kx*np.dot(Kmmi_LmiBLmi, Kx),0) var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
return mu,var[:,None] return mu, var[:, None]
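(Assuming again that backsub_both_sides(L, X) = L^{-T} X L^{-1}, the quantities returned here are the usual sparse-GP predictive moments,

    \mu_* \;=\; K_{*m}\,L_m^{-\top} B^{-1} L_m^{-1}\Psi_1 V \quad\text{(the cached Cpsi1V)},
    \qquad
    v_* \;=\; k_{**} \;-\; k_{*m}\big(K_{mm}^{-1} - L_m^{-\top} B^{-1} L_m^{-1}\big)k_{m*},

where B is the matrix whose Cholesky factor is self.LB; the division of the mean by scale_factor is now commented out, consistent with the scale-factor machinery being disabled elsewhere in this commit.)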


@@ -9,6 +9,7 @@ from GPy.inference.conjugate_gradient_descent import CGD, RUNNING
import pylab import pylab
import time import time
from scipy.optimize.optimize import rosen, rosen_der from scipy.optimize.optimize import rosen, rosen_der
from GPy.inference.gradient_descent_update_rules import PolakRibiere
class Test(unittest.TestCase): class Test(unittest.TestCase):
@@ -25,12 +26,12 @@ class Test(unittest.TestCase):
restarts = 10 restarts = 10
for _ in range(restarts): for _ in range(restarts):
try: try:
x0 = numpy.random.randn(N) * 300 x0 = numpy.random.randn(N) * 10
res = opt.opt(f, df, x0, messages=0, res = opt.opt(f, df, x0, messages=0, maxiter=1000, gtol=1e-15)
maxiter=1000, gtol=1e-10) assert numpy.allclose(res[0], 0, atol=1e-5)
assert numpy.allclose(res[0], 0, atol=1e-3)
break break
except: except AssertionError:
import ipdb;ipdb.set_trace()
# RESTART # RESTART
pass pass
else: else:
@@ -46,9 +47,9 @@ class Test(unittest.TestCase):
restarts = 10 restarts = 10
for _ in range(restarts): for _ in range(restarts):
try: try:
x0 = numpy.random.randn(N) * .5 x0 = (numpy.random.randn(N) * .5) + numpy.ones(N)
res = opt.opt(f, df, x0, messages=0, res = opt.opt(f, df, x0, messages=0,
maxiter=5e2, gtol=1e-2) maxiter=1e3, gtol=1e-12)
assert numpy.allclose(res[0], 1, atol=.1) assert numpy.allclose(res[0], 1, atol=.1)
break break
except: except:
@@ -67,14 +68,16 @@ if __name__ == "__main__":
N = 2 N = 2
A = numpy.random.rand(N) * numpy.eye(N) A = numpy.random.rand(N) * numpy.eye(N)
b = numpy.random.rand(N) * 0 b = numpy.random.rand(N) * 0
# f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b) f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b)
# df = lambda x: numpy.dot(A, x) - b df = lambda x: numpy.dot(A, x) - b
f = rosen # f = rosen
df = rosen_der # df = rosen_der
x0 = numpy.random.randn(N) * .5 x0 = (numpy.random.randn(N) * .5) + numpy.ones(N)
print x0
opt = CGD() opt = CGD()
pylab.ion()
fig = pylab.figure("cgd optimize") fig = pylab.figure("cgd optimize")
if fig.axes: if fig.axes:
ax = fig.axes[0] ax = fig.axes[0]
@@ -83,13 +86,14 @@ if __name__ == "__main__":
ax = fig.add_subplot(111, projection='3d') ax = fig.add_subplot(111, projection='3d')
interpolation = 40 interpolation = 40
# x, y = numpy.linspace(.5, 1.5, interpolation)[:, None], numpy.linspace(.5, 1.5, interpolation)[:, None]
x, y = numpy.linspace(-1, 1, interpolation)[:, None], numpy.linspace(-1, 1, interpolation)[:, None] x, y = numpy.linspace(-1, 1, interpolation)[:, None], numpy.linspace(-1, 1, interpolation)[:, None]
X, Y = numpy.meshgrid(x, y) X, Y = numpy.meshgrid(x, y)
fXY = numpy.array([f(numpy.array([x, y])) for x, y in zip(X.flatten(), Y.flatten())]).reshape(interpolation, interpolation) fXY = numpy.array([f(numpy.array([x, y])) for x, y in zip(X.flatten(), Y.flatten())]).reshape(interpolation, interpolation)
ax.plot_wireframe(X, Y, fXY) ax.plot_wireframe(X, Y, fXY)
xopts = [x0.copy()] xopts = [x0.copy()]
optplts, = ax.plot3D([x0[0]], [x0[1]], zs=f(x0), marker='o', color='r') optplts, = ax.plot3D([x0[0]], [x0[1]], zs=f(x0), marker='', color='r')
raw_input("enter to start optimize") raw_input("enter to start optimize")
res = [0] res = [0]
@@ -102,11 +106,7 @@ if __name__ == "__main__":
if r[-1] != RUNNING: if r[-1] != RUNNING:
res[0] = r res[0] = r
p, c = opt.opt_async(f, df, x0.copy(), callback, messages=True, maxiter=1000, res[0] = opt.opt(f, df, x0.copy(), callback, messages=True, maxiter=1000,
report_every=20, gtol=1e-12) report_every=7, gtol=1e-12, update_rule=PolakRibiere)
pylab.ion()
pylab.show()
pass pass
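(As a side note, a minimal synchronous use of the optimiser exercised by this test might look like the sketch below; the CGD() constructor, the opt(f, df, x0, messages=, maxiter=, gtol=) call and the convention that res[0] holds the solution are taken from the test above, while the quadratic objective is made up for illustration.

    import numpy
    from GPy.inference.conjugate_gradient_descent import CGD

    # a simple convex quadratic with its minimum at the origin (illustrative only)
    A = numpy.diag(numpy.array([1., 10.]))
    f = lambda x: numpy.dot(x, numpy.dot(A, x))
    df = lambda x: 2. * numpy.dot(A, x)

    opt = CGD()
    x0 = numpy.random.randn(2)
    res = opt.opt(f, df, x0, messages=0, maxiter=1000, gtol=1e-12)
    print res[0]  # expected to end up close to the origin
)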


@@ -16,7 +16,7 @@ import cPickle
import types import types
import ctypes import ctypes
from ctypes import byref, c_char, c_int, c_double # TODO from ctypes import byref, c_char, c_int, c_double # TODO
#import scipy.lib.lapack.flapack #import scipy.lib.lapack
import scipy as sp import scipy as sp
try: try:
@@ -177,8 +177,8 @@ def PCA(Y, Q):
""" """
if not np.allclose(Y.mean(axis=0), 0.0): if not np.allclose(Y.mean(axis=0), 0.0):
print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)" print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)"
#Y -= Y.mean(axis=0) #Y -= Y.mean(axis=0)
Z = linalg.svd(Y-Y.mean(axis=0), full_matrices = False) Z = linalg.svd(Y-Y.mean(axis=0), full_matrices = False)
[X, W] = [Z[0][:,0:Q], np.dot(np.diag(Z[1]), Z[2]).T[:,0:Q]] [X, W] = [Z[0][:,0:Q], np.dot(np.diag(Z[1]), Z[2]).T[:,0:Q]]
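(The PCA helper retained here is a thin SVD of the centred data; a self-contained sketch of the same decomposition, with made-up N, D, Q, and the rank-Q reconstruction it implies:

    import numpy as np
    from scipy import linalg

    N, D, Q = 100, 5, 2                       # hypothetical data and latent sizes
    Y = np.random.randn(N, D)
    Yc = Y - Y.mean(axis=0)                   # centre, as the function does internally

    U, S, Vt = linalg.svd(Yc, full_matrices=False)
    X = U[:, :Q]                              # latent coordinates, N x Q
    W = np.dot(np.diag(S), Vt).T[:, :Q]       # scaled principal directions, D x Q

    # X W^T is the best rank-Q approximation of the centred data
    err = np.linalg.norm(Yc - np.dot(X, W.T))
    print err
)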