Ricardo told me to do this

This commit is contained in:
Teo de Campos 2013-05-14 17:42:36 +01:00
commit 68041174cd
28 changed files with 730 additions and 318 deletions

View file

@ -50,7 +50,9 @@ class logexp_clipped(transformation):
ef = np.exp(f)
gf = (ef - 1.) / ef
return np.where(f < 1e-6, 0, gf)
def initialize(self, f):
def initialize(self,f):
if np.any(f<0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
return '(+ve)'
@ -65,6 +67,8 @@ class exponent(transformation):
def gradfactor(self, f):
return f
def initialize(self, f):
if np.any(f<0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
return '(+ve)'
@ -79,6 +83,8 @@ class negative_exponent(transformation):
def gradfactor(self, f):
return f
def initialize(self, f):
if np.any(f>0.):
print "Warning: changing parameters to satisfy constraints"
return -np.abs(f)
def __str__(self):
return '(-ve)'
@ -108,9 +114,11 @@ class logistic(transformation):
def finv(self, f):
return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf))
def gradfactor(self, f):
return (f - self.lower) * (self.upper - f) / self.difference
def initialize(self, f):
return self.f(f * 0.)
return (f-self.lower)*(self.upper-f)/self.difference
def initialize(self,f):
if np.any(np.logical_or(f<self.lower,f>self.upper)):
print "Warning: changing parameters to satisfy constraints"
return np.where(np.logical_or(f<self.lower,f>self.upper),self.f(f*0.),f)
def __str__(self):
return '({},{})'.format(self.lower, self.upper)

View file

@ -63,7 +63,7 @@ def GPLVM_oil_100(optimize=True):
m.plot_latent(labels=m.data_labels)
return m
def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300, plot=False):
def BGPLVM_oil(optimize=True, N=100, Q=10, M=20, max_f_eval=300, plot=False):
data = GPy.util.datasets.oil()
# create simple GP model
@ -72,19 +72,19 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300, plot=False):
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=kernel, M=M)
m.data_labels = data['Y'][:N].argmax(axis=1)
m.constrain('variance', logexp_clipped())
m.constrain('length', logexp_clipped())
m['lengt'] = 100.
m.ensure_default_constraints()
# optimize
if optimize:
m.constrain_fixed('noise', 1. / Y.var())
m.constrain('variance', logexp_clipped())
m['lengt'] = 1000
m.unconstrain('noise'); m.constrain_fixed('noise', Y.var() / 100.)
m.optimize('scg', messages=1, max_f_eval=150)
m.ensure_default_constraints()
m.optimize('scg', messages=1, max_f_eval=max(80, max_f_eval))
m.unconstrain('noise')
m.constrain_positive('noise')
m.optimize('scg', messages=1, max_f_eval=max(0, max_f_eval - 80))
else:
m.ensure_default_constraints()
m.constrain('noise', logexp_clipped())
m.optimize('scg', messages=1, max_f_eval=max_f_eval)
if plot:
y = m.likelihood.Y[0, :]
@ -92,7 +92,7 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300, plot=False):
plt.sca(latent_axes)
m.plot_latent()
data_show = GPy.util.visualize.vector_show(y)
lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], m, data_show, latent_axes=latent_axes) # , sense_axes=sense_axes)
lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes) # , sense_axes=sense_axes)
raw_input('Press enter to finish')
plt.close('all')
# # plot
@ -182,7 +182,7 @@ def bgplvm_simulation_matlab_compare():
Y = sim_data['Y']
S = sim_data['S']
mu = sim_data['mu']
M, [_, Q] = 20, mu.shape
M, [_, Q] = 3, mu.shape
from GPy.models import mrd
from GPy import kern
@ -192,7 +192,7 @@ def bgplvm_simulation_matlab_compare():
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k,
# X=mu,
# X_variance=S,
_debug=True)
_debug=False)
m.ensure_default_constraints()
m.auto_scale_factor = True
m['noise'] = Y.var() / 100.
@ -223,7 +223,7 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
Y = Ylist[0]
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q)
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q)
# k = kern.white(Q, .00001) + kern.bias(Q)
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
# m.set('noise',)
@ -358,7 +358,7 @@ def mrd_simulation(plot_sim=False):
# import ipdb; ipdb.set_trace()
# np.seterrcall(ipdbonerr)
return m # , mtest
return m # , mtest
def mrd_silhouette():
@ -371,12 +371,12 @@ def brendan_faces():
# optimize
m.ensure_default_constraints()
# m.optimize(messages=1, max_f_eval=10000)
m.optimize(messages=1, max_f_eval=10000)
ax = m.plot_latent()
y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :], m, data_show, ax)
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')
plt.close('all')
@ -389,11 +389,12 @@ def stick():
# optimize
m.ensure_default_constraints()
m.optimize(messages=1, max_f_eval=10000)
m._set_params(m._get_params())
ax = m.plot_latent()
y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :], m, data_show, ax)
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')
plt.close('all')
@ -415,7 +416,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True):
ax = m.plot_latent()
y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel'])
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :], m, data_show, ax)
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')
plt.close('all')

View file

@ -49,6 +49,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
function_eval = 1
fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient.
current_grad = np.dot(gradnew, gradnew)
gradold = gradnew.copy()
d = -gradnew # Initial search direction.
success = True # Force calculation of directional derivs.
@ -110,7 +111,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
iteration += 1
if display:
print '\r',
print 'i: {0:>5g} f:{1:> 12e} b:{2:> 12e} |g|:{3:> 12e}'.format(iteration, fnow, beta, np.dot(gradnew, gradnew)),
print 'i: {0:>5g} f:{1:> 12e} b:{2:> 12e} |g|:{3:> 12e}'.format(iteration, fnow, beta, current_grad),
# print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush()
@ -125,8 +126,9 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
fold = fnew
gradold = gradnew
gradnew = gradf(x, *optargs)
current_grad = np.dot(gradnew, gradnew)
# If the gradient is zero then we are done.
if (np.dot(gradnew, gradnew)) <= gtol:
if current_grad <= gtol:
status = 'converged'
return x, flog, function_eval, status

View file

@ -97,51 +97,66 @@ class opt_SGD(Optimizer):
return subset
def shift_constraints(self, j):
# back them up
bounded_i = copy.deepcopy(self.model.constrained_bounded_indices)
bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers)
bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers)
for b in range(len(bounded_i)): # for each group of constraints
for bc in range(len(bounded_i[b])):
pos = np.where(j == bounded_i[b][bc])[0]
constrained_indices = copy.deepcopy(self.model.constrained_indices)
for c, constraint in enumerate(constrained_indices):
mask = (np.ones_like(constrained_indices[c]) == 1)
for i in range(len(constrained_indices[c])):
pos = np.where(j == constrained_indices[c][i])[0]
if len(pos) == 1:
pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0]
self.model.constrained_bounded_indices[b][pos2] = pos[0]
self.model.constrained_indices[c][i] = pos
else:
if len(self.model.constrained_bounded_indices[b]) == 1:
# if it's the last index to be removed
# the logic here is just a mess. If we remove the last one, then all the
# b-indices change and we have to iterate through everything to find our
# current index. Can't deal with this right now.
raise NotImplementedError
mask[i] = False
else: # just remove it from the indices
mask = self.model.constrained_bounded_indices[b] != bc
self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask]
self.model.constrained_indices[c] = self.model.constrained_indices[c][mask]
return constrained_indices
# back them up
# bounded_i = copy.deepcopy(self.model.constrained_bounded_indices)
# bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers)
# bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers)
# for b in range(len(bounded_i)): # for each group of constraints
# for bc in range(len(bounded_i[b])):
# pos = np.where(j == bounded_i[b][bc])[0]
# if len(pos) == 1:
# pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0]
# self.model.constrained_bounded_indices[b][pos2] = pos[0]
# else:
# if len(self.model.constrained_bounded_indices[b]) == 1:
# # if it's the last index to be removed
# # the logic here is just a mess. If we remove the last one, then all the
# # b-indices change and we have to iterate through everything to find our
# # current index. Can't deal with this right now.
# raise NotImplementedError
# else: # just remove it from the indices
# mask = self.model.constrained_bounded_indices[b] != bc
# self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask]
# here we shif the positive constraints. We cycle through each positive
# constraint
positive = self.model.constrained_positive_indices.copy()
mask = (np.ones_like(positive) == 1)
for p in range(len(positive)):
# we now check whether the constrained index appears in the j vector
# (the vector of the "active" indices)
pos = np.where(j == self.model.constrained_positive_indices[p])[0]
if len(pos) == 1:
self.model.constrained_positive_indices[p] = pos
else:
mask[p] = False
self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask]
# # here we shif the positive constraints. We cycle through each positive
# # constraint
# positive = self.model.constrained_positive_indices.copy()
# mask = (np.ones_like(positive) == 1)
# for p in range(len(positive)):
# # we now check whether the constrained index appears in the j vector
# # (the vector of the "active" indices)
# pos = np.where(j == self.model.constrained_positive_indices[p])[0]
# if len(pos) == 1:
# self.model.constrained_positive_indices[p] = pos
# else:
# mask[p] = False
# self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask]
return (bounded_i, bounded_l, bounded_u), positive
# return (bounded_i, bounded_l, bounded_u), positive
def restore_constraints(self, b, p):
self.model.constrained_bounded_indices = b[0]
self.model.constrained_bounded_lowers = b[1]
self.model.constrained_bounded_uppers = b[2]
self.model.constrained_positive_indices = p
def restore_constraints(self, c):#b, p):
# self.model.constrained_bounded_indices = b[0]
# self.model.constrained_bounded_lowers = b[1]
# self.model.constrained_bounded_uppers = b[2]
# self.model.constrained_positive_indices = p
self.model.constrained_indices = c
def get_param_shapes(self, N = None, Q = None):
model_name = self.model.__class__.__name__
@ -168,9 +183,15 @@ class opt_SGD(Optimizer):
if self.model.N == 0 or Y.std() == 0.0:
return 0, step, self.model.N
self.model.likelihood._mean = Y.mean()
self.model.likelihood._std = Y.std()
self.model.likelihood._bias = Y.mean()
self.model.likelihood._scale = Y.std()
self.model.likelihood.set_data(Y)
# self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision
sigma = self.model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma)
j = self.subset_parameter_vector(self.x_opt, samples, shapes)
self.model.X = X[samples]
@ -181,27 +202,30 @@ class opt_SGD(Optimizer):
self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T)
self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT)
b, p = self.shift_constraints(j)
ci = self.shift_constraints(j)
f, fp = f_fp(self.x_opt[j])
step[j] = self.momentum * step[j] + self.learning_rate[j] * fp
self.x_opt[j] -= step[j]
self.restore_constraints(ci)
self.restore_constraints(b, p)
# restore likelihood _mean and _std, otherwise when we call set_data(y) on
self.model.grads[j] = fp
# restore likelihood _bias and _scale, otherwise when we call set_data(y) on
# the next feature, it will get normalized with the mean and std of this one.
self.model.likelihood._mean = 0
self.model.likelihood._std = 1
self.model.likelihood._bias = 0
self.model.likelihood._scale = 1
return f, step, self.model.N
def opt(self, f_fp=None, f=None, fp=None):
self.x_opt = self.model._get_params_transformed()
self.model.grads = np.zeros_like(self.x_opt)
X, Y = self.model.X.copy(), self.model.likelihood.Y.copy()
self.model.likelihood.YYT = None
self.model.likelihood.trYYT = None
self.model.likelihood._mean = 0.0
self.model.likelihood._std = 1.0
self.model.likelihood._bias = 0.0
self.model.likelihood._scale = 1.0
N, Q = self.model.X.shape
D = self.model.likelihood.Y.shape[1]
@ -225,6 +249,11 @@ class opt_SGD(Optimizer):
self.model.D = len(j)
self.model.likelihood.D = len(j)
self.model.likelihood.set_data(Y[:, j])
# self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision
sigma = self.model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma)
if missing_data:
shapes = self.get_param_shapes(N, Q)
@ -250,7 +279,6 @@ class opt_SGD(Optimizer):
# plt.clf()
# plt.plot(self.param_traces['noise'])
# import pdb; pdb.set_trace()
# for k in self.param_traces.keys():
# self.param_traces[k].append(self.model.get(k)[0])
@ -262,7 +290,10 @@ class opt_SGD(Optimizer):
self.model.likelihood.N = N
self.model.likelihood.D = D
self.model.likelihood.Y = Y
sigma = self.model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma)
self.trace.append(self.f_opt)
if self.iteration_file is not None:
f = open(self.iteration_file + "iteration%d.pickle" % it, 'w')

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
try:
from constructors import rbf_sympy, sympykern # these depend on sympy
except:

View file

@ -38,7 +38,6 @@ class bias(kernpart):
def dK_dtheta(self,dL_dKdiag,X,X2,target):
target += dL_dKdiag.sum()
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += dL_dKdiag.sum()

View file

@ -20,7 +20,6 @@ from periodic_exponential import periodic_exponential as periodic_exponentialpar
from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part
from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part
from prod import prod as prodpart
from prod_orthogonal import prod_orthogonal as prod_orthogonalpart
from symmetric import symmetric as symmetric_part
from coregionalise import coregionalise as coregionalise_part
from rational_quadratic import rational_quadratic as rational_quadraticpart
@ -255,7 +254,7 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,
part = periodic_Matern52part(D,variance, lengthscale, period, n_freq, lower, upper)
return kern(D, [part])
def prod(k1,k2):
def prod(k1,k2,tensor=False):
"""
Construct a product kernel over D from two kernels over D
@ -263,19 +262,8 @@ def prod(k1,k2):
:type k1, k2: kernpart
:rtype: kernel object
"""
part = prodpart(k1,k2)
return kern(k1.D, [part])
def prod_orthogonal(k1,k2):
"""
Construct a product kernel over D1 x D2 from a kernel over D1 and another over D2.
:param k1, k2: the kernels to multiply
:type k1, k2: kernpart
:rtype: kernel object
"""
part = prod_orthogonalpart(k1,k2)
return kern(k1.D+k2.D, [part])
part = prodpart(k1,k2,tensor)
return kern(part.D, [part])
def symmetric(k):
"""

View file

@ -7,7 +7,6 @@ import pylab as pb
from ..core.parameterised import parameterised
from kernpart import kernpart
import itertools
from prod_orthogonal import prod_orthogonal
from prod import prod
from ..util.linalg import symmetrify
@ -84,96 +83,72 @@ class kern(parameterised):
count += p.Nparam
def __add__(self, other):
assert self.D == other.D
newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [i+self.Nparam for i in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
return newkern
"""
Shortcut for `add`.
"""
return self.add(other)
def add(self, other):
def add(self, other,tensor=False):
"""
Add another kernel to this one. Both kernels are defined on the same _space_
:param other: the other kernel to be added
:type other: GPy.kern
"""
return self + other
if tensor:
D = self.D + other.D
self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]
def add_orthogonal(self, other):
"""
Add another kernel to this one. Both kernels are defined on separate spaces
:param other: the other kernel to be added
:type other: GPy.kern
"""
# deal with input slices
D = self.D + other.D
self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.constraints = self.constraints + other.constraints
newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.constraints = self.constraints + other.constraints
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
else:
assert self.D == other.D
newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [i+self.Nparam for i in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
return newkern
def __mul__(self, other):
"""
Shortcut for `prod_orthogonal`. Note that `+` assumes that we sum 2 kernels defines on the same space whereas `*` assumes that the kernels are defined on different subspaces.
Shortcut for `prod`.
"""
return self.prod(other)
def prod(self, other):
def prod(self, other,tensor=False):
"""
multiply two kernels defined on the same spaces.
multiply two kernels (either on the same space, or on the tensor product of the input space)
:param other: the other kernel to be added
:type other: GPy.kern
"""
K1 = self.copy()
K2 = other.copy()
newkernparts = [prod(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
slices = []
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
s1, s2 = [False] * K1.D, [False] * K2.D
for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
s1, s2 = [False]*K1.D, [False]*K2.D
s1[sl1], s2[sl2] = [True], [True]
slices += [s1 + s2]
slices += [s1+s2]
newkernparts = [prod(k1, k2,tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)]
if tensor:
newkern = kern(K1.D + K2.D, newkernparts, slices)
else:
newkern = kern(K1.D, newkernparts, slices)
newkern = kern(K1.D, newkernparts, slices)
newkern._follow_constrains(K1, K2)
return newkern
def prod_orthogonal(self, other):
"""
multiply two kernels. Both kernels are defined on separate spaces.
:param other: the other kernel to be added
:type other: GPy.kern
"""
K1 = self.copy()
K2 = other.copy()
newkernparts = [prod_orthogonal(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
slices = []
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
s1, s2 = [False] * K1.D, [False] * K2.D
s1[sl1], s2[sl2] = [True], [True]
slices += [s1 + s2]
newkern = kern(K1.D + K2.D, newkernparts, slices)
newkern._follow_constrains(K1, K2)
return newkern
def _follow_constrains(self, K1, K2):
@ -277,7 +252,7 @@ class kern(parameterised):
which_parts = [True]*self.Nparts
assert X.shape[1] == self.D
target = np.zeros(X.shape[0])
[p.Kdiag(X[:, i_s], target=target) for p, i_s in zip(self.parts, self.input_slices)]
[p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on]
return target
def dKdiag_dtheta(self, dL_dKdiag, X):
@ -469,9 +444,9 @@ class kern(parameterised):
return target_mu, target_S
def plot(self, x=None, plot_limits=None, which_functions='all', resolution=None, *args, **kwargs):
if which_functions == 'all':
which_functions = [True] * self.Nparts
def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs):
if which_parts == 'all':
which_parts = [True] * self.Nparts
if self.D == 1:
if x is None:
x = np.zeros((1, 1))
@ -488,7 +463,7 @@ class kern(parameterised):
raise ValueError, "Bad limits for plotting"
Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None]
Kx = self.K(Xnew, x, slices2=which_functions)
Kx = self.K(Xnew, x, which_parts)
pb.plot(Xnew, Kx, *args, **kwargs)
pb.xlim(xmin, xmax)
pb.xlabel("x")
@ -514,7 +489,7 @@ class kern(parameterised):
xg = np.linspace(xmin[0], xmax[0], resolution)
yg = np.linspace(xmin[1], xmax[1], resolution)
Xnew = np.vstack((xx.flatten(), yy.flatten())).T
Kx = self.K(Xnew, x, slices2=which_functions)
Kx = self.K(Xnew, x, which_parts)
Kx = Kx.reshape(resolution, resolution).T
pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs)
pb.xlim(xmin[0], xmax[0])

View file

@ -203,7 +203,7 @@ class linear(kernpart):
target_mu(n,q) += factor*tmp;
target_S(n,q) += factor*AZZA_2(q,m,mm,q);
}
}
}
}
}
"""

View file

@ -4,108 +4,108 @@
from kernpart import kernpart
import numpy as np
import hashlib
#from scipy import integrate # This may not be necessary (Nicolas, 20th Feb)
class prod(kernpart):
"""
Computes the product of 2 kernels that are defined on the same space
Computes the product of 2 kernels
:param k1, k2: the kernels to multiply
:type k1, k2: kernpart
:param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean
:rtype: kernel object
"""
def __init__(self,k1,k2):
assert k1.D == k2.D, "Error: The input spaces of the kernels to multiply must have the same dimension"
self.D = k1.D
def __init__(self,k1,k2,tensor=False):
self.Nparam = k1.Nparam + k2.Nparam
self.name = k1.name + '<times>' + k2.name
self.k1 = k1
self.k2 = k2
if tensor:
self.D = k1.D + k2.D
self.slice1 = slice(0,self.k1.D)
self.slice2 = slice(self.k1.D,self.k1.D+self.k2.D)
else:
assert k1.D == k2.D, "Error: The input spaces of the kernels to sum don't have the same dimension."
self.D = k1.D
self.slice1 = slice(0,self.D)
self.slice2 = slice(0,self.D)
self._X, self._X2, self._params = np.empty(shape=(3,1))
self._set_params(np.hstack((k1._get_params(),k2._get_params())))
def _get_params(self):
"""return the value of the parameters."""
return self.params
return np.hstack((self.k1._get_params(), self.k2._get_params()))
def _set_params(self,x):
"""set the value of the parameters."""
self.k1._set_params(x[:self.k1.Nparam])
self.k2._set_params(x[self.k1.Nparam:])
self.params = x
def _get_param_names(self):
"""return parameter names."""
return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
if X2 is None:
target1 = np.zeros((X.shape[0],X2.shape[0]))
target2 = np.zeros((X.shape[0],X2.shape[0]))
else:
target1 = np.zeros((X.shape[0],X.shape[0]))
target2 = np.zeros((X.shape[0],X.shape[0]))
self.k1.K(X,X2,target1)
self.k2.K(X,X2,target2)
target += target1 * target2
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros((X.shape[0],))
target2 = np.zeros((X.shape[0],))
self.k1.Kdiag(X,target1)
self.k2.Kdiag(X,target2)
target += target1 * target2
self._K_computations(X,X2)
target += self._K1 * self._K2
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
K1 = np.zeros((X.shape[0],X2.shape[0]))
K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X,X2,K1)
self.k2.K(X,X2,K2)
self._K_computations(X,X2)
if X2 is None:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.Nparam])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.Nparam:])
else:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.Nparam])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.Nparam:])
k1_target = np.zeros(self.k1.Nparam)
k2_target = np.zeros(self.k2.Nparam)
self.k1.dK_dtheta(dL_dK*K2, X, X2, k1_target)
self.k2.dK_dtheta(dL_dK*K1, X, X2, k2_target)
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros(X.shape[0])
target2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],target1)
self.k2.Kdiag(X[:,self.slice2],target2)
target += target1 * target2
target[:self.k1.Nparam] += k1_target
target[self.k1.Nparam:] += k2_target
def dKdiag_dtheta(self,dL_dKdiag,X,target):
K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.Nparam])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.Nparam:])
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
K1 = np.zeros((X.shape[0],X2.shape[0]))
K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X,X2,K1)
self.k2.K(X,X2,K2)
self._K_computations(X,X2)
self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target)
self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target)
self.k1.dK_dX(dL_dK*K2, X, X2, target)
self.k2.dK_dX(dL_dK*K1, X, X2, target)
def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
def dKdiag_dX(self,dL_dKdiag,X,target):
target1 = np.zeros((X.shape[0],))
target2 = np.zeros((X.shape[0],))
self.k1.Kdiag(X,target1)
self.k2.Kdiag(X,target2)
self.k1.dK_dX(dL_dKdiag*K2, X[:,self.slice1], target)
self.k2.dK_dX(dL_dKdiag*K1, X[:,self.slice2], target)
self.k1.dKdiag_dX(dL_dKdiag*target2, X, target)
self.k2.dKdiag_dX(dL_dKdiag*target1, X, target)
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros((X.shape[0],))
target2 = np.zeros((X.shape[0],))
self.k1.Kdiag(X,target1)
self.k2.Kdiag(X,target2)
k1_target = np.zeros(self.k1.Nparam)
k2_target = np.zeros(self.k2.Nparam)
self.k1.dKdiag_dtheta(dL_dKdiag*target2, X, k1_target)
self.k2.dKdiag_dtheta(dL_dKdiag*target1, X, k2_target)
target[:self.k1.Nparam] += k1_target
target[self.k1.Nparam:] += k2_target
def _K_computations(self,X,X2):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
self._X = X.copy()
self._params == self._get_params().copy()
if X2 is None:
self._X2 = None
self._K1 = np.zeros((X.shape[0],X.shape[0]))
self._K2 = np.zeros((X.shape[0],X.shape[0]))
self.k1.K(X[:,self.slice1],None,self._K1)
self.k2.K(X[:,self.slice2],None,self._K2)
else:
self._X2 = X2.copy()
self._K1 = np.zeros((X.shape[0],X2.shape[0]))
self._K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1)
self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2)

View file

@ -1,6 +1,6 @@
import numpy as np
from scipy import stats, linalg
from ..util.linalg import pdinv,mdot,jitchol
from ..util.linalg import pdinv,mdot,jitchol,DSYR
from likelihood import likelihood
class EP(likelihood):
@ -113,11 +113,12 @@ class EP(likelihood):
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
si=Sigma[:,i].reshape(self.N,1)
Sigma = Sigma - Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T)
DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
#si=Sigma[:,i:i+1]
#Sigma -= Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T)#DSYR
mu = np.dot(Sigma,self.v_tilde)
self.iterations += 1
#Sigma recomptutation with Cholesky decompositon

View file

@ -7,6 +7,7 @@ from scipy import stats
import scipy as sp
import pylab as pb
from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
class likelihood_function:
"""
@ -37,11 +38,11 @@ class probit(likelihood_function):
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}.
#if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}.
# TODO: some version of assert
z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = stats.norm.cdf(z)
phi = stats.norm.pdf(z)
Z_hat = std_norm_cdf(z)
phi = std_norm_pdf(z)
mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
return Z_hat, mu_hat, sigma2_hat

View file

@ -136,14 +136,16 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
self._savedparams.append([self.f_call, self._get_params()])
self._savedgradients.append([self.f_call, self._log_likelihood_gradients()])
self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]])
sf2 = self.scale_factor ** 2
# sf2 = self.scale_factor ** 2
if self.likelihood.is_heteroscedastic:
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
# B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else:
A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5 * self.M * np.log(sf2))
# B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
self._savedABCD.append([self.f_call, A, B, C, D])

314
GPy/models/FITC.py Normal file
View file

@ -0,0 +1,314 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, tdot, symmetrify,pdinv
#from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
from ..util.plot import gpplot
from .. import kern
from scipy import stats, linalg
from sparse_GP import sparse_GP
def backsub_both_sides(L,X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1)
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
class FITC(sparse_GP):
def update_likelihood_approximation(self):
"""
Approximates a non-gaussian likelihood using Expectation Propagation
For a Gaussian (or direct: TODO) likelihood, no iteration is required:
this function does nothing
Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in sparse_GP.
The true precison is now 'true_precision' not 'precision'.
"""
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
#@profile
def _computations(self):
#factor Kmm
self.Lm = jitchol(self.Kmm)
self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy()
self.Diag0 = self.psi0 - np.diag(self.Qnn)
self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #Includes Diag0 in the precision
self.V_star = self.beta_star * self.likelihood.Y
# The rather complex computations of self.A
if self.has_uncertain_inputs:
raise NotImplementedError
else:
if self.likelihood.is_heteroscedastic:
assert self.likelihood.D == 1
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N)))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
# factor B
self.B = np.eye(self.M) + self.A
self.LB = jitchol(self.B)
self.LBi,info = linalg.lapack.flapack.dtrtrs(self.LB,np.eye(self.M),lower=1)
self.psi1V = np.dot(self.psi1, self.V_star)
# back substutue C into psi1V
tmp, info1 = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
b_psi1_Ki = self.beta_star * Kmmipsi1.T
Ki_pbp_Ki = np.dot(Kmmipsi1,b_psi1_Ki)
Kmmi = np.dot(self.Lmi.T,self.Lmi)
LBiLmi = np.dot(self.LBi,self.Lmi)
LBL_inv = np.dot(LBiLmi.T,LBiLmi)
VVT = np.outer(self.V_star,self.V_star)
VV_p_Ki = np.dot(VVT,Kmmipsi1.T)
Ki_pVVp_Ki = np.dot(Kmmipsi1,VV_p_Ki)
psi1beta = self.psi1*self.beta_star.T
H = self.Kmm + mdot(self.psi1,self.beta_star*self.psi1.T)
Hi, LH, LHi, logdetH = pdinv(H)
betapsi1TLmiLBi = np.dot(psi1beta.T,LBiLmi.T)
alpha = np.array([np.dot(a.T,a) for a in betapsi1TLmiLBi])[:,None]
gamma_1 = mdot(VVT,self.psi1.T,Hi)
pHip = mdot(self.psi1.T,Hi,self.psi1)
gamma_2 = mdot(self.beta_star*pHip,self.V_star)
gamma_3 = self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T
dL_dpsi0 = -0.5 * self.beta_star#dA_dpsi0
dL_dpsi0 += .5 *alpha #dC_dpsi0
dL_dpsi0 += 0.5*mdot(self.beta_star*pHip,self.V_star)**2 - self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T #dD_dpsi0
dA_dpsi1 = b_psi1_Ki
dC_dpsi1 = -np.dot(psi1beta.T,LBL_inv)
dD_dpsi1 = gamma_1
dD_dpsi1 += -mdot(psi1beta.T,Hi,self.psi1,gamma_1)
#dL_dpsi1 = b_psi1_Ki #dA_dpsi1
#dL_dpsi1 += -np.dot(psi1beta.T,LBL_inv) #dC_dpsi1
#dL_dpsi1 += gamma_1 - mdot(psi1beta.T,Hi,self.psi1,gamma_1) #dD_dpsi1
dL_dKmm = -0.5 * np.dot(Kmmipsi1,b_psi1_Ki) #dA_dKmm
dL_dKmm += -.5*Kmmi + .5*LBL_inv + mdot(LBL_inv,psi1beta,Kmmipsi1.T) #dC_dKmm
dL_dKmm += -.5 * mdot(Hi,self.psi1,gamma_1) #dD_dKmm
dA_dpsi0 = .5 * self.V_star**2
dA_dpsi0_theta = self.kern.dKdiag_dtheta(dA_dpsi0,X=self.X)
dA_dpsi1_theta = 0
dA_dpsi1_X = 0
dA_dKmm_theta = 0
dA_dKmm_X = 0
_dC_dpsi1_dtheta = 0
_dC_dpsi1_dX = 0
_dC_dKmm_dtheta = 0
_dC_dKmm_dX = 0
_dD_dpsi1_dtheta_1 = 0
_dD_dpsi1_dX_1 = 0
_dD_dKmm_dtheta_1 = 0
_dD_dKmm_dX_1 = 0
_dD_dpsi1_dtheta_2 = 0
_dD_dpsi1_dX_2 = 0
_dD_dKmm_dtheta_2 = 0
_dD_dKmm_dX_2 = 0
for psi1_n,V_n,X_n,alpha_n,gamma_n,gamma_k in zip(self.psi1.T,self.V_star,self.X,alpha,gamma_2,gamma_3):
psin_K = np.dot(psi1_n[None,:],Kmmi)
_dA_dpsi1 = -V_n**2 * np.dot(psi1_n[None,:],Kmmi)
_dC_dpsi1 = - alpha_n * np.dot(psi1_n[None,:],Kmmi)
_dD_dpsi1_1 = - gamma_n**2 * np.dot(psi1_n[None,:],Kmmi)
_dD_dpsi1_2 = 2. * gamma_k * np.dot(psi1_n[None,:],Kmmi)
_dA_dKmm = .5*V_n**2 * np.dot(psin_K.T,psin_K)
_dC_dKmm = .5 * alpha_n * np.dot(psin_K.T,psin_K)
_dD_dKmm_1 = .5*gamma_n**2 * np.dot(psin_K.T,psin_K)
_dD_dKmm_2 = - gamma_n * np.dot(psin_K.T,psin_K)
_dKmm = .5*V_n**2 * np.dot(psin_K.T,psin_K) #Diag_dA_dKmm
_dKmm += .5 * alpha_n * np.dot(psin_K.T,psin_K) #Diag_dC_dKmm
_dKmm += .5*gamma_n**2 * np.dot(psin_K.T,psin_K) - gamma_n * np.dot(psin_K.T,psin_K) #Diag_dD_dKmm
_dKmm_dtheta = self.kern.dK_dtheta(_dKmm,self.Z)
dA_dpsi1_theta += self.kern.dK_dtheta(_dA_dpsi1,X_n[None,:],self.Z)
_dC_dpsi1_dtheta += self.kern.dK_dtheta(_dC_dpsi1,X_n[None,:],self.Z)
_dD_dpsi1_dtheta_1 += self.kern.dK_dtheta(_dD_dpsi1_1,X_n[None,:],self.Z)
_dD_dpsi1_dtheta_2 += self.kern.dK_dtheta(_dD_dpsi1_2,X_n[None,:],self.Z)
dA_dKmm_theta += self.kern.dK_dtheta(_dA_dKmm,self.Z)
_dC_dKmm_dtheta += self.kern.dK_dtheta(_dC_dKmm,self.Z)
_dD_dKmm_dtheta_1 += self.kern.dK_dtheta(_dD_dKmm_1,self.Z)
_dD_dKmm_dtheta_2 += self.kern.dK_dtheta(_dD_dKmm_2,self.Z)
dA_dpsi1_X += self.kern.dK_dX(_dA_dpsi1.T,self.Z,X_n[None,:])
_dC_dpsi1_dX += self.kern.dK_dX(_dC_dpsi1.T,self.Z,X_n[None,:])
_dD_dpsi1_dX_1 += self.kern.dK_dX(_dD_dpsi1_1.T,self.Z,X_n[None,:])
_dD_dpsi1_dX_2 += self.kern.dK_dX(_dD_dpsi1_2.T,self.Z,X_n[None,:])
dA_dKmm_X += 2.*self.kern.dK_dX(_dA_dKmm,self.Z)
_dC_dKmm_dX += 2.*self.kern.dK_dX(_dC_dKmm,self.Z)
_dD_dKmm_dX_1 += 2.*self.kern.dK_dX(_dD_dKmm_1,self.Z)
_dD_dKmm_dX_2 += 2.*self.kern.dK_dX(_dD_dKmm_2,self.Z)
self._dL_dtheta = self.kern.dKdiag_dtheta(dL_dpsi0,self.X)
self._dL_dtheta += self.kern.dK_dtheta(dA_dpsi1 + dC_dpsi1 + dD_dpsi1,self.X,self.Z)
self._dL_dtheta += self.kern.dK_dtheta(dL_dKmm,X=self.Z)
self._dL_dtheta += dA_dpsi0_theta
self._dL_dtheta += dA_dKmm_theta + _dC_dKmm_dtheta + _dD_dKmm_dtheta_1 + _dD_dKmm_dtheta_2
self._dL_dtheta += dA_dpsi1_theta + _dC_dpsi1_dtheta + _dD_dpsi1_dtheta_2 + _dD_dpsi1_dtheta_1
self._dL_dX = self.kern.dK_dX(dA_dpsi1.T,self.Z,self.X) + self.kern.dK_dX(dC_dpsi1.T,self.Z,self.X) + self.kern.dK_dX(dD_dpsi1.T,self.Z,self.X)
self._dL_dX += 2. * self.kern.dK_dX(dL_dKmm,X=self.Z)
self._dL_dX += dA_dpsi1_X + dA_dKmm_X + _dC_dpsi1_dX + _dC_dKmm_dX + _dD_dpsi1_dX_2 + _dD_dKmm_dX_2 + _dD_dpsi1_dX_1 + _dD_dKmm_dX_1
# the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedatic derivates not implemented"
else:
# likelihood is not heterscedatic
dbstar_dnoise = self.likelihood.precision * (self.beta_star**2 * self.Diag0[:,None] - self.beta_star)
Lmi_psi1 = mdot(self.Lmi,self.psi1)
LBiLmipsi1 = np.dot(self.LBi,Lmi_psi1)
aux_0 = np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V)
dA_dnoise = 0.5 * self.D * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.D * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
alpha = mdot(LBiLmipsi1,self.V_star)
alpha_ = mdot(LBiLmipsi1.T,alpha)
dD_dnoise_2 = -0.5 * self.D * np.sum(alpha_**2 * dbstar_dnoise )
dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y)
dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star)
dD_dnoise = dD_dnoise_1 + dD_dnoise_2
self.partial_for_likelihood = dA_dnoise + dC_dnoise + dD_dnoise
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
C = -self.D * (np.sum(np.log(np.diag(self.LB))))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + C + D
def _log_likelihood_gradients(self):
pass
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self):
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
#dL_dtheta = self.dA_dtheta + self.dlogB_dtheta + self.dD_dtheta
dL_dtheta = self._dL_dtheta #FIXME
return dL_dtheta
def dL_dZ(self):
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
#dL_dZ = self.dA_dX + self.dlogB_dX + self.dD_dX
dL_dZ = self._dL_dX #FIXME
return dL_dZ
def _raw_predict(self, Xnew, which_parts, full_cov=False):
if self.likelihood.is_heteroscedastic:
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.likelihood.precision.flatten())
self.Diag = self.Diag0 * Iplus_Dprod_i
self.P = Iplus_Dprod_i[:,None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi,self.psi1)
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
self.w = self.Diag * self.likelihood.v_tilde
self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde))
self.mu = self.w + np.dot(self.P,self.gamma)
"""
Make a prediction for the generalized FITC model
Arguments
---------
X : Input prediction data - Nx1 numpy array (floats)
"""
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1)
C = np.eye(self.M) - np.dot(V.T,V)
mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
#self.C = C
#self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
#self.mu_u = mu_u
#self.U = U
# q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
mu_H = np.dot(mu_u,self.mu)
self.mu_H = mu_H
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
KR0T = np.dot(Kx.T,self.Lmi.T)
mu_star = np.dot(KR0T,mu_H)
if full_cov:
Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed?
var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) # TODO: RA, is this line needed?
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
return mu_star[:,None],var
else:
raise NotImplementedError, "homoscedastic fitc not implemented"
"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]
"""

View file

@ -3,10 +3,11 @@
import numpy as np
from scipy import linalg
import pylab as pb
from .. import kern
from ..core import model
from ..util.linalg import pdinv, mdot
from ..util.linalg import pdinv, mdot, tdot
from ..util.plot import gpplot, x_frame1D, x_frame2D, Tango
from ..likelihoods import EP
@ -58,13 +59,12 @@ class GP(model):
"""
TODO: one day we might like to learn Z by gradient methods?
"""
#FIXME: this doesn;t live here.
return np.zeros_like(self.Z)
def _set_params(self, p):
self.kern._set_params_transformed(p[:self.kern.Nparam_transformed()])
# self.likelihood._set_params(p[self.kern.Nparam:]) # test by Nicolas
self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas
self.likelihood._set_params(p[self.kern.Nparam_transformed():])
self.K = self.kern.K(self.X)
self.K += self.likelihood.covariance_matrix
@ -73,10 +73,14 @@ class GP(model):
# the gradient of the likelihood wrt the covariance matrix
if self.likelihood.YYT is None:
alpha = np.dot(self.Ki, self.likelihood.Y)
self.dL_dK = 0.5 * (np.dot(alpha, alpha.T) - self.D * self.Ki)
#alpha = np.dot(self.Ki, self.likelihood.Y)
alpha,_ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y,lower=1)
self.dL_dK = 0.5 * (tdot(alpha) - self.D * self.Ki)
else:
tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
#tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
self.dL_dK = 0.5 * (tmp - self.D * self.Ki)
def _get_params(self):
@ -100,7 +104,9 @@ class GP(model):
Computes the model fit using YYT if it's available
"""
if self.likelihood.YYT is None:
return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.L, np.asfortranarray(self.likelihood.Y), lower=1)
return -0.5 * np.sum(np.square(tmp))
#return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
else:
return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT))
@ -123,18 +129,15 @@ class GP(model):
"""
return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False):
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False,stop=False):
"""
Internal helper function for making predictions, does not account
for normalization or likelihood
#TODO: which_parts does nothing
"""
Kx = self.kern.K(self.X, _Xnew,which_parts=which_parts)
mu = np.dot(np.dot(Kx.T, self.Ki), self.likelihood.Y)
KiKx = np.dot(self.Ki, Kx)
Kx = self.kern.K(_Xnew,self.X,which_parts=which_parts).T
#KiKx = np.dot(self.Ki, Kx)
KiKx, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(Kx), lower=1)
mu = np.dot(KiKx.T, self.likelihood.Y)
if full_cov:
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
var = Kxx - np.dot(KiKx.T, Kx)
@ -142,6 +145,8 @@ class GP(model):
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
var = var[:, None]
if stop:
debug_this
return mu, var
@ -178,7 +183,8 @@ class GP(model):
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False):
"""
Plot the GP's view of the world, where the data is normalized and the likelihood is Gaussian
Plot the GP's view of the world, where the data is normalized and the
likelihood is Gaussian.
:param samples: the number of a posteriori samples to plot
:param which_data: which if the training data to plot (default all)
@ -193,8 +199,8 @@ class GP(model):
- In two dimsensions, a contour-plot shows the mean predicted function
- In higher dimensions, we've no implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions using which_data and which_functions
Plot the data's view of the world, with non-normalized values and GP predictions passed through the likelihood
Can plot only part of the data and part of the posterior functions
using which_data and which_functions
"""
if which_data == 'all':
which_data = slice(None)

View file

@ -45,7 +45,7 @@ class GPLVM(GP):
return np.hstack((self.X.flatten(), GP._get_params(self)))
def _set_params(self,x):
self.X = x[:self.X.size].reshape(self.N,self.Q).copy()
self.X = x[:self.N*self.Q].reshape(self.N,self.Q).copy()
GP._set_params(self, x[self.X.size:])
def _log_likelihood_gradients(self):

View file

@ -12,3 +12,4 @@ from sparse_GPLVM import sparse_GPLVM
from Bayesian_GPLVM import Bayesian_GPLVM
from mrd import MRD
from generalized_FITC import generalized_FITC
from FITC import FITC

View file

@ -273,7 +273,7 @@ class MRD(model):
def _handle_plotting(self, fig_num, axes, plotf):
if axes is None:
fig = pylab.figure(num=fig_num, figsize=(4 * len(self.bgplvms), 3 * len(self.bgplvms)))
fig = pylab.figure(num=fig_num, figsize=(4 * len(self.bgplvms), 3))
for i, g in enumerate(self.bgplvms):
if axes is None:
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)

View file

@ -3,17 +3,12 @@
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, tdot, symmetrify
from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides
from ..util.plot import gpplot
from .. import kern
from GP import GP
from scipy import linalg
def backsub_both_sides(L, X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
class sparse_GP(GP):
"""
Variational sparse GP model
@ -104,7 +99,7 @@ class sparse_GP(GP):
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
else:
# tmp = self.psi1 * (np.sqrt(self.likelihood.precision) / sf)
# tmp = self.psi1 * (np.sqrt(self.likelihood.precision) / sf)
tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
@ -163,7 +158,7 @@ class sparse_GP(GP):
else:
# likelihood is not heterscedatic
self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
# self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision * sf2)
# self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision * sf2)
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
@ -177,7 +172,7 @@ class sparse_GP(GP):
# B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else:
A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
A = -0.5 * self.N * self.D * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
# B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
# C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5 * self.M * np.log(sf2))

View file

@ -23,6 +23,7 @@ class warpedGP(GP):
self.warping_function = TanhWarpingFunction_d(warping_terms)
self.warping_params = (np.random.randn(self.warping_function.n_terms*3+1,) * 1)
Y = self._scale_data(Y)
self.has_uncertain_inputs = False
self.Y_untransformed = Y.copy()
self.predict_in_warped_space = False
@ -30,6 +31,14 @@ class warpedGP(GP):
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
def _scale_data(self, Y):
self._Ymax = Y.max()
self._Ymin = Y.min()
return (Y-self._Ymin)/(self._Ymax-self._Ymin) - 0.5
def _unscale_data(self, Y):
return (Y + 0.5)*(self._Ymax - self._Ymin) + self._Ymin
def _set_params(self, x):
self.warping_params = x[:self.warping_function.num_parameters]
Y = self.transform_data()
@ -79,5 +88,5 @@ class warpedGP(GP):
if self.predict_in_warped_space:
mu = self.warping_function.f_inv(mu, self.warping_params)
var = self.warping_function.f_inv(var, self.warping_params)
mu = self._unscale_data(mu)
return mu, var

View file

@ -63,7 +63,7 @@ def _mdot_r(a,b):
def jitchol(A,maxtries=5):
A = np.asfortranarray(A)
L,info = linalg.lapack.dpotrf(A,lower=1)
L,info = linalg.lapack.flapack.dpotrf(A,lower=1)
if info ==0:
return L
else:
@ -124,8 +124,9 @@ def pdinv(A, *args):
L = jitchol(A, *args)
logdet = 2.*np.sum(np.log(np.diag(L)))
Li = chol_inv(L)
Ai = linalg.lapack.dpotri(L)[0]
Ai = np.tril(Ai) + np.tril(Ai,-1).T
Ai, _ = linalg.lapack.flapack.dpotri(L)
#Ai = np.tril(Ai) + np.tril(Ai,-1).T
symmetrify(Ai)
return Ai, L, Li, logdet
@ -139,7 +140,7 @@ def chol_inv(L):
"""
return linalg.lapack.dtrtri(L, lower = True)[0]
return linalg.lapack.flapack.dtrtri(L, lower = True)[0]
def multiple_pdinv(A):
@ -156,7 +157,7 @@ def multiple_pdinv(A):
N = A.shape[-1]
chols = [jitchol(A[:,:,i]) for i in range(N)]
halflogdets = [np.sum(np.log(np.diag(L[0]))) for L in chols]
invs = [linalg.lapack.dpotri(L[0],True)[0] for L in chols]
invs = [linalg.lapack.flapack.dpotri(L[0],True)[0] for L in chols]
invs = [np.triu(I)+np.triu(I,1).T for I in invs]
return np.dstack(invs),np.array(halflogdets)
@ -177,8 +178,8 @@ def PCA(Y, Q):
"""
if not np.allclose(Y.mean(axis=0), 0.0):
print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)"
#Y -= Y.mean(axis=0)
#Y -= Y.mean(axis=0)
Z = linalg.svd(Y-Y.mean(axis=0), full_matrices = False)
[X, W] = [Z[0][:,0:Q], np.dot(np.diag(Z[1]), Z[2]).T[:,0:Q]]
@ -235,6 +236,18 @@ def tdot(*args, **kwargs):
else:
return tdot_numpy(*args,**kwargs)
def DSYR(A,x,alpha=1.):
N = c_int(A.shape[0])
LDA = c_int(A.shape[0])
UPLO = c_char('l')
ALPHA = c_double(alpha)
A_ = A.ctypes.data_as(ctypes.c_void_p)
x_ = x.ctypes.data_as(ctypes.c_void_p)
INCX = c_int(1)
_blaslib.dsyr_(byref(UPLO), byref(N), byref(ALPHA),
x_, byref(INCX), A_, byref(LDA))
symmetrify(A,upper=True)
def symmetrify(A,upper=False):
"""
Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper
@ -244,33 +257,38 @@ def symmetrify(A,upper=False):
N,M = A.shape
assert N==M
c_contig_code = """
int iN;
for (int i=1; i<N; i++){
iN = i*N;
for (int j=0; j<i; j++){
A[i+j*N] = A[i*N+j];
A[i+j*N] = A[iN+j];
}
}
"""
f_contig_code = """
int iN;
for (int i=1; i<N; i++){
iN = i*N;
for (int j=0; j<i; j++){
A[i*N+j] = A[i+j*N];
A[iN+j] = A[i+j*N];
}
}
"""
if A.flags['C_CONTIGUOUS'] and upper:
weave.inline(f_contig_code,['A','N'])
weave.inline(f_contig_code,['A','N'], extra_compile_args=['-O3'])
elif A.flags['C_CONTIGUOUS'] and not upper:
weave.inline(c_contig_code,['A','N'])
weave.inline(c_contig_code,['A','N'], extra_compile_args=['-O3'])
elif A.flags['F_CONTIGUOUS'] and upper:
weave.inline(c_contig_code,['A','N'])
weave.inline(c_contig_code,['A','N'], extra_compile_args=['-O3'])
elif A.flags['F_CONTIGUOUS'] and not upper:
weave.inline(f_contig_code,['A','N'])
weave.inline(f_contig_code,['A','N'], extra_compile_args=['-O3'])
else:
tmp = np.tril(A)
A[:] = 0.0
A += tmp
A += np.tril(tmp,-1).T
def symmetrify_murray(A):
A += A.T
nn = A.shape[0]
@ -303,3 +321,13 @@ def cholupdate(L,x):
x = x.copy()
N = x.size
weave.inline(code, support_code=support_code, arg_names=['N','L','x'], type_converters=weave.converters.blitz)
def backsub_both_sides(L, X,transpose='left'):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
if transpose=='left':
tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
else:
tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=0)
return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=0)[0].T

View file

@ -0,0 +1,35 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import weave
def std_norm_pdf(x):
"""Standard Gaussian density function"""
return 1./np.sqrt(2.*np.pi)*np.exp(-.5*x**2)
def std_norm_cdf(x):
"""
Cumulative standard Gaussian distribution
Based on Abramowitz, M. and Stegun, I. (1970)
"""
support_code = "#include <math.h>"
code = """
double sign = 1.0;
if (x < 0.0){
sign = -1.0;
x = -x;
}
x = x/sqrt(2.0);
double t = 1.0/(1.0 + 0.3275911*x);
double erf = 1. - exp(-x*x)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429)))));
return_val = 0.5*(1.0 + sign*erf);
"""
x = float(x)
return weave.inline(code,arg_names=['x'],support_code=support_code)

View file

@ -15,7 +15,7 @@ class data_show:
"""
def __init__(self, vals, axes=None):
self.vals = vals
self.vals = vals.copy()
# If no axes are defined, create some.
if axes==None:
fig = plt.figure()
@ -33,12 +33,12 @@ class vector_show(data_show):
"""
def __init__(self, vals, axes=None):
data_show.__init__(self, vals, axes)
self.vals = vals.T
self.vals = vals.T.copy()
self.handle = self.axes.plot(np.arange(0, len(vals))[:, None], self.vals)[0]
def modify(self, vals):
xdata, ydata = self.handle.get_data()
self.vals = vals.T
self.vals = vals.T.copy()
self.handle.set_data(xdata, self.vals)
self.axes.figure.canvas.draw()
@ -53,7 +53,7 @@ class lvm(data_show):
:param latent_axes: the axes where the latent visualization should be plotted.
"""
if vals == None:
vals = model.X[0]
vals = model.X[0].copy()
data_show.__init__(self, vals, axes=latent_axes)
@ -77,14 +77,13 @@ class lvm(data_show):
self.latent_index = latent_index
self.latent_dim = model.Q
# The red cross which shows current latent point.
# The red cross which shows current latent point.
self.latent_values = vals
self.latent_handle = self.latent_axes.plot([0],[0],'rx',mew=2)[0]
self.modify(vals)
def modify(self, vals):
"""When latent values are modified update the latent representation and ulso update the output visualization."""
y = self.model.predict(vals)[0]
self.data_visualize.modify(y)
self.latent_handle.set_data(vals[self.latent_index[0]], vals[self.latent_index[1]])
@ -105,7 +104,7 @@ class lvm(data_show):
if self.called and self.move_on:
# Call modify code on move
self.latent_values[self.latent_index[0]]=event.xdata
self.latent_values[self.latent_index[1]]=event.ydata
self.latent_values[self.latent_index[1]]=event.ydata
self.modify(self.latent_values)
class lvm_subplots(lvm):
@ -218,11 +217,16 @@ class image_show(data_show):
self.axes.figure.canvas.draw() # Teo - original line: plt.show()
def set_image(self, vals):
<<<<<<< HEAD
dim = self.dimensions[0] * self.dimensions[1]
self.vals = np.reshape(vals[0,dim*self.selectImage+np.array(range(dim))], self.dimensions, order='F')
=======
self.vals = np.reshape(vals, self.dimensions, order='F').copy()
>>>>>>> 47a7df9756e47ba775eb04c72e41a31933013ea4
if self.transpose:
self.vals = self.vals.T
self.vals = self.vals.T.copy()
if not self.scale:
<<<<<<< HEAD
self.vals = self.vals
if self.invert:
self.vals = -self.vals
@ -238,6 +242,11 @@ class image_show(data_show):
if not self.palette == []: # applying using an image palette (e.g. if the image has been quantized)
self.vals = Image.fromarray(self.vals.astype('uint8'))
self.vals.putpalette(self.palette) # palette is a list, must be loaded before calling this function
=======
self.vals = self.vals.copy()
#if self.invert:
# self.vals = -self.vals
>>>>>>> 47a7df9756e47ba775eb04c72e41a31933013ea4
class mocap_data_show(data_show):
@ -319,7 +328,7 @@ class stick_show(mocap_data_show):
mocap_data_show.__init__(self, vals, axes, connect)
def process_values(self, vals):
self.vals = vals.reshape((3, vals.shape[1]/3)).T
self.vals = vals.reshape((3, vals.shape[1]/3)).T.copy()
class skeleton_show(mocap_data_show):
"""data_show class for visualizing motion capture data encoded as a skeleton with angles."""

View file

@ -81,7 +81,7 @@ class TanhWarpingFunction(WarpingFunction):
iterations: number of N.R. iterations
"""
y = y.copy()
z = np.ones_like(y)
@ -176,7 +176,7 @@ class TanhWarpingFunction_d(WarpingFunction):
mpsi = psi.copy()
d = psi[-1]
mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
#3. transform data
z = d*y.copy()
for i in range(len(mpsi)):
@ -185,7 +185,7 @@ class TanhWarpingFunction_d(WarpingFunction):
return z
def f_inv(self, y, psi, iterations = 30):
def f_inv(self, z, psi, max_iterations = 1000):
"""
calculate the numerical inverse of f
@ -194,13 +194,19 @@ class TanhWarpingFunction_d(WarpingFunction):
"""
y = y.copy()
z = np.ones_like(y)
z = z.copy()
y = np.ones_like(z)
it = 0
update = np.inf
for i in range(iterations):
z -= (self.f(z, psi) - y)/self.fgrad_y(z,psi)
return z
while it == 0 or (np.abs(update).sum() > 1e-10 and it < max_iterations):
update = (self.f(y, psi) - z)/self.fgrad_y(y, psi)
y -= update
it += 1
if it == max_iterations:
print "WARNING!!! Maximum number of iterations reached in f_inv "
return y
def fgrad_y(self, y, psi, return_precalc = False):

Binary file not shown.

Before

Width:  |  Height:  |  Size: 37 KiB

After

Width:  |  Height:  |  Size: 75 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 78 KiB

After

Width:  |  Height:  |  Size: 130 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 61 KiB

After

Width:  |  Height:  |  Size: 62 KiB

Before After
Before After

View file

@ -55,18 +55,18 @@ In ``GPy``, kernel objects can be added or multiplied. In both cases, two kinds
* a kernel over :math:`\mathbb{R} \times \mathbb{R}`: :math:`k(x,y) = k_1(x,y) \times k_2(x,y)`
* a kernel over :math:`\mathbb{R}^2 \times \mathbb{R}^2`: :math:`k(\mathbf{x},\mathbf{y}) = k_1(x_1,y_1) \times k_2(x_2,y_2)`
These two options are available in GPy under the name ``prod`` and ``prod_orthogonal`` (resp. ``add`` and ``add_orthogonal`` for the addition). Here is a quick example ::
These two options are available in GPy using the flag ``tensor`` in the ``add`` and ``prod`` functions. Here is a quick example ::
k1 = GPy.kern.rbf(1,1.,2.)
k2 = GPy.kern.Matern32(1, 0.5, 0.2)
# Product of kernels
k_prod = k1.prod(k2)
k_prodorth = k1.prod_orthogonal(k2)
k_prod = k1.prod(k2) # By default, tensor=False
k_prodtens = k1.prod(k2,tensor=True)
# Sum of kernels
k_add = k1.add(k2)
k_addorth = k1.add_orthogonal(k2)
k_add = k1.add(k2) # By default, tensor=False
k_addtens = k1.add(k2,tensor=True)
.. # plots
pb.figure(figsize=(8,8))
@ -74,21 +74,21 @@ These two options are available in GPy under the name ``prod`` and ``prod_orthog
k_prod.plot()
pb.title('prod')
pb.subplot(2,2,2)
k_prodorth.plot()
pb.title('prod_orthogonal')
k_prodtens.plot()
pb.title('tensor prod')
pb.subplot(2,2,3)
k_add.plot()
pb.title('add')
pb.title('sum')
pb.subplot(2,2,4)
k_addorth.plot()
pb.title('add_orthogonal')
k_addtens.plot()
pb.title('tensor sum')
pb.subplots_adjust(wspace=0.3, hspace=0.3)
.. figure:: Figures/tuto_kern_overview_multadd.png
:align: center
:height: 500px
A shortcut for ``add`` and ``prod`` is provided by the usual ``+`` and ``*`` operators. Here is another example where we create a periodic kernel with some decay ::
A shortcut for ``add`` and ``prod`` (with default flag ``tensor=False``) is provided by the usual ``+`` and ``*`` operators. Here is another example where we create a periodic kernel with some decay ::
k1 = GPy.kern.rbf(1,1.,2)
k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5)
@ -113,7 +113,7 @@ A shortcut for ``add`` and ``prod`` is provided by the usual ``+`` and ``*`` ope
:align: center
:height: 300px
In general, ``kern`` objects can be seen as a sum of ``kernparts`` objects, where the later are covariance functions denied on the same space. For example, the following code ::
In general, ``kern`` objects can be seen as a sum of ``kernparts`` objects, where the later are covariance functions defined on the same space. For example, the following code ::
k = (k1+k2)*(k1+k2)
print k.parts[0].name, '\n', k.parts[1].name, '\n', k.parts[2].name, '\n', k.parts[3].name
@ -184,7 +184,7 @@ Let us assume that we want to define an ANOVA kernel with a Matern 3/2 kernel fo
k_cst = GPy.kern.bias(1,variance=1.)
k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
Kanova = (k_cst + k_mat).prod_orthogonal(k_cst + k_mat)
Kanova = (k_cst + k_mat).prod(k_cst + k_mat,tensor=True)
print Kanova
Printing the resulting kernel outputs the following ::
@ -236,14 +236,14 @@ The submodels can be represented with the option ``which_function`` of ``plot``:
pb.subplot(1,5,2)
pb.ylabel("= ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,3)
m.plot(which_functions=[False,True,False,False])
m.plot(which_parts=[False,True,False,False])
pb.ylabel("cst +",rotation='horizontal',fontsize='30')
pb.subplot(1,5,4)
m.plot(which_functions=[False,False,True,False])
m.plot(which_parts=[False,False,True,False])
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,5)
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
m.plot(which_functions=[False,False,False,True])
m.plot(which_parts=[False,False,False,True])
.. pb.savefig('tuto_kern_overview_mANOVAdec.png',bbox_inches='tight')
@ -252,7 +252,8 @@ The submodels can be represented with the option ``which_function`` of ``plot``:
:height: 250px
.. import pylab as pb
.. # code
import pylab as pb
import numpy as np
import GPy
pb.ion()