Merge branch 'devel' into mrd

This commit is contained in:
Max Zwiessele 2013-05-14 11:47:44 +01:00
commit 1f19d40d89
24 changed files with 553 additions and 419 deletions

View file

@ -134,7 +134,7 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=10, max_f_eval=1e3, plot=False, **k
plt.sca(latent_axes) plt.sca(latent_axes)
m.plot_latent() m.plot_latent()
data_show = GPy.util.visualize.vector_show(y) data_show = GPy.util.visualize.vector_show(y)
lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], m, data_show, latent_axes=latent_axes) # , sense_axes=sense_axes) lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes) # , sense_axes=sense_axes)
raw_input('Press enter to finish') raw_input('Press enter to finish')
plt.close('all') plt.close('all')
# # plot # # plot
@ -425,7 +425,7 @@ def brendan_faces():
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False) data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :], m, data_show, ax) lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
plt.close('all') plt.close('all')
@ -438,11 +438,12 @@ def stick():
# optimize # optimize
m.ensure_default_constraints() m.ensure_default_constraints()
m.optimize(messages=1, max_f_eval=10000) m.optimize(messages=1, max_f_eval=10000)
m._set_params(m._get_params())
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :], m, data_show, ax) lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
plt.close('all') plt.close('all')
@ -464,7 +465,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True):
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel']) data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel'])
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :], m, data_show, ax) lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
plt.close('all') plt.close('all')

View file

@ -97,51 +97,66 @@ class opt_SGD(Optimizer):
return subset return subset
def shift_constraints(self, j): def shift_constraints(self, j):
# back them up
bounded_i = copy.deepcopy(self.model.constrained_bounded_indices)
bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers)
bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers)
for b in range(len(bounded_i)): # for each group of constraints constrained_indices = copy.deepcopy(self.model.constrained_indices)
for bc in range(len(bounded_i[b])):
pos = np.where(j == bounded_i[b][bc])[0] for c, constraint in enumerate(constrained_indices):
mask = (np.ones_like(constrained_indices[c]) == 1)
for i in range(len(constrained_indices[c])):
pos = np.where(j == constrained_indices[c][i])[0]
if len(pos) == 1: if len(pos) == 1:
pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0] self.model.constrained_indices[c][i] = pos
self.model.constrained_bounded_indices[b][pos2] = pos[0]
else: else:
if len(self.model.constrained_bounded_indices[b]) == 1: mask[i] = False
# if it's the last index to be removed
# the logic here is just a mess. If we remove the last one, then all the
# b-indices change and we have to iterate through everything to find our
# current index. Can't deal with this right now.
raise NotImplementedError
else: # just remove it from the indices self.model.constrained_indices[c] = self.model.constrained_indices[c][mask]
mask = self.model.constrained_bounded_indices[b] != bc return constrained_indices
self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask] # back them up
# bounded_i = copy.deepcopy(self.model.constrained_bounded_indices)
# bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers)
# bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers)
# for b in range(len(bounded_i)): # for each group of constraints
# for bc in range(len(bounded_i[b])):
# pos = np.where(j == bounded_i[b][bc])[0]
# if len(pos) == 1:
# pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0]
# self.model.constrained_bounded_indices[b][pos2] = pos[0]
# else:
# if len(self.model.constrained_bounded_indices[b]) == 1:
# # if it's the last index to be removed
# # the logic here is just a mess. If we remove the last one, then all the
# # b-indices change and we have to iterate through everything to find our
# # current index. Can't deal with this right now.
# raise NotImplementedError
# else: # just remove it from the indices
# mask = self.model.constrained_bounded_indices[b] != bc
# self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask]
# here we shif the positive constraints. We cycle through each positive # # here we shif the positive constraints. We cycle through each positive
# constraint # # constraint
positive = self.model.constrained_positive_indices.copy() # positive = self.model.constrained_positive_indices.copy()
mask = (np.ones_like(positive) == 1) # mask = (np.ones_like(positive) == 1)
for p in range(len(positive)): # for p in range(len(positive)):
# we now check whether the constrained index appears in the j vector # # we now check whether the constrained index appears in the j vector
# (the vector of the "active" indices) # # (the vector of the "active" indices)
pos = np.where(j == self.model.constrained_positive_indices[p])[0] # pos = np.where(j == self.model.constrained_positive_indices[p])[0]
if len(pos) == 1: # if len(pos) == 1:
self.model.constrained_positive_indices[p] = pos # self.model.constrained_positive_indices[p] = pos
else: # else:
mask[p] = False # mask[p] = False
self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask] # self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask]
return (bounded_i, bounded_l, bounded_u), positive # return (bounded_i, bounded_l, bounded_u), positive
def restore_constraints(self, b, p): def restore_constraints(self, c):#b, p):
self.model.constrained_bounded_indices = b[0] # self.model.constrained_bounded_indices = b[0]
self.model.constrained_bounded_lowers = b[1] # self.model.constrained_bounded_lowers = b[1]
self.model.constrained_bounded_uppers = b[2] # self.model.constrained_bounded_uppers = b[2]
self.model.constrained_positive_indices = p # self.model.constrained_positive_indices = p
self.model.constrained_indices = c
def get_param_shapes(self, N = None, Q = None): def get_param_shapes(self, N = None, Q = None):
model_name = self.model.__class__.__name__ model_name = self.model.__class__.__name__
@ -168,9 +183,15 @@ class opt_SGD(Optimizer):
if self.model.N == 0 or Y.std() == 0.0: if self.model.N == 0 or Y.std() == 0.0:
return 0, step, self.model.N return 0, step, self.model.N
self.model.likelihood._mean = Y.mean() self.model.likelihood._bias = Y.mean()
self.model.likelihood._std = Y.std() self.model.likelihood._scale = Y.std()
self.model.likelihood.set_data(Y) self.model.likelihood.set_data(Y)
# self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision
sigma = self.model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma)
j = self.subset_parameter_vector(self.x_opt, samples, shapes) j = self.subset_parameter_vector(self.x_opt, samples, shapes)
self.model.X = X[samples] self.model.X = X[samples]
@ -181,27 +202,30 @@ class opt_SGD(Optimizer):
self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T) self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T)
self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT) self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT)
b, p = self.shift_constraints(j) ci = self.shift_constraints(j)
f, fp = f_fp(self.x_opt[j]) f, fp = f_fp(self.x_opt[j])
step[j] = self.momentum * step[j] + self.learning_rate[j] * fp step[j] = self.momentum * step[j] + self.learning_rate[j] * fp
self.x_opt[j] -= step[j] self.x_opt[j] -= step[j]
self.restore_constraints(ci)
self.restore_constraints(b, p) self.model.grads[j] = fp
# restore likelihood _mean and _std, otherwise when we call set_data(y) on # restore likelihood _bias and _scale, otherwise when we call set_data(y) on
# the next feature, it will get normalized with the mean and std of this one. # the next feature, it will get normalized with the mean and std of this one.
self.model.likelihood._mean = 0 self.model.likelihood._bias = 0
self.model.likelihood._std = 1 self.model.likelihood._scale = 1
return f, step, self.model.N return f, step, self.model.N
def opt(self, f_fp=None, f=None, fp=None): def opt(self, f_fp=None, f=None, fp=None):
self.x_opt = self.model._get_params_transformed() self.x_opt = self.model._get_params_transformed()
self.model.grads = np.zeros_like(self.x_opt)
X, Y = self.model.X.copy(), self.model.likelihood.Y.copy() X, Y = self.model.X.copy(), self.model.likelihood.Y.copy()
self.model.likelihood.YYT = None self.model.likelihood.YYT = None
self.model.likelihood.trYYT = None self.model.likelihood.trYYT = None
self.model.likelihood._mean = 0.0 self.model.likelihood._bias = 0.0
self.model.likelihood._std = 1.0 self.model.likelihood._scale = 1.0
N, Q = self.model.X.shape N, Q = self.model.X.shape
D = self.model.likelihood.Y.shape[1] D = self.model.likelihood.Y.shape[1]
@ -225,6 +249,11 @@ class opt_SGD(Optimizer):
self.model.D = len(j) self.model.D = len(j)
self.model.likelihood.D = len(j) self.model.likelihood.D = len(j)
self.model.likelihood.set_data(Y[:, j]) self.model.likelihood.set_data(Y[:, j])
# self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision
sigma = self.model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma)
if missing_data: if missing_data:
shapes = self.get_param_shapes(N, Q) shapes = self.get_param_shapes(N, Q)
@ -250,7 +279,6 @@ class opt_SGD(Optimizer):
# plt.clf() # plt.clf()
# plt.plot(self.param_traces['noise']) # plt.plot(self.param_traces['noise'])
# import pdb; pdb.set_trace()
# for k in self.param_traces.keys(): # for k in self.param_traces.keys():
# self.param_traces[k].append(self.model.get(k)[0]) # self.param_traces[k].append(self.model.get(k)[0])
@ -262,6 +290,9 @@ class opt_SGD(Optimizer):
self.model.likelihood.N = N self.model.likelihood.N = N
self.model.likelihood.D = D self.model.likelihood.D = D
self.model.likelihood.Y = Y self.model.likelihood.Y = Y
sigma = self.model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma)
self.trace.append(self.f_opt) self.trace.append(self.f_opt)
if self.iteration_file is not None: if self.iteration_file is not None:

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
try: try:
from constructors import rbf_sympy, sympykern # these depend on sympy from constructors import rbf_sympy, sympykern # these depend on sympy
except: except:

View file

@ -38,7 +38,6 @@ class bias(kernpart):
def dK_dtheta(self,dL_dKdiag,X,X2,target): def dK_dtheta(self,dL_dKdiag,X,X2,target):
target += dL_dKdiag.sum() target += dL_dKdiag.sum()
def dKdiag_dtheta(self,dL_dKdiag,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += dL_dKdiag.sum() target += dL_dKdiag.sum()

View file

@ -20,7 +20,6 @@ from periodic_exponential import periodic_exponential as periodic_exponentialpar
from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part
from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part
from prod import prod as prodpart from prod import prod as prodpart
from prod_orthogonal import prod_orthogonal as prod_orthogonalpart
from symmetric import symmetric as symmetric_part from symmetric import symmetric as symmetric_part
from coregionalise import coregionalise as coregionalise_part from coregionalise import coregionalise as coregionalise_part
from rational_quadratic import rational_quadratic as rational_quadraticpart from rational_quadratic import rational_quadratic as rational_quadraticpart
@ -255,7 +254,7 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,
part = periodic_Matern52part(D,variance, lengthscale, period, n_freq, lower, upper) part = periodic_Matern52part(D,variance, lengthscale, period, n_freq, lower, upper)
return kern(D, [part]) return kern(D, [part])
def prod(k1,k2): def prod(k1,k2,tensor=False):
""" """
Construct a product kernel over D from two kernels over D Construct a product kernel over D from two kernels over D
@ -263,19 +262,8 @@ def prod(k1,k2):
:type k1, k2: kernpart :type k1, k2: kernpart
:rtype: kernel object :rtype: kernel object
""" """
part = prodpart(k1,k2) part = prodpart(k1,k2,tensor)
return kern(k1.D, [part]) return kern(part.D, [part])
def prod_orthogonal(k1,k2):
"""
Construct a product kernel over D1 x D2 from a kernel over D1 and another over D2.
:param k1, k2: the kernels to multiply
:type k1, k2: kernpart
:rtype: kernel object
"""
part = prod_orthogonalpart(k1,k2)
return kern(k1.D+k2.D, [part])
def symmetric(k): def symmetric(k):
""" """

View file

@ -7,7 +7,6 @@ import pylab as pb
from ..core.parameterised import parameterised from ..core.parameterised import parameterised
from kernpart import kernpart from kernpart import kernpart
import itertools import itertools
from prod_orthogonal import prod_orthogonal
from prod import prod from prod import prod
from ..util.linalg import symmetrify from ..util.linalg import symmetrify
@ -84,96 +83,72 @@ class kern(parameterised):
count += p.Nparam count += p.Nparam
def __add__(self, other): def __add__(self, other):
assert self.D == other.D """
newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices) Shortcut for `add`.
# transfer constraints: """
newkern.constrained_indices = self.constrained_indices + [i+self.Nparam for i in other.constrained_indices] return self.add(other)
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
return newkern
def add(self, other): def add(self, other,tensor=False):
""" """
Add another kernel to this one. Both kernels are defined on the same _space_ Add another kernel to this one. Both kernels are defined on the same _space_
:param other: the other kernel to be added :param other: the other kernel to be added
:type other: GPy.kern :type other: GPy.kern
""" """
return self + other if tensor:
D = self.D + other.D
self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]
def add_orthogonal(self, other): newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
"""
Add another kernel to this one. Both kernels are defined on separate spaces
:param other: the other kernel to be added
:type other: GPy.kern
"""
# deal with input slices
D = self.D + other.D
self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices) # transfer constraints:
newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices]
# transfer constraints: newkern.constraints = self.constraints + other.constraints
newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices] newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.constraints = self.constraints + other.constraints newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices] newkern.constraints = self.constraints + other.constraints
newkern.fixed_values = self.fixed_values + other.fixed_values newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
newkern.constraints = self.constraints + other.constraints else:
newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers assert self.D == other.D
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices] newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [i+self.Nparam for i in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
return newkern return newkern
def __mul__(self, other): def __mul__(self, other):
""" """
Shortcut for `prod_orthogonal`. Note that `+` assumes that we sum 2 kernels defines on the same space whereas `*` assumes that the kernels are defined on different subspaces. Shortcut for `prod`.
""" """
return self.prod(other) return self.prod(other)
def prod(self, other): def prod(self, other,tensor=False):
""" """
multiply two kernels defined on the same spaces. multiply two kernels (either on the same space, or on the tensor product of the input space)
:param other: the other kernel to be added :param other: the other kernel to be added
:type other: GPy.kern :type other: GPy.kern
""" """
K1 = self.copy() K1 = self.copy()
K2 = other.copy() K2 = other.copy()
newkernparts = [prod(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
slices = [] slices = []
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices): for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
s1, s2 = [False] * K1.D, [False] * K2.D s1, s2 = [False]*K1.D, [False]*K2.D
s1[sl1], s2[sl2] = [True], [True] s1[sl1], s2[sl2] = [True], [True]
slices += [s1 + s2] slices += [s1+s2]
newkernparts = [prod(k1, k2,tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)]
if tensor:
newkern = kern(K1.D + K2.D, newkernparts, slices)
else:
newkern = kern(K1.D, newkernparts, slices)
newkern = kern(K1.D, newkernparts, slices)
newkern._follow_constrains(K1, K2) newkern._follow_constrains(K1, K2)
return newkern
def prod_orthogonal(self, other):
"""
multiply two kernels. Both kernels are defined on separate spaces.
:param other: the other kernel to be added
:type other: GPy.kern
"""
K1 = self.copy()
K2 = other.copy()
newkernparts = [prod_orthogonal(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
slices = []
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
s1, s2 = [False] * K1.D, [False] * K2.D
s1[sl1], s2[sl2] = [True], [True]
slices += [s1 + s2]
newkern = kern(K1.D + K2.D, newkernparts, slices)
newkern._follow_constrains(K1, K2)
return newkern return newkern
def _follow_constrains(self, K1, K2): def _follow_constrains(self, K1, K2):
@ -277,7 +252,7 @@ class kern(parameterised):
which_parts = [True]*self.Nparts which_parts = [True]*self.Nparts
assert X.shape[1] == self.D assert X.shape[1] == self.D
target = np.zeros(X.shape[0]) target = np.zeros(X.shape[0])
[p.Kdiag(X[:, i_s], target=target) for p, i_s in zip(self.parts, self.input_slices)] [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on]
return target return target
def dKdiag_dtheta(self, dL_dKdiag, X): def dKdiag_dtheta(self, dL_dKdiag, X):
@ -469,9 +444,9 @@ class kern(parameterised):
return target_mu, target_S return target_mu, target_S
def plot(self, x=None, plot_limits=None, which_functions='all', resolution=None, *args, **kwargs): def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs):
if which_functions == 'all': if which_parts == 'all':
which_functions = [True] * self.Nparts which_parts = [True] * self.Nparts
if self.D == 1: if self.D == 1:
if x is None: if x is None:
x = np.zeros((1, 1)) x = np.zeros((1, 1))
@ -488,7 +463,7 @@ class kern(parameterised):
raise ValueError, "Bad limits for plotting" raise ValueError, "Bad limits for plotting"
Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None] Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None]
Kx = self.K(Xnew, x, slices2=which_functions) Kx = self.K(Xnew, x, which_parts)
pb.plot(Xnew, Kx, *args, **kwargs) pb.plot(Xnew, Kx, *args, **kwargs)
pb.xlim(xmin, xmax) pb.xlim(xmin, xmax)
pb.xlabel("x") pb.xlabel("x")
@ -514,7 +489,7 @@ class kern(parameterised):
xg = np.linspace(xmin[0], xmax[0], resolution) xg = np.linspace(xmin[0], xmax[0], resolution)
yg = np.linspace(xmin[1], xmax[1], resolution) yg = np.linspace(xmin[1], xmax[1], resolution)
Xnew = np.vstack((xx.flatten(), yy.flatten())).T Xnew = np.vstack((xx.flatten(), yy.flatten())).T
Kx = self.K(Xnew, x, slices2=which_functions) Kx = self.K(Xnew, x, which_parts)
Kx = Kx.reshape(resolution, resolution).T Kx = Kx.reshape(resolution, resolution).T
pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs) pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs)
pb.xlim(xmin[0], xmax[0]) pb.xlim(xmin[0], xmax[0])

View file

@ -4,108 +4,108 @@
from kernpart import kernpart from kernpart import kernpart
import numpy as np import numpy as np
import hashlib import hashlib
#from scipy import integrate # This may not be necessary (Nicolas, 20th Feb)
class prod(kernpart): class prod(kernpart):
""" """
Computes the product of 2 kernels that are defined on the same space Computes the product of 2 kernels
:param k1, k2: the kernels to multiply :param k1, k2: the kernels to multiply
:type k1, k2: kernpart :type k1, k2: kernpart
:param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean
:rtype: kernel object :rtype: kernel object
""" """
def __init__(self,k1,k2): def __init__(self,k1,k2,tensor=False):
assert k1.D == k2.D, "Error: The input spaces of the kernels to multiply must have the same dimension"
self.D = k1.D
self.Nparam = k1.Nparam + k2.Nparam self.Nparam = k1.Nparam + k2.Nparam
self.name = k1.name + '<times>' + k2.name self.name = k1.name + '<times>' + k2.name
self.k1 = k1 self.k1 = k1
self.k2 = k2 self.k2 = k2
if tensor:
self.D = k1.D + k2.D
self.slice1 = slice(0,self.k1.D)
self.slice2 = slice(self.k1.D,self.k1.D+self.k2.D)
else:
assert k1.D == k2.D, "Error: The input spaces of the kernels to sum don't have the same dimension."
self.D = k1.D
self.slice1 = slice(0,self.D)
self.slice2 = slice(0,self.D)
self._X, self._X2, self._params = np.empty(shape=(3,1))
self._set_params(np.hstack((k1._get_params(),k2._get_params()))) self._set_params(np.hstack((k1._get_params(),k2._get_params())))
def _get_params(self): def _get_params(self):
"""return the value of the parameters.""" """return the value of the parameters."""
return self.params return np.hstack((self.k1._get_params(), self.k2._get_params()))
def _set_params(self,x): def _set_params(self,x):
"""set the value of the parameters.""" """set the value of the parameters."""
self.k1._set_params(x[:self.k1.Nparam]) self.k1._set_params(x[:self.k1.Nparam])
self.k2._set_params(x[self.k1.Nparam:]) self.k2._set_params(x[self.k1.Nparam:])
self.params = x
def _get_param_names(self): def _get_param_names(self):
"""return parameter names.""" """return parameter names."""
return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()] return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]
def K(self,X,X2,target): def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2.""" self._K_computations(X,X2)
if X2 is None: target += self._K1 * self._K2
target1 = np.zeros((X.shape[0],X2.shape[0]))
target2 = np.zeros((X.shape[0],X2.shape[0]))
else:
target1 = np.zeros((X.shape[0],X.shape[0]))
target2 = np.zeros((X.shape[0],X.shape[0]))
self.k1.K(X,X2,target1)
self.k2.K(X,X2,target2)
target += target1 * target2
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros((X.shape[0],))
target2 = np.zeros((X.shape[0],))
self.k1.Kdiag(X,target1)
self.k2.Kdiag(X,target2)
target += target1 * target2
def dK_dtheta(self,dL_dK,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X self._K_computations(X,X2)
K1 = np.zeros((X.shape[0],X2.shape[0])) if X2 is None:
K2 = np.zeros((X.shape[0],X2.shape[0])) self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.Nparam])
self.k1.K(X,X2,K1) self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.Nparam:])
self.k2.K(X,X2,K2) else:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.Nparam])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.Nparam:])
k1_target = np.zeros(self.k1.Nparam) def Kdiag(self,X,target):
k2_target = np.zeros(self.k2.Nparam) """Compute the diagonal of the covariance matrix associated to X."""
self.k1.dK_dtheta(dL_dK*K2, X, X2, k1_target) target1 = np.zeros(X.shape[0])
self.k2.dK_dtheta(dL_dK*K1, X, X2, k2_target) target2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],target1)
self.k2.Kdiag(X[:,self.slice2],target2)
target += target1 * target2
target[:self.k1.Nparam] += k1_target def dKdiag_dtheta(self,dL_dKdiag,X,target):
target[self.k1.Nparam:] += k2_target K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.Nparam])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.Nparam:])
def dK_dX(self,dL_dK,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X self._K_computations(X,X2)
K1 = np.zeros((X.shape[0],X2.shape[0])) self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target)
K2 = np.zeros((X.shape[0],X2.shape[0])) self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target)
self.k1.K(X,X2,K1)
self.k2.K(X,X2,K2)
self.k1.dK_dX(dL_dK*K2, X, X2, target) def dKdiag_dX(self, dL_dKdiag, X, target):
self.k2.dK_dX(dL_dK*K1, X, X2, target) K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
def dKdiag_dX(self,dL_dKdiag,X,target): self.k1.dK_dX(dL_dKdiag*K2, X[:,self.slice1], target)
target1 = np.zeros((X.shape[0],)) self.k2.dK_dX(dL_dKdiag*K1, X[:,self.slice2], target)
target2 = np.zeros((X.shape[0],))
self.k1.Kdiag(X,target1)
self.k2.Kdiag(X,target2)
self.k1.dKdiag_dX(dL_dKdiag*target2, X, target) def _K_computations(self,X,X2):
self.k2.dKdiag_dX(dL_dKdiag*target1, X, target) if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
self._X = X.copy()
def dKdiag_dtheta(self,dL_dKdiag,X,target): self._params == self._get_params().copy()
"""Compute the diagonal of the covariance matrix associated to X.""" if X2 is None:
target1 = np.zeros((X.shape[0],)) self._X2 = None
target2 = np.zeros((X.shape[0],)) self._K1 = np.zeros((X.shape[0],X.shape[0]))
self.k1.Kdiag(X,target1) self._K2 = np.zeros((X.shape[0],X.shape[0]))
self.k2.Kdiag(X,target2) self.k1.K(X[:,self.slice1],None,self._K1)
self.k2.K(X[:,self.slice2],None,self._K2)
k1_target = np.zeros(self.k1.Nparam) else:
k2_target = np.zeros(self.k2.Nparam) self._X2 = X2.copy()
self.k1.dKdiag_dtheta(dL_dKdiag*target2, X, k1_target) self._K1 = np.zeros((X.shape[0],X2.shape[0]))
self.k2.dKdiag_dtheta(dL_dKdiag*target1, X, k2_target) self._K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1)
target[:self.k1.Nparam] += k1_target self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2)
target[self.k1.Nparam:] += k2_target

View file

@ -1,6 +1,6 @@
import numpy as np import numpy as np
from scipy import stats, linalg from scipy import stats, linalg
from ..util.linalg import pdinv,mdot,jitchol from ..util.linalg import pdinv,mdot,jitchol,DSYR
from likelihood import likelihood from likelihood import likelihood
class EP(likelihood): class EP(likelihood):
@ -113,11 +113,12 @@ class EP(likelihood):
#Site parameters update #Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau self.tau_tilde[i] += Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v self.v_tilde[i] += Delta_v
#Posterior distribution parameters update #Posterior distribution parameters update
si=Sigma[:,i].reshape(self.N,1) DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
Sigma = Sigma - Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T) #si=Sigma[:,i:i+1]
#Sigma -= Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T)#DSYR
mu = np.dot(Sigma,self.v_tilde) mu = np.dot(Sigma,self.v_tilde)
self.iterations += 1 self.iterations += 1
#Sigma recomptutation with Cholesky decompositon #Sigma recomptutation with Cholesky decompositon

View file

@ -7,6 +7,7 @@ from scipy import stats
import scipy as sp import scipy as sp
import pylab as pb import pylab as pb
from ..util.plot import gpplot from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
class likelihood_function: class likelihood_function:
""" """
@ -37,11 +38,11 @@ class probit(likelihood_function):
:param tau_i: precision of the cavity distribution (float) :param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float) :param v_i: mean/variance of the cavity distribution (float)
""" """
if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}. #if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}.
# TODO: some version of assert # TODO: some version of assert
z = data_i*v_i/np.sqrt(tau_i**2 + tau_i) z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = stats.norm.cdf(z) Z_hat = std_norm_cdf(z)
phi = stats.norm.pdf(z) phi = std_norm_pdf(z)
mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i)) mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
return Z_hat, mu_hat, sigma2_hat return Z_hat, mu_hat, sigma2_hat

View file

@ -16,29 +16,6 @@ def backsub_both_sides(L,X):
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
class FITC(sparse_GP): class FITC(sparse_GP):
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
self.Z = Z
self.M = self.Z.shape[0]
self.true_precision = likelihood.precision
#ERASEME
#N = likelihood.Y.size
#self.beta_star = np.random.rand(N,1)
#self.Kmm_ = kernel.K(self.Z).copy()
#self.Kmmi_,a,b,c = pdinv(self.Kmm_)
#self.psi1_ = kernel.K(self.Z,X).copy()
sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False)
def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
self._compute_kernel_matrices()
self.scale_factor = 1.
self._computations()
def update_likelihood_approximation(self): def update_likelihood_approximation(self):
""" """
@ -56,34 +33,15 @@ class FITC(sparse_GP):
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP self._set_params(self._get_params()) # update the GP
#@profile
def _computations(self): def _computations(self):
#factor Kmm #factor Kmm
self.Lm = jitchol(self.Kmm) self.Lm = jitchol(self.Kmm)
self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1) self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1) Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1) self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy()
self.Diag0 = self.psi0 - np.diag(self.Qnn) self.Diag0 = self.psi0 - np.diag(self.Qnn)
#TODO eraseme
"""
self.psi1 = self.psi1_
self.Lm = jitchol(self.Kmm_)
self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
#self.true_psi1 = self.kern.K(self.Z,self.X)
#self.Qnn = mdot(self.true_psi1.T,self.Lmi.T,self.Lmi,self.true_psi1)
self.Kmmi, a,b,c = pdinv(self.Kmm)
self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1)
#self.Diag0 = self.psi0 #- np.diag(self.Qnn)
self.Diag0 = - np.diag(self.Qnn)
#Kmmi,Lm,Lmi,logdetK = pdinv(self.Kmm)
#self.Lambda = self.Kmmi_ + mdot(self.Kmmi_,self.psi1_,self.beta_star*self.psi1_.T,self.Kmmi_) + np.eye(self.M)*100
#self.Lambdai, LLm, LLmi, self.logdetLambda = pdinv(self.Lambda)
"""
#TODO uncomment
self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #Includes Diag0 in the precision self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #Includes Diag0 in the precision
self.V_star = self.beta_star * self.likelihood.Y self.V_star = self.beta_star * self.likelihood.Y
@ -92,7 +50,7 @@ class FITC(sparse_GP):
raise NotImplementedError raise NotImplementedError
else: else:
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
assert self.likelihood.D == 1 # TODO: what if the likelihood is heterscedatic and there are multiple independent outputs? assert self.likelihood.D == 1
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N))) tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N)))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp) self.A = tdot(tmp)
@ -107,74 +65,122 @@ class FITC(sparse_GP):
tmp, info1 = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0) tmp, info1 = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0) self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
# dlogbeta_dtheta
Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1) Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
b_psi1_Ki = self.beta_star * Kmmipsi1.T b_psi1_Ki = self.beta_star * Kmmipsi1.T
Ki_pbp_Ki = np.dot(Kmmipsi1,b_psi1_Ki) Ki_pbp_Ki = np.dot(Kmmipsi1,b_psi1_Ki)
dlogB_dpsi0 = -.5*self.kern.dKdiag_dtheta(self.beta_star,X=self.X)
dlogB_dpsi1 = self.kern.dK_dtheta(b_psi1_Ki,self.X,self.Z)
dlogB_dKmm = -.5*self.kern.dK_dtheta(Ki_pbp_Ki,X=self.Z)
self.dlogbeta_dtheta = dlogB_dpsi0 + dlogB_dpsi1 + dlogB_dKmm
# dyby_dtheta
Kmmi = np.dot(self.Lmi.T,self.Lmi) Kmmi = np.dot(self.Lmi.T,self.Lmi)
LBiLmi = np.dot(self.LBi,self.Lmi)
LBL_inv = np.dot(LBiLmi.T,LBiLmi)
VVT = np.outer(self.V_star,self.V_star) VVT = np.outer(self.V_star,self.V_star)
VV_p_Ki = np.dot(VVT,Kmmipsi1.T) VV_p_Ki = np.dot(VVT,Kmmipsi1.T)
Ki_pVVp_Ki = np.dot(Kmmipsi1,VV_p_Ki) Ki_pVVp_Ki = np.dot(Kmmipsi1,VV_p_Ki)
dyby_dpsi0 = .5 * self.kern.dKdiag_dtheta(self.V_star**2,self.X)
dyby_dpsi1 = 0
dyby_dKmm = 0
dyby_dtheta = dyby_dpsi0
for psi1_n,V_n,X_n in zip(self.psi1.T,self.V_star,self.X):
dyby_dpsi1 = -V_n**2 * np.dot(psi1_n[None,:],Kmmi)
dyby_dtheta += self.kern.dK_dtheta(dyby_dpsi1,X_n[:,None],self.Z)
for psi1_n,V_n,X_n in zip(self.psi1.T,self.V_star,self.X):
psin_K = np.dot(psi1_n[None,:],Kmmi)
tmp = np.dot(psin_K.T,psin_K)
dyby_dKmm = .5*V_n**2 * tmp
dyby_dtheta += self.kern.dK_dtheta(dyby_dKmm,self.Z)
self.dyby_dtheta = dyby_dtheta
# dlogB_dtheta : C
#C_B
dC_B = -.5*Kmmi
C_B = self.kern.dK_dtheta(dC_B,self.Z) #check
#C_A
LBiLmi = np.dot(self.LBi,self.Lmi)
LBL_inv = np.dot(LBiLmi.T,LBiLmi)
dC_AA = .5*LBL_inv
C_AA = self.kern.dK_dtheta(dC_AA,self.Z) #check
#C_AB
psi1beta = self.psi1*self.beta_star.T psi1beta = self.psi1*self.beta_star.T
dC_ABA = mdot(LBL_inv,psi1beta,Kmmipsi1.T) H = self.Kmm + mdot(self.psi1,self.beta_star*self.psi1.T)
C_ABA = self.kern.dK_dtheta(dC_ABA,self.Z) Hi, LH, LHi, logdetH = pdinv(H)
dC_ABB = -np.dot(psi1beta.T,LBL_inv) #check
C_ABB = self.kern.dK_dtheta(dC_ABB,self.X,self.Z) #check
# C_ABC
betapsi1TLmiLBi = np.dot(psi1beta.T,LBiLmi.T) betapsi1TLmiLBi = np.dot(psi1beta.T,LBiLmi.T)
alpha = np.array([np.dot(a.T,a) for a in betapsi1TLmiLBi])[:,None] alpha = np.array([np.dot(a.T,a) for a in betapsi1TLmiLBi])[:,None]
dC_ABCA = .5 *alpha gamma_1 = mdot(VVT,self.psi1.T,Hi)
C_ABCA = self.kern.dKdiag_dtheta(dC_ABCA,self.X) #check pHip = mdot(self.psi1.T,Hi,self.psi1)
gamma_2 = mdot(self.beta_star*pHip,self.V_star)
gamma_3 = self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T
C_ABCB = 0 dA_dpsi0_1 = -0.5 * self.beta_star
for psi1_n,alpha_n,X_n in zip(self.psi1.T,alpha,self.X): dA_dpsi0 = .5 * self.V_star**2
dC_ABCB_n = - alpha_n * np.dot(psi1_n[None,:],Kmmi)
C_ABCB += self.kern.dK_dtheta(dC_ABCB_n,X_n[:,None],self.Z) #check dC_dpsi0 = .5 *alpha
dD_dpsi0 = 0.5*mdot(self.beta_star*pHip,self.V_star)**2
dD_dpsi1 = gamma_1
dD_dpsi1 += -mdot(psi1beta.T,Hi,self.psi1,gamma_1)
dD_dpsi0 += -self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T
dA_dpsi1 = b_psi1_Ki
dC_dpsi1 = -np.dot(psi1beta.T,LBL_inv)
dA_dKmm = -0.5 * np.dot(Kmmipsi1,b_psi1_Ki)
dC_dKmm = -.5*Kmmi
dC_dKmm += .5*LBL_inv
dC_dKmm += mdot(LBL_inv,psi1beta,Kmmipsi1.T)
dD_dKmm = -.5 * mdot(Hi,self.psi1,gamma_1)
dA_dpsi0_theta = self.kern.dKdiag_dtheta(dA_dpsi0,X=self.X)
dA_dpsi1_theta = 0
dA_dpsi1_X = 0
dA_dKmm_theta = 0
dA_dKmm_X = 0
_dC_dpsi1_dtheta = 0
_dC_dpsi1_dX = 0
_dC_dKmm_dtheta = 0
_dC_dKmm_dX = 0
_dD_dpsi1_dtheta_1 = 0
_dD_dpsi1_dX_1 = 0
_dD_dKmm_dtheta_1 = 0
_dD_dKmm_dX_1 = 0
_dD_dpsi1_dtheta_2 = 0
_dD_dpsi1_dX_2 = 0
_dD_dKmm_dtheta_2 = 0
_dD_dKmm_dX_2 = 0
for psi1_n,V_n,X_n,alpha_n,gamma_n,gamma_k in zip(self.psi1.T,self.V_star,self.X,alpha,gamma_2,gamma_3):
C_ABCC = 0
for psi1_n,alpha_n,X_n in zip(self.psi1.T,alpha,self.X):
psin_K = np.dot(psi1_n[None,:],Kmmi) psin_K = np.dot(psi1_n[None,:],Kmmi)
tmp = np.dot(psin_K.T,psin_K)
dC_ABCC = .5 * alpha_n * tmp
C_ABCC += self.kern.dK_dtheta(dC_ABCC,self.Z) #check
self.dlogB_dtheta = C_B + C_AA + C_ABA + C_ABB + C_ABCA + C_ABCB + C_ABCC _dA_dpsi1 = -V_n**2 * np.dot(psi1_n[None,:],Kmmi)
_dC_dpsi1 = - alpha_n * np.dot(psi1_n[None,:],Kmmi)
_dD_dpsi1_1 = - gamma_n**2 * np.dot(psi1_n[None,:],Kmmi)
_dD_dpsi1_2 = 2. * gamma_k * np.dot(psi1_n[None,:],Kmmi)
_dA_dKmm = .5*V_n**2 * np.dot(psin_K.T,psin_K)
_dC_dKmm = .5 * alpha_n * np.dot(psin_K.T,psin_K)
_dD_dKmm_1 = .5*gamma_n**2 * np.dot(psin_K.T,psin_K)
_dD_dKmm_2 = - gamma_n * np.dot(psin_K.T,psin_K)
dA_dpsi1_theta += self.kern.dK_dtheta(_dA_dpsi1,X_n[None,:],self.Z)
_dC_dpsi1_dtheta += self.kern.dK_dtheta(_dC_dpsi1,X_n[None,:],self.Z)
_dD_dpsi1_dtheta_1 += self.kern.dK_dtheta(_dD_dpsi1_1,X_n[None,:],self.Z)
_dD_dpsi1_dtheta_2 += self.kern.dK_dtheta(_dD_dpsi1_2,X_n[None,:],self.Z)
dA_dKmm_theta += self.kern.dK_dtheta(_dA_dKmm,self.Z)
_dC_dKmm_dtheta += self.kern.dK_dtheta(_dC_dKmm,self.Z)
_dD_dKmm_dtheta_1 += self.kern.dK_dtheta(_dD_dKmm_1,self.Z)
_dD_dKmm_dtheta_2 += self.kern.dK_dtheta(_dD_dKmm_2,self.Z)
dA_dpsi1_X += self.kern.dK_dX(_dA_dpsi1.T,self.Z,X_n[None,:])
_dC_dpsi1_dX += self.kern.dK_dX(_dC_dpsi1.T,self.Z,X_n[None,:])
_dD_dpsi1_dX_1 += self.kern.dK_dX(_dD_dpsi1_1.T,self.Z,X_n[None,:])
_dD_dpsi1_dX_2 += self.kern.dK_dX(_dD_dpsi1_2.T,self.Z,X_n[None,:])
dA_dKmm_X += 2.*self.kern.dK_dX(_dA_dKmm,self.Z)
_dC_dKmm_dX += 2.*self.kern.dK_dX(_dC_dKmm,self.Z)
_dD_dKmm_dX_1 += 2.*self.kern.dK_dX(_dD_dKmm_1,self.Z)
_dD_dKmm_dX_2 += 2.*self.kern.dK_dX(_dD_dKmm_2,self.Z)
dA_dX_1 = self.kern.dK_dX(dA_dpsi1.T,self.Z,self.X) + 2. * self.kern.dK_dX(dA_dKmm,X=self.Z)
dA_dtheta_1 = self.kern.dKdiag_dtheta(dA_dpsi0_1,X=self.X) + self.kern.dK_dtheta(dA_dpsi1,self.X,self.Z) + self.kern.dK_dtheta(dA_dKmm,X=self.Z)
dA_dtheta_2 = dA_dpsi0_theta + dA_dpsi1_theta + dA_dKmm_theta
dA_dX_2 = dA_dpsi1_X + dA_dKmm_X
self.dA_dtheta = dA_dtheta_1 + dA_dtheta_2
self.dA_dX = dA_dX_1 + dA_dX_2
self.dlogB_dtheta = self.kern.dK_dtheta(dC_dKmm,self.Z) + self.kern.dK_dtheta(dC_dpsi1,self.X,self.Z) + self.kern.dKdiag_dtheta(dC_dpsi0,self.X) + _dC_dpsi1_dtheta + _dC_dKmm_dtheta
self.dlogB_dX = 2.*self.kern.dK_dX(dC_dKmm,self.Z) + self.kern.dK_dX(dC_dpsi1.T,self.Z,self.X) + _dC_dpsi1_dX + _dC_dKmm_dX
self.dD_dtheta = self.kern.dKdiag_dtheta(dD_dpsi0,self.X) + self.kern.dK_dtheta(dD_dKmm,self.Z) + self.kern.dK_dtheta(dD_dpsi1,self.X,self.Z) + _dD_dpsi1_dtheta_2 + _dD_dKmm_dtheta_2 + _dD_dpsi1_dtheta_1 + _dD_dKmm_dtheta_1
self.dD_dX = 2.*self.kern.dK_dX(dD_dKmm,self.Z) + self.kern.dK_dX(dD_dpsi1.T,self.Z,self.X) + _dD_dpsi1_dX_2 + _dD_dKmm_dX_2 + _dD_dpsi1_dX_1 + _dD_dKmm_dX_1
# the partial derivative vector for the likelihood # the partial derivative vector for the likelihood
@ -185,64 +191,117 @@ class FITC(sparse_GP):
raise NotImplementedError, "heteroscedatic derivates not implemented" raise NotImplementedError, "heteroscedatic derivates not implemented"
else: else:
# likelihood is not heterscedatic # likelihood is not heterscedatic
self.partial_for_likelihood = 0 #FIXME dbstar_dnoise = self.likelihood.precision * (self.beta_star**2 * self.Diag0[:,None] - self.beta_star)
#self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2 Lmi_psi1 = mdot(self.Lmi,self.psi1)
#self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision) LBiLmipsi1 = np.dot(self.LBi,Lmi_psi1)
#self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V))) aux_0 = np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V)
dA_dnoise = 0.5 * self.D * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.D * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
alpha = mdot(LBiLmipsi1,self.V_star)
alpha_ = mdot(LBiLmipsi1.T,alpha)
dD_dnoise_2 = -0.5 * self.D * np.sum(alpha_**2 * dbstar_dnoise )
dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y)
dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star)
dD_dnoise = dD_dnoise_1 + dD_dnoise_2
self.partial_for_likelihood = dA_dnoise + dC_dnoise + dD_dnoise
def log_likelihood(self): def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """ """ Compute the (lower bound on the) log marginal likelihood """
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y) A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
C = -self.D * (np.sum(np.log(np.diag(self.LB)))) C = -self.D * (np.sum(np.log(np.diag(self.LB))))
"""
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
#B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
C = -self.D * (np.sum(np.log(np.diag(self.LB))))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + C + D # +B return A + C + D
"""
return A+C
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
pass pass
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood))) return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self): def dL_dtheta(self):
#dL_dtheta = self.dlogB_dtheta
#dL_dtheta = self.dyby_dtheta
#dL_dtheta = self.dlogbeta_dtheta + self.dyby_dtheta
dL_dtheta = self.dlogB_dtheta
dL_dtheta = self.dlogbeta_dtheta + self.dyby_dtheta + self.dlogB_dtheta
"""
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance) raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance)
else: else:
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.Z, self.X) dL_dtheta = self.dA_dtheta + self.dlogB_dtheta + self.dD_dtheta
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
"""
return dL_dtheta return dL_dtheta
def dL_dZ(self): def dL_dZ(self):
dL_dZ = np.zeros(self.M)
"""
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance) raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
else: else:
dL_dZ += self.kern.dK_dX(self.dL_dpsi1, self.Z, self.X) dL_dZ = self.dA_dX + self.dlogB_dX + self.dD_dX
"""
return dL_dZ return dL_dZ
def _raw_predict(self, Xnew, which_parts, full_cov=False):
if self.likelihood.is_heteroscedastic:
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.likelihood.precision.flatten())
self.Diag = self.Diag0 * Iplus_Dprod_i
self.P = Iplus_Dprod_i[:,None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi,self.psi1)
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
self.w = self.Diag * self.likelihood.v_tilde
self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde))
self.mu = self.w + np.dot(self.P,self.gamma)
"""
Make a prediction for the generalized FITC model
Arguments
---------
X : Input prediction data - Nx1 numpy array (floats)
"""
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1)
C = np.eye(self.M) - np.dot(V.T,V)
mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
#self.C = C
#self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
#self.mu_u = mu_u
#self.U = U
# q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
mu_H = np.dot(mu_u,self.mu)
self.mu_H = mu_H
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
KR0T = np.dot(Kx.T,self.Lmi.T)
mu_star = np.dot(KR0T,mu_H)
if full_cov:
Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed?
var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) # TODO: RA, is this line needed?
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
return mu_star[:,None],var
else:
raise NotImplementedError, "homoscedastic fitc not implemented"
"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]
"""

View file

@ -3,10 +3,11 @@
import numpy as np import numpy as np
from scipy import linalg
import pylab as pb import pylab as pb
from .. import kern from .. import kern
from ..core import model from ..core import model
from ..util.linalg import pdinv, mdot from ..util.linalg import pdinv, mdot, tdot
from ..util.plot import gpplot, x_frame1D, x_frame2D, Tango from ..util.plot import gpplot, x_frame1D, x_frame2D, Tango
from ..likelihoods import EP from ..likelihoods import EP
@ -58,13 +59,12 @@ class GP(model):
""" """
TODO: one day we might like to learn Z by gradient methods? TODO: one day we might like to learn Z by gradient methods?
""" """
#FIXME: this doesn;t live here.
return np.zeros_like(self.Z) return np.zeros_like(self.Z)
def _set_params(self, p): def _set_params(self, p):
self.kern._set_params_transformed(p[:self.kern.Nparam_transformed()]) self.kern._set_params_transformed(p[:self.kern.Nparam_transformed()])
# self.likelihood._set_params(p[self.kern.Nparam:]) # test by Nicolas self.likelihood._set_params(p[self.kern.Nparam_transformed():])
self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas
self.K = self.kern.K(self.X) self.K = self.kern.K(self.X)
self.K += self.likelihood.covariance_matrix self.K += self.likelihood.covariance_matrix
@ -73,10 +73,14 @@ class GP(model):
# the gradient of the likelihood wrt the covariance matrix # the gradient of the likelihood wrt the covariance matrix
if self.likelihood.YYT is None: if self.likelihood.YYT is None:
alpha = np.dot(self.Ki, self.likelihood.Y) #alpha = np.dot(self.Ki, self.likelihood.Y)
self.dL_dK = 0.5 * (np.dot(alpha, alpha.T) - self.D * self.Ki) alpha,_ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y,lower=1)
self.dL_dK = 0.5 * (tdot(alpha) - self.D * self.Ki)
else: else:
tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki) #tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
self.dL_dK = 0.5 * (tmp - self.D * self.Ki) self.dL_dK = 0.5 * (tmp - self.D * self.Ki)
def _get_params(self): def _get_params(self):
@ -100,7 +104,9 @@ class GP(model):
Computes the model fit using YYT if it's available Computes the model fit using YYT if it's available
""" """
if self.likelihood.YYT is None: if self.likelihood.YYT is None:
return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y))) tmp, _ = linalg.lapack.flapack.dtrtrs(self.L, np.asfortranarray(self.likelihood.Y), lower=1)
return -0.5 * np.sum(np.square(tmp))
#return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
else: else:
return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT)) return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT))
@ -123,18 +129,15 @@ class GP(model):
""" """
return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK)))) return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False): def _raw_predict(self, _Xnew, which_parts='all', full_cov=False,stop=False):
""" """
Internal helper function for making predictions, does not account Internal helper function for making predictions, does not account
for normalization or likelihood for normalization or likelihood
#TODO: which_parts does nothing
""" """
Kx = self.kern.K(self.X, _Xnew,which_parts=which_parts) Kx = self.kern.K(_Xnew,self.X,which_parts=which_parts).T
mu = np.dot(np.dot(Kx.T, self.Ki), self.likelihood.Y) #KiKx = np.dot(self.Ki, Kx)
KiKx = np.dot(self.Ki, Kx) KiKx, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(Kx), lower=1)
mu = np.dot(KiKx.T, self.likelihood.Y)
if full_cov: if full_cov:
Kxx = self.kern.K(_Xnew, which_parts=which_parts) Kxx = self.kern.K(_Xnew, which_parts=which_parts)
var = Kxx - np.dot(KiKx.T, Kx) var = Kxx - np.dot(KiKx.T, Kx)
@ -142,6 +145,8 @@ class GP(model):
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts) Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
var = Kxx - np.sum(np.multiply(KiKx, Kx), 0) var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
var = var[:, None] var = var[:, None]
if stop:
debug_this
return mu, var return mu, var
@ -178,7 +183,8 @@ class GP(model):
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False): def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False):
""" """
Plot the GP's view of the world, where the data is normalized and the likelihood is Gaussian Plot the GP's view of the world, where the data is normalized and the
likelihood is Gaussian.
:param samples: the number of a posteriori samples to plot :param samples: the number of a posteriori samples to plot
:param which_data: which if the training data to plot (default all) :param which_data: which if the training data to plot (default all)
@ -193,8 +199,8 @@ class GP(model):
- In two dimsensions, a contour-plot shows the mean predicted function - In two dimsensions, a contour-plot shows the mean predicted function
- In higher dimensions, we've no implemented this yet !TODO! - In higher dimensions, we've no implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions using which_data and which_functions Can plot only part of the data and part of the posterior functions
Plot the data's view of the world, with non-normalized values and GP predictions passed through the likelihood using which_data and which_functions
""" """
if which_data == 'all': if which_data == 'all':
which_data = slice(None) which_data = slice(None)

View file

@ -45,7 +45,7 @@ class GPLVM(GP):
return np.hstack((self.X.flatten(), GP._get_params(self))) return np.hstack((self.X.flatten(), GP._get_params(self)))
def _set_params(self,x): def _set_params(self,x):
self.X = x[:self.X.size].reshape(self.N,self.Q).copy() self.X = x[:self.N*self.Q].reshape(self.N,self.Q).copy()
GP._set_params(self, x[self.X.size:]) GP._set_params(self, x[self.X.size:])
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):

View file

@ -12,4 +12,4 @@ from sparse_GPLVM import sparse_GPLVM
from Bayesian_GPLVM import Bayesian_GPLVM from Bayesian_GPLVM import Bayesian_GPLVM
from mrd import MRD from mrd import MRD
from generalized_FITC import generalized_FITC from generalized_FITC import generalized_FITC
#from FITC import FITC from FITC import FITC

View file

@ -3,17 +3,12 @@
import numpy as np import numpy as np
import pylab as pb import pylab as pb
from ..util.linalg import mdot, jitchol, tdot, symmetrify from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides
from ..util.plot import gpplot from ..util.plot import gpplot
from .. import kern from .. import kern
from GP import GP from GP import GP
from scipy import linalg from scipy import linalg
def backsub_both_sides(L, X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
class sparse_GP(GP): class sparse_GP(GP):
""" """
Variational sparse GP model Variational sparse GP model

View file

@ -23,6 +23,7 @@ class warpedGP(GP):
self.warping_function = TanhWarpingFunction_d(warping_terms) self.warping_function = TanhWarpingFunction_d(warping_terms)
self.warping_params = (np.random.randn(self.warping_function.n_terms*3+1,) * 1) self.warping_params = (np.random.randn(self.warping_function.n_terms*3+1,) * 1)
Y = self._scale_data(Y)
self.has_uncertain_inputs = False self.has_uncertain_inputs = False
self.Y_untransformed = Y.copy() self.Y_untransformed = Y.copy()
self.predict_in_warped_space = False self.predict_in_warped_space = False
@ -30,6 +31,14 @@ class warpedGP(GP):
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
def _scale_data(self, Y):
self._Ymax = Y.max()
self._Ymin = Y.min()
return (Y-self._Ymin)/(self._Ymax-self._Ymin) - 0.5
def _unscale_data(self, Y):
return (Y + 0.5)*(self._Ymax - self._Ymin) + self._Ymin
def _set_params(self, x): def _set_params(self, x):
self.warping_params = x[:self.warping_function.num_parameters] self.warping_params = x[:self.warping_function.num_parameters]
Y = self.transform_data() Y = self.transform_data()
@ -79,5 +88,5 @@ class warpedGP(GP):
if self.predict_in_warped_space: if self.predict_in_warped_space:
mu = self.warping_function.f_inv(mu, self.warping_params) mu = self.warping_function.f_inv(mu, self.warping_params)
var = self.warping_function.f_inv(var, self.warping_params) var = self.warping_function.f_inv(var, self.warping_params)
mu = self._unscale_data(mu)
return mu, var return mu, var

View file

@ -124,8 +124,9 @@ def pdinv(A, *args):
L = jitchol(A, *args) L = jitchol(A, *args)
logdet = 2.*np.sum(np.log(np.diag(L))) logdet = 2.*np.sum(np.log(np.diag(L)))
Li = chol_inv(L) Li = chol_inv(L)
Ai = linalg.lapack.flapack.dpotri(L)[0] Ai, _ = linalg.lapack.flapack.dpotri(L)
Ai = np.tril(Ai) + np.tril(Ai,-1).T #Ai = np.tril(Ai) + np.tril(Ai,-1).T
symmetrify(Ai)
return Ai, L, Li, logdet return Ai, L, Li, logdet
@ -235,6 +236,18 @@ def tdot(*args, **kwargs):
else: else:
return tdot_numpy(*args,**kwargs) return tdot_numpy(*args,**kwargs)
def DSYR(A,x,alpha=1.):
N = c_int(A.shape[0])
LDA = c_int(A.shape[0])
UPLO = c_char('l')
ALPHA = c_double(alpha)
A_ = A.ctypes.data_as(ctypes.c_void_p)
x_ = x.ctypes.data_as(ctypes.c_void_p)
INCX = c_int(1)
_blaslib.dsyr_(byref(UPLO), byref(N), byref(ALPHA),
x_, byref(INCX), A_, byref(LDA))
symmetrify(A,upper=True)
def symmetrify(A,upper=False): def symmetrify(A,upper=False):
""" """
Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper
@ -244,33 +257,38 @@ def symmetrify(A,upper=False):
N,M = A.shape N,M = A.shape
assert N==M assert N==M
c_contig_code = """ c_contig_code = """
int iN;
for (int i=1; i<N; i++){ for (int i=1; i<N; i++){
iN = i*N;
for (int j=0; j<i; j++){ for (int j=0; j<i; j++){
A[i+j*N] = A[i*N+j]; A[i+j*N] = A[iN+j];
} }
} }
""" """
f_contig_code = """ f_contig_code = """
int iN;
for (int i=1; i<N; i++){ for (int i=1; i<N; i++){
iN = i*N;
for (int j=0; j<i; j++){ for (int j=0; j<i; j++){
A[i*N+j] = A[i+j*N]; A[iN+j] = A[i+j*N];
} }
} }
""" """
if A.flags['C_CONTIGUOUS'] and upper: if A.flags['C_CONTIGUOUS'] and upper:
weave.inline(f_contig_code,['A','N']) weave.inline(f_contig_code,['A','N'], extra_compile_args=['-O3'])
elif A.flags['C_CONTIGUOUS'] and not upper: elif A.flags['C_CONTIGUOUS'] and not upper:
weave.inline(c_contig_code,['A','N']) weave.inline(c_contig_code,['A','N'], extra_compile_args=['-O3'])
elif A.flags['F_CONTIGUOUS'] and upper: elif A.flags['F_CONTIGUOUS'] and upper:
weave.inline(c_contig_code,['A','N']) weave.inline(c_contig_code,['A','N'], extra_compile_args=['-O3'])
elif A.flags['F_CONTIGUOUS'] and not upper: elif A.flags['F_CONTIGUOUS'] and not upper:
weave.inline(f_contig_code,['A','N']) weave.inline(f_contig_code,['A','N'], extra_compile_args=['-O3'])
else: else:
tmp = np.tril(A) tmp = np.tril(A)
A[:] = 0.0 A[:] = 0.0
A += tmp A += tmp
A += np.tril(tmp,-1).T A += np.tril(tmp,-1).T
def symmetrify_murray(A): def symmetrify_murray(A):
A += A.T A += A.T
nn = A.shape[0] nn = A.shape[0]
@ -303,3 +321,13 @@ def cholupdate(L,x):
x = x.copy() x = x.copy()
N = x.size N = x.size
weave.inline(code, support_code=support_code, arg_names=['N','L','x'], type_converters=weave.converters.blitz) weave.inline(code, support_code=support_code, arg_names=['N','L','x'], type_converters=weave.converters.blitz)
def backsub_both_sides(L, X,transpose='left'):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
if transpose=='left':
tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
else:
tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=0)
return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=0)[0].T

View file

@ -0,0 +1,35 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import weave
def std_norm_pdf(x):
"""Standard Gaussian density function"""
return 1./np.sqrt(2.*np.pi)*np.exp(-.5*x**2)
def std_norm_cdf(x):
    """
    Cumulative distribution function of the standard Gaussian, Phi(x).

    Replaces the previous scipy.weave inline-C implementation of the
    Abramowitz & Stegun (1970) approximation 7.1.26, which had ~1.5e-7
    absolute error and depended on scipy.weave (Python-2 only, removed
    from SciPy). math.erf is in the standard library since Python 2.7
    and is exact to double precision.

    :param x: scalar point at which to evaluate the CDF.
    :returns: Phi(x) as a float in [0, 1].
    """
    import math  # local import: the module does not otherwise need math
    x = float(x)
    # Phi(x) = (1 + erf(x / sqrt(2))) / 2
    return 0.5 * (1.0 + math.erf(x / math.sqrt(2.0)))

View file

@ -14,7 +14,7 @@ class data_show:
""" """
def __init__(self, vals, axes=None): def __init__(self, vals, axes=None):
self.vals = vals self.vals = vals.copy()
# If no axes are defined, create some. # If no axes are defined, create some.
if axes==None: if axes==None:
fig = plt.figure() fig = plt.figure()
@ -32,12 +32,12 @@ class vector_show(data_show):
""" """
def __init__(self, vals, axes=None): def __init__(self, vals, axes=None):
data_show.__init__(self, vals, axes) data_show.__init__(self, vals, axes)
self.vals = vals.T self.vals = vals.T.copy()
self.handle = self.axes.plot(np.arange(0, len(vals))[:, None], self.vals)[0] self.handle = self.axes.plot(np.arange(0, len(vals))[:, None], self.vals)[0]
def modify(self, vals): def modify(self, vals):
xdata, ydata = self.handle.get_data() xdata, ydata = self.handle.get_data()
self.vals = vals.T self.vals = vals.T.copy()
self.handle.set_data(xdata, self.vals) self.handle.set_data(xdata, self.vals)
self.axes.figure.canvas.draw() self.axes.figure.canvas.draw()
@ -52,7 +52,7 @@ class lvm(data_show):
:param latent_axes: the axes where the latent visualization should be plotted. :param latent_axes: the axes where the latent visualization should be plotted.
""" """
if vals == None: if vals == None:
vals = model.X[0] vals = model.X[0].copy()
data_show.__init__(self, vals, axes=latent_axes) data_show.__init__(self, vals, axes=latent_axes)
@ -83,7 +83,6 @@ class lvm(data_show):
def modify(self, vals): def modify(self, vals):
"""When latent values are modified update the latent representation and ulso update the output visualization.""" """When latent values are modified update the latent representation and ulso update the output visualization."""
y = self.model.predict(vals)[0] y = self.model.predict(vals)[0]
self.data_visualize.modify(y) self.data_visualize.modify(y)
self.latent_handle.set_data(vals[self.latent_index[0]], vals[self.latent_index[1]]) self.latent_handle.set_data(vals[self.latent_index[0]], vals[self.latent_index[1]])
@ -216,11 +215,11 @@ class image_show(data_show):
plt.show() plt.show()
def set_image(self, vals): def set_image(self, vals):
self.vals = np.reshape(vals, self.dimensions, order='F') self.vals = np.reshape(vals, self.dimensions, order='F').copy()
if self.transpose: if self.transpose:
self.vals = self.vals.T self.vals = self.vals.T.copy()
if not self.scale: if not self.scale:
self.vals = self.vals self.vals = self.vals.copy()
#if self.invert: #if self.invert:
# self.vals = -self.vals # self.vals = -self.vals
@ -304,7 +303,7 @@ class stick_show(mocap_data_show):
mocap_data_show.__init__(self, vals, axes, connect) mocap_data_show.__init__(self, vals, axes, connect)
def process_values(self, vals): def process_values(self, vals):
self.vals = vals.reshape((3, vals.shape[1]/3)).T self.vals = vals.reshape((3, vals.shape[1]/3)).T.copy()
class skeleton_show(mocap_data_show): class skeleton_show(mocap_data_show):
"""data_show class for visualizing motion capture data encoded as a skeleton with angles.""" """data_show class for visualizing motion capture data encoded as a skeleton with angles."""

View file

@ -185,7 +185,7 @@ class TanhWarpingFunction_d(WarpingFunction):
return z return z
def f_inv(self, y, psi, iterations = 30): def f_inv(self, z, psi, max_iterations = 1000):
""" """
calculate the numerical inverse of f calculate the numerical inverse of f
@ -194,13 +194,19 @@ class TanhWarpingFunction_d(WarpingFunction):
""" """
y = y.copy() z = z.copy()
z = np.ones_like(y) y = np.ones_like(z)
it = 0
update = np.inf
for i in range(iterations): while it == 0 or (np.abs(update).sum() > 1e-10 and it < max_iterations):
z -= (self.f(z, psi) - y)/self.fgrad_y(z,psi) update = (self.f(y, psi) - z)/self.fgrad_y(y, psi)
y -= update
it += 1
if it == max_iterations:
print "WARNING!!! Maximum number of iterations reached in f_inv "
return z return y
def fgrad_y(self, y, psi, return_precalc = False): def fgrad_y(self, y, psi, return_precalc = False):

Binary file not shown.

Before

Width:  |  Height:  |  Size: 37 KiB

After

Width:  |  Height:  |  Size: 75 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 78 KiB

After

Width:  |  Height:  |  Size: 130 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 61 KiB

After

Width:  |  Height:  |  Size: 62 KiB

Before After
Before After

View file

@ -55,18 +55,18 @@ In ``GPy``, kernel objects can be added or multiplied. In both cases, two kinds
* a kernel over :math:`\mathbb{R} \times \mathbb{R}`: :math:`k(x,y) = k_1(x,y) \times k_2(x,y)` * a kernel over :math:`\mathbb{R} \times \mathbb{R}`: :math:`k(x,y) = k_1(x,y) \times k_2(x,y)`
* a kernel over :math:`\mathbb{R}^2 \times \mathbb{R}^2`: :math:`k(\mathbf{x},\mathbf{y}) = k_1(x_1,y_1) \times k_2(x_2,y_2)` * a kernel over :math:`\mathbb{R}^2 \times \mathbb{R}^2`: :math:`k(\mathbf{x},\mathbf{y}) = k_1(x_1,y_1) \times k_2(x_2,y_2)`
These two options are available in GPy under the name ``prod`` and ``prod_orthogonal`` (resp. ``add`` and ``add_orthogonal`` for the addition). Here is a quick example :: These two options are available in GPy using the flag ``tensor`` in the ``add`` and ``prod`` functions. Here is a quick example ::
k1 = GPy.kern.rbf(1,1.,2.) k1 = GPy.kern.rbf(1,1.,2.)
k2 = GPy.kern.Matern32(1, 0.5, 0.2) k2 = GPy.kern.Matern32(1, 0.5, 0.2)
# Product of kernels # Product of kernels
k_prod = k1.prod(k2) k_prod = k1.prod(k2) # By default, tensor=False
k_prodorth = k1.prod_orthogonal(k2) k_prodtens = k1.prod(k2,tensor=True)
# Sum of kernels # Sum of kernels
k_add = k1.add(k2) k_add = k1.add(k2) # By default, tensor=False
k_addorth = k1.add_orthogonal(k2) k_addtens = k1.add(k2,tensor=True)
.. # plots .. # plots
pb.figure(figsize=(8,8)) pb.figure(figsize=(8,8))
@ -74,21 +74,21 @@ These two options are available in GPy under the name ``prod`` and ``prod_orthog
k_prod.plot() k_prod.plot()
pb.title('prod') pb.title('prod')
pb.subplot(2,2,2) pb.subplot(2,2,2)
k_prodorth.plot() k_prodtens.plot()
pb.title('prod_orthogonal') pb.title('tensor prod')
pb.subplot(2,2,3) pb.subplot(2,2,3)
k_add.plot() k_add.plot()
pb.title('add') pb.title('sum')
pb.subplot(2,2,4) pb.subplot(2,2,4)
k_addorth.plot() k_addtens.plot()
pb.title('add_orthogonal') pb.title('tensor sum')
pb.subplots_adjust(wspace=0.3, hspace=0.3) pb.subplots_adjust(wspace=0.3, hspace=0.3)
.. figure:: Figures/tuto_kern_overview_multadd.png .. figure:: Figures/tuto_kern_overview_multadd.png
:align: center :align: center
:height: 500px :height: 500px
A shortcut for ``add`` and ``prod`` is provided by the usual ``+`` and ``*`` operators. Here is another example where we create a periodic kernel with some decay :: A shortcut for ``add`` and ``prod`` (with default flag ``tensor=False``) is provided by the usual ``+`` and ``*`` operators. Here is another example where we create a periodic kernel with some decay ::
k1 = GPy.kern.rbf(1,1.,2) k1 = GPy.kern.rbf(1,1.,2)
k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5) k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5)
@ -113,7 +113,7 @@ A shortcut for ``add`` and ``prod`` is provided by the usual ``+`` and ``*`` ope
:align: center :align: center
:height: 300px :height: 300px
In general, ``kern`` objects can be seen as a sum of ``kernparts`` objects, where the later are covariance functions denied on the same space. For example, the following code :: In general, ``kern`` objects can be seen as a sum of ``kernparts`` objects, where the later are covariance functions defined on the same space. For example, the following code ::
k = (k1+k2)*(k1+k2) k = (k1+k2)*(k1+k2)
print k.parts[0].name, '\n', k.parts[1].name, '\n', k.parts[2].name, '\n', k.parts[3].name print k.parts[0].name, '\n', k.parts[1].name, '\n', k.parts[2].name, '\n', k.parts[3].name
@ -184,7 +184,7 @@ Let us assume that we want to define an ANOVA kernel with a Matern 3/2 kernel fo
k_cst = GPy.kern.bias(1,variance=1.) k_cst = GPy.kern.bias(1,variance=1.)
k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3) k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
Kanova = (k_cst + k_mat).prod_orthogonal(k_cst + k_mat) Kanova = (k_cst + k_mat).prod(k_cst + k_mat,tensor=True)
print Kanova print Kanova
Printing the resulting kernel outputs the following :: Printing the resulting kernel outputs the following ::
@ -236,14 +236,14 @@ The submodels can be represented with the option ``which_function`` of ``plot``:
pb.subplot(1,5,2) pb.subplot(1,5,2)
pb.ylabel("= ",rotation='horizontal',fontsize='30') pb.ylabel("= ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,3) pb.subplot(1,5,3)
m.plot(which_functions=[False,True,False,False]) m.plot(which_parts=[False,True,False,False])
pb.ylabel("cst +",rotation='horizontal',fontsize='30') pb.ylabel("cst +",rotation='horizontal',fontsize='30')
pb.subplot(1,5,4) pb.subplot(1,5,4)
m.plot(which_functions=[False,False,True,False]) m.plot(which_parts=[False,False,True,False])
pb.ylabel("+ ",rotation='horizontal',fontsize='30') pb.ylabel("+ ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,5) pb.subplot(1,5,5)
pb.ylabel("+ ",rotation='horizontal',fontsize='30') pb.ylabel("+ ",rotation='horizontal',fontsize='30')
m.plot(which_functions=[False,False,False,True]) m.plot(which_parts=[False,False,False,True])
.. pb.savefig('tuto_kern_overview_mANOVAdec.png',bbox_inches='tight') .. pb.savefig('tuto_kern_overview_mANOVAdec.png',bbox_inches='tight')
@ -252,7 +252,8 @@ The submodels can be represented with the option ``which_function`` of ``plot``:
:height: 250px :height: 250px
.. import pylab as pb .. # code
import pylab as pb
import numpy as np import numpy as np
import GPy import GPy
pb.ion() pb.ion()