Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
Ricardo 2013-05-10 17:55:00 +01:00
commit a500c526dd
14 changed files with 174 additions and 198 deletions

View file

@ -50,7 +50,9 @@ class logexp_clipped(transformation):
ef = np.exp(f) ef = np.exp(f)
gf = (ef - 1.) / ef gf = (ef - 1.) / ef
return np.where(f < 1e-6, 0, gf) return np.where(f < 1e-6, 0, gf)
def initialize(self, f): def initialize(self,f):
if np.any(f<0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f) return np.abs(f)
def __str__(self): def __str__(self):
return '(+ve)' return '(+ve)'
@ -65,6 +67,8 @@ class exponent(transformation):
def gradfactor(self, f): def gradfactor(self, f):
return f return f
def initialize(self, f): def initialize(self, f):
if np.any(f<0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f) return np.abs(f)
def __str__(self): def __str__(self):
return '(+ve)' return '(+ve)'
@ -79,6 +83,8 @@ class negative_exponent(transformation):
def gradfactor(self, f): def gradfactor(self, f):
return f return f
def initialize(self, f): def initialize(self, f):
if np.any(f>0.):
print "Warning: changing parameters to satisfy constraints"
return -np.abs(f) return -np.abs(f)
def __str__(self): def __str__(self):
return '(-ve)' return '(-ve)'
@ -108,9 +114,11 @@ class logistic(transformation):
def finv(self, f): def finv(self, f):
return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf)) return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf))
def gradfactor(self, f): def gradfactor(self, f):
return (f - self.lower) * (self.upper - f) / self.difference return (f-self.lower)*(self.upper-f)/self.difference
def initialize(self, f): def initialize(self,f):
return self.f(f * 0.) if np.any(np.logical_or(f<self.lower,f>self.upper)):
print "Warning: changing parameters to satisfy constraints"
return np.where(np.logical_or(f<self.lower,f>self.upper),self.f(f*0.),f)
def __str__(self): def __str__(self):
return '({},{})'.format(self.lower, self.upper) return '({},{})'.format(self.lower, self.upper)

View file

@ -63,7 +63,7 @@ def GPLVM_oil_100(optimize=True):
m.plot_latent(labels=m.data_labels) m.plot_latent(labels=m.data_labels)
return m return m
def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300, plot=False): def BGPLVM_oil(optimize=True, N=100, Q=10, M=20, max_f_eval=300, plot=False):
data = GPy.util.datasets.oil() data = GPy.util.datasets.oil()
# create simple GP model # create simple GP model
@ -72,19 +72,19 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300, plot=False):
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=kernel, M=M) m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=kernel, M=M)
m.data_labels = data['Y'][:N].argmax(axis=1) m.data_labels = data['Y'][:N].argmax(axis=1)
m.constrain('variance', logexp_clipped())
m.constrain('length', logexp_clipped())
m['lengt'] = 100.
m.ensure_default_constraints()
# optimize # optimize
if optimize: if optimize:
m.constrain_fixed('noise', 1. / Y.var()) m.unconstrain('noise'); m.constrain_fixed('noise', Y.var() / 100.)
m.constrain('variance', logexp_clipped()) m.optimize('scg', messages=1, max_f_eval=150)
m['lengt'] = 1000
m.ensure_default_constraints()
m.optimize('scg', messages=1, max_f_eval=max(80, max_f_eval))
m.unconstrain('noise') m.unconstrain('noise')
m.constrain_positive('noise') m.constrain('noise', logexp_clipped())
m.optimize('scg', messages=1, max_f_eval=max(0, max_f_eval - 80)) m.optimize('scg', messages=1, max_f_eval=max_f_eval)
else:
m.ensure_default_constraints()
if plot: if plot:
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]
@ -92,7 +92,7 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300, plot=False):
plt.sca(latent_axes) plt.sca(latent_axes)
m.plot_latent() m.plot_latent()
data_show = GPy.util.visualize.vector_show(y) data_show = GPy.util.visualize.vector_show(y)
lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], m, data_show, latent_axes=latent_axes) # , sense_axes=sense_axes) lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], m, data_show, latent_axes=latent_axes) # , sense_axes=sense_axes)
raw_input('Press enter to finish') raw_input('Press enter to finish')
plt.close('all') plt.close('all')
# # plot # # plot
@ -182,7 +182,7 @@ def bgplvm_simulation_matlab_compare():
Y = sim_data['Y'] Y = sim_data['Y']
S = sim_data['S'] S = sim_data['S']
mu = sim_data['mu'] mu = sim_data['mu']
M, [_, Q] = 20, mu.shape M, [_, Q] = 3, mu.shape
from GPy.models import mrd from GPy.models import mrd
from GPy import kern from GPy import kern
@ -192,7 +192,7 @@ def bgplvm_simulation_matlab_compare():
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k,
# X=mu, # X=mu,
# X_variance=S, # X_variance=S,
_debug=True) _debug=False)
m.ensure_default_constraints() m.ensure_default_constraints()
m.auto_scale_factor = True m.auto_scale_factor = True
m['noise'] = Y.var() / 100. m['noise'] = Y.var() / 100.
@ -223,7 +223,7 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
Y = Ylist[0] Y = Ylist[0]
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q) k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q)
# k = kern.white(Q, .00001) + kern.bias(Q) # k = kern.white(Q, .00001) + kern.bias(Q)
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True) m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
# m.set('noise',) # m.set('noise',)
@ -358,7 +358,7 @@ def mrd_simulation(plot_sim=False):
# import ipdb; ipdb.set_trace() # import ipdb; ipdb.set_trace()
# np.seterrcall(ipdbonerr) # np.seterrcall(ipdbonerr)
return m # , mtest return m # , mtest
def mrd_silhouette(): def mrd_silhouette():
@ -371,7 +371,7 @@ def brendan_faces():
# optimize # optimize
m.ensure_default_constraints() m.ensure_default_constraints()
# m.optimize(messages=1, max_f_eval=10000) m.optimize(messages=1, max_f_eval=10000)
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]

View file

@ -49,6 +49,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
function_eval = 1 function_eval = 1
fnow = fold fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient. gradnew = gradf(x, *optargs) # Initial gradient.
current_grad = np.dot(gradnew, gradnew)
gradold = gradnew.copy() gradold = gradnew.copy()
d = -gradnew # Initial search direction. d = -gradnew # Initial search direction.
success = True # Force calculation of directional derivs. success = True # Force calculation of directional derivs.
@ -110,7 +111,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
iteration += 1 iteration += 1
if display: if display:
print '\r', print '\r',
print 'i: {0:>5g} f:{1:> 12e} b:{2:> 12e} |g|:{3:> 12e}'.format(iteration, fnow, beta, np.dot(gradnew, gradnew)), print 'i: {0:>5g} f:{1:> 12e} b:{2:> 12e} |g|:{3:> 12e}'.format(iteration, fnow, beta, current_grad),
# print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush() sys.stdout.flush()
@ -125,8 +126,9 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
fold = fnew fold = fnew
gradold = gradnew gradold = gradnew
gradnew = gradf(x, *optargs) gradnew = gradf(x, *optargs)
current_grad = np.dot(gradnew, gradnew)
# If the gradient is zero then we are done. # If the gradient is zero then we are done.
if (np.dot(gradnew, gradnew)) <= gtol: if current_grad <= gtol:
status = 'converged' status = 'converged'
return x, flog, function_eval, status return x, flog, function_eval, status

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
try: try:
from constructors import rbf_sympy, sympykern # these depend on sympy from constructors import rbf_sympy, sympykern # these depend on sympy
except: except:

View file

@ -20,7 +20,6 @@ from periodic_exponential import periodic_exponential as periodic_exponentialpar
from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part
from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part
from prod import prod as prodpart from prod import prod as prodpart
from prod_orthogonal import prod_orthogonal as prod_orthogonalpart
from symmetric import symmetric as symmetric_part from symmetric import symmetric as symmetric_part
from coregionalise import coregionalise as coregionalise_part from coregionalise import coregionalise as coregionalise_part
from rational_quadratic import rational_quadratic as rational_quadraticpart from rational_quadratic import rational_quadratic as rational_quadraticpart
@ -255,7 +254,7 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,
part = periodic_Matern52part(D,variance, lengthscale, period, n_freq, lower, upper) part = periodic_Matern52part(D,variance, lengthscale, period, n_freq, lower, upper)
return kern(D, [part]) return kern(D, [part])
def prod(k1,k2): def prod(k1,k2,tensor=False):
""" """
Construct a product kernel over D from two kernels over D Construct a product kernel over D from two kernels over D
@ -263,19 +262,8 @@ def prod(k1,k2):
:type k1, k2: kernpart :type k1, k2: kernpart
:rtype: kernel object :rtype: kernel object
""" """
part = prodpart(k1,k2) part = prodpart(k1,k2,tensor)
return kern(k1.D, [part]) return kern(part.D, [part])
def prod_orthogonal(k1,k2):
"""
Construct a product kernel over D1 x D2 from a kernel over D1 and another over D2.
:param k1, k2: the kernels to multiply
:type k1, k2: kernpart
:rtype: kernel object
"""
part = prod_orthogonalpart(k1,k2)
return kern(k1.D+k2.D, [part])
def symmetric(k): def symmetric(k):
""" """

View file

@ -7,7 +7,6 @@ import pylab as pb
from ..core.parameterised import parameterised from ..core.parameterised import parameterised
from kernpart import kernpart from kernpart import kernpart
import itertools import itertools
from prod_orthogonal import prod_orthogonal
from prod import prod from prod import prod
from ..util.linalg import symmetrify from ..util.linalg import symmetrify
@ -84,96 +83,72 @@ class kern(parameterised):
count += p.Nparam count += p.Nparam
def __add__(self, other): def __add__(self, other):
assert self.D == other.D """
newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices) Shortcut for `add`.
# transfer constraints: """
newkern.constrained_indices = self.constrained_indices + [i+self.Nparam for i in other.constrained_indices] return self.add(other)
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
return newkern
def add(self, other): def add(self, other,tensor=False):
""" """
Add another kernel to this one. Both kernels are defined on the same _space_ Add another kernel to this one. Both kernels are defined on the same _space_
:param other: the other kernel to be added :param other: the other kernel to be added
:type other: GPy.kern :type other: GPy.kern
""" """
return self + other if tensor:
D = self.D + other.D
self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]
def add_orthogonal(self, other): newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
"""
Add another kernel to this one. Both kernels are defined on separate spaces
:param other: the other kernel to be added
:type other: GPy.kern
"""
# deal with input slices
D = self.D + other.D
self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices) # transfer constraints:
newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices]
# transfer constraints: newkern.constraints = self.constraints + other.constraints
newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices] newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.constraints = self.constraints + other.constraints newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices] newkern.constraints = self.constraints + other.constraints
newkern.fixed_values = self.fixed_values + other.fixed_values newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
newkern.constraints = self.constraints + other.constraints else:
newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers assert self.D == other.D
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices] newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [i+self.Nparam for i in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
return newkern return newkern
def __mul__(self, other): def __mul__(self, other):
""" """
Shortcut for `prod_orthogonal`. Note that `+` assumes that we sum 2 kernels defines on the same space whereas `*` assumes that the kernels are defined on different subspaces. Shortcut for `prod`.
""" """
return self.prod(other) return self.prod(other)
def prod(self, other): def prod(self, other,tensor=False):
""" """
multiply two kernels defined on the same spaces. multiply two kernels (either on the same space, or on the tensor product of the input space)
:param other: the other kernel to be added :param other: the other kernel to be added
:type other: GPy.kern :type other: GPy.kern
""" """
K1 = self.copy() K1 = self.copy()
K2 = other.copy() K2 = other.copy()
newkernparts = [prod(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
slices = [] slices = []
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices): for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
s1, s2 = [False] * K1.D, [False] * K2.D s1, s2 = [False]*K1.D, [False]*K2.D
s1[sl1], s2[sl2] = [True], [True] s1[sl1], s2[sl2] = [True], [True]
slices += [s1 + s2] slices += [s1+s2]
newkernparts = [prod(k1, k2,tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)]
if tensor:
newkern = kern(K1.D + K2.D, newkernparts, slices)
else:
newkern = kern(K1.D, newkernparts, slices)
newkern = kern(K1.D, newkernparts, slices)
newkern._follow_constrains(K1, K2) newkern._follow_constrains(K1, K2)
return newkern
def prod_orthogonal(self, other):
"""
multiply two kernels. Both kernels are defined on separate spaces.
:param other: the other kernel to be added
:type other: GPy.kern
"""
K1 = self.copy()
K2 = other.copy()
newkernparts = [prod_orthogonal(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
slices = []
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
s1, s2 = [False] * K1.D, [False] * K2.D
s1[sl1], s2[sl2] = [True], [True]
slices += [s1 + s2]
newkern = kern(K1.D + K2.D, newkernparts, slices)
newkern._follow_constrains(K1, K2)
return newkern return newkern
def _follow_constrains(self, K1, K2): def _follow_constrains(self, K1, K2):
@ -469,9 +444,9 @@ class kern(parameterised):
return target_mu, target_S return target_mu, target_S
def plot(self, x=None, plot_limits=None, which_functions='all', resolution=None, *args, **kwargs): def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs):
if which_functions == 'all': if which_parts == 'all':
which_functions = [True] * self.Nparts which_parts = [True] * self.Nparts
if self.D == 1: if self.D == 1:
if x is None: if x is None:
x = np.zeros((1, 1)) x = np.zeros((1, 1))
@ -488,7 +463,7 @@ class kern(parameterised):
raise ValueError, "Bad limits for plotting" raise ValueError, "Bad limits for plotting"
Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None] Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None]
Kx = self.K(Xnew, x, slices2=which_functions) Kx = self.K(Xnew, x, which_parts)
pb.plot(Xnew, Kx, *args, **kwargs) pb.plot(Xnew, Kx, *args, **kwargs)
pb.xlim(xmin, xmax) pb.xlim(xmin, xmax)
pb.xlabel("x") pb.xlabel("x")
@ -514,7 +489,7 @@ class kern(parameterised):
xg = np.linspace(xmin[0], xmax[0], resolution) xg = np.linspace(xmin[0], xmax[0], resolution)
yg = np.linspace(xmin[1], xmax[1], resolution) yg = np.linspace(xmin[1], xmax[1], resolution)
Xnew = np.vstack((xx.flatten(), yy.flatten())).T Xnew = np.vstack((xx.flatten(), yy.flatten())).T
Kx = self.K(Xnew, x, slices2=which_functions) Kx = self.K(Xnew, x, which_parts)
Kx = Kx.reshape(resolution, resolution).T Kx = Kx.reshape(resolution, resolution).T
pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs) pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs)
pb.xlim(xmin[0], xmax[0]) pb.xlim(xmin[0], xmax[0])

View file

@ -4,108 +4,108 @@
from kernpart import kernpart from kernpart import kernpart
import numpy as np import numpy as np
import hashlib import hashlib
#from scipy import integrate # This may not be necessary (Nicolas, 20th Feb)
class prod(kernpart): class prod(kernpart):
""" """
Computes the product of 2 kernels that are defined on the same space Computes the product of 2 kernels
:param k1, k2: the kernels to multiply :param k1, k2: the kernels to multiply
:type k1, k2: kernpart :type k1, k2: kernpart
:param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean
:rtype: kernel object :rtype: kernel object
""" """
def __init__(self,k1,k2): def __init__(self,k1,k2,tensor=False):
assert k1.D == k2.D, "Error: The input spaces of the kernels to multiply must have the same dimension"
self.D = k1.D
self.Nparam = k1.Nparam + k2.Nparam self.Nparam = k1.Nparam + k2.Nparam
self.name = k1.name + '<times>' + k2.name self.name = k1.name + '<times>' + k2.name
self.k1 = k1 self.k1 = k1
self.k2 = k2 self.k2 = k2
if tensor:
self.D = k1.D + k2.D
self.slice1 = slice(0,self.k1.D)
self.slice2 = slice(self.k1.D,self.k1.D+self.k2.D)
else:
assert k1.D == k2.D, "Error: The input spaces of the kernels to sum don't have the same dimension."
self.D = k1.D
self.slice1 = slice(0,self.D)
self.slice2 = slice(0,self.D)
self._X, self._X2, self._params = np.empty(shape=(3,1))
self._set_params(np.hstack((k1._get_params(),k2._get_params()))) self._set_params(np.hstack((k1._get_params(),k2._get_params())))
def _get_params(self): def _get_params(self):
"""return the value of the parameters.""" """return the value of the parameters."""
return self.params return np.hstack((self.k1._get_params(), self.k2._get_params()))
def _set_params(self,x): def _set_params(self,x):
"""set the value of the parameters.""" """set the value of the parameters."""
self.k1._set_params(x[:self.k1.Nparam]) self.k1._set_params(x[:self.k1.Nparam])
self.k2._set_params(x[self.k1.Nparam:]) self.k2._set_params(x[self.k1.Nparam:])
self.params = x
def _get_param_names(self): def _get_param_names(self):
"""return parameter names.""" """return parameter names."""
return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()] return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]
def K(self,X,X2,target): def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2.""" self._K_computations(X,X2)
if X2 is None: target += self._K1 * self._K2
target1 = np.zeros((X.shape[0],X2.shape[0]))
target2 = np.zeros((X.shape[0],X2.shape[0]))
else:
target1 = np.zeros((X.shape[0],X.shape[0]))
target2 = np.zeros((X.shape[0],X.shape[0]))
self.k1.K(X,X2,target1)
self.k2.K(X,X2,target2)
target += target1 * target2
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros((X.shape[0],))
target2 = np.zeros((X.shape[0],))
self.k1.Kdiag(X,target1)
self.k2.Kdiag(X,target2)
target += target1 * target2
def dK_dtheta(self,dL_dK,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X self._K_computations(X,X2)
K1 = np.zeros((X.shape[0],X2.shape[0])) if X2 is None:
K2 = np.zeros((X.shape[0],X2.shape[0])) self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.Nparam])
self.k1.K(X,X2,K1) self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.Nparam:])
self.k2.K(X,X2,K2) else:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.Nparam])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.Nparam:])
k1_target = np.zeros(self.k1.Nparam) def Kdiag(self,X,target):
k2_target = np.zeros(self.k2.Nparam) """Compute the diagonal of the covariance matrix associated to X."""
self.k1.dK_dtheta(dL_dK*K2, X, X2, k1_target) target1 = np.zeros(X.shape[0])
self.k2.dK_dtheta(dL_dK*K1, X, X2, k2_target) target2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],target1)
self.k2.Kdiag(X[:,self.slice2],target2)
target += target1 * target2
target[:self.k1.Nparam] += k1_target def dKdiag_dtheta(self,dL_dKdiag,X,target):
target[self.k1.Nparam:] += k2_target K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.Nparam])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.Nparam:])
def dK_dX(self,dL_dK,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X self._K_computations(X,X2)
K1 = np.zeros((X.shape[0],X2.shape[0])) self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target)
K2 = np.zeros((X.shape[0],X2.shape[0])) self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target)
self.k1.K(X,X2,K1)
self.k2.K(X,X2,K2)
self.k1.dK_dX(dL_dK*K2, X, X2, target) def dKdiag_dX(self, dL_dKdiag, X, target):
self.k2.dK_dX(dL_dK*K1, X, X2, target) K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
def dKdiag_dX(self,dL_dKdiag,X,target): self.k1.dK_dX(dL_dKdiag*K2, X[:,self.slice1], target)
target1 = np.zeros((X.shape[0],)) self.k2.dK_dX(dL_dKdiag*K1, X[:,self.slice2], target)
target2 = np.zeros((X.shape[0],))
self.k1.Kdiag(X,target1)
self.k2.Kdiag(X,target2)
self.k1.dKdiag_dX(dL_dKdiag*target2, X, target) def _K_computations(self,X,X2):
self.k2.dKdiag_dX(dL_dKdiag*target1, X, target) if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
self._X = X.copy()
def dKdiag_dtheta(self,dL_dKdiag,X,target): self._params == self._get_params().copy()
"""Compute the diagonal of the covariance matrix associated to X.""" if X2 is None:
target1 = np.zeros((X.shape[0],)) self._X2 = None
target2 = np.zeros((X.shape[0],)) self._K1 = np.zeros((X.shape[0],X.shape[0]))
self.k1.Kdiag(X,target1) self._K2 = np.zeros((X.shape[0],X.shape[0]))
self.k2.Kdiag(X,target2) self.k1.K(X[:,self.slice1],None,self._K1)
self.k2.K(X[:,self.slice2],None,self._K2)
k1_target = np.zeros(self.k1.Nparam) else:
k2_target = np.zeros(self.k2.Nparam) self._X2 = X2.copy()
self.k1.dKdiag_dtheta(dL_dKdiag*target2, X, k1_target) self._K1 = np.zeros((X.shape[0],X2.shape[0]))
self.k2.dKdiag_dtheta(dL_dKdiag*target1, X, k2_target) self._K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1)
target[:self.k1.Nparam] += k1_target self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2)
target[self.k1.Nparam:] += k2_target

View file

@ -136,14 +136,16 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
self._savedparams.append([self.f_call, self._get_params()]) self._savedparams.append([self.f_call, self._get_params()])
self._savedgradients.append([self.f_call, self._log_likelihood_gradients()]) self._savedgradients.append([self.f_call, self._log_likelihood_gradients()])
self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]]) self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]])
sf2 = self.scale_factor ** 2 # sf2 = self.scale_factor ** 2
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y) A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2) # B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else: else:
A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2) # B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5 * self.M * np.log(sf2)) B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
self._savedABCD.append([self.f_call, A, B, C, D]) self._savedABCD.append([self.f_call, A, B, C, D])

View file

@ -273,7 +273,7 @@ class MRD(model):
def _handle_plotting(self, fig_num, axes, plotf): def _handle_plotting(self, fig_num, axes, plotf):
if axes is None: if axes is None:
fig = pylab.figure(num=fig_num, figsize=(4 * len(self.bgplvms), 3 * len(self.bgplvms))) fig = pylab.figure(num=fig_num, figsize=(4 * len(self.bgplvms), 3))
for i, g in enumerate(self.bgplvms): for i, g in enumerate(self.bgplvms):
if axes is None: if axes is None:
ax = fig.add_subplot(1, len(self.bgplvms), i + 1) ax = fig.add_subplot(1, len(self.bgplvms), i + 1)

View file

@ -104,7 +104,7 @@ class sparse_GP(GP):
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp) self.A = tdot(tmp)
else: else:
# tmp = self.psi1 * (np.sqrt(self.likelihood.precision) / sf) # tmp = self.psi1 * (np.sqrt(self.likelihood.precision) / sf)
tmp = self.psi1 * (np.sqrt(self.likelihood.precision)) tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp) self.A = tdot(tmp)
@ -163,7 +163,7 @@ class sparse_GP(GP):
else: else:
# likelihood is not heterscedatic # likelihood is not heterscedatic
self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2 self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
# self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision * sf2) # self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision * sf2)
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision) self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V))) self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
@ -177,7 +177,7 @@ class sparse_GP(GP):
# B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2) # B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else: else:
A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT A = -0.5 * self.N * self.D * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
# B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2) # B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
# C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5 * self.M * np.log(sf2)) # C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5 * self.M * np.log(sf2))

Binary file not shown.

Before

Width:  |  Height:  |  Size: 37 KiB

After

Width:  |  Height:  |  Size: 75 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 78 KiB

After

Width:  |  Height:  |  Size: 130 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 61 KiB

After

Width:  |  Height:  |  Size: 62 KiB

Before After
Before After

View file

@ -55,18 +55,18 @@ In ``GPy``, kernel objects can be added or multiplied. In both cases, two kinds
* a kernel over :math:`\mathbb{R} \times \mathbb{R}`: :math:`k(x,y) = k_1(x,y) \times k_2(x,y)` * a kernel over :math:`\mathbb{R} \times \mathbb{R}`: :math:`k(x,y) = k_1(x,y) \times k_2(x,y)`
* a kernel over :math:`\mathbb{R}^2 \times \mathbb{R}^2`: :math:`k(\mathbf{x},\mathbf{y}) = k_1(x_1,y_1) \times k_2(x_2,y_2)` * a kernel over :math:`\mathbb{R}^2 \times \mathbb{R}^2`: :math:`k(\mathbf{x},\mathbf{y}) = k_1(x_1,y_1) \times k_2(x_2,y_2)`
These two options are available in GPy under the name ``prod`` and ``prod_orthogonal`` (resp. ``add`` and ``add_orthogonal`` for the addition). Here is a quick example :: These two options are available in GPy using the flag ``tensor`` in the ``add`` and ``prod`` functions. Here is a quick example ::
k1 = GPy.kern.rbf(1,1.,2.) k1 = GPy.kern.rbf(1,1.,2.)
k2 = GPy.kern.Matern32(1, 0.5, 0.2) k2 = GPy.kern.Matern32(1, 0.5, 0.2)
# Product of kernels # Product of kernels
k_prod = k1.prod(k2) k_prod = k1.prod(k2) # By default, tensor=False
k_prodorth = k1.prod_orthogonal(k2) k_prodtens = k1.prod(k2,tensor=True)
# Sum of kernels # Sum of kernels
k_add = k1.add(k2) k_add = k1.add(k2) # By default, tensor=False
k_addorth = k1.add_orthogonal(k2) k_addtens = k1.add(k2,tensor=True)
.. # plots .. # plots
pb.figure(figsize=(8,8)) pb.figure(figsize=(8,8))
@ -74,21 +74,21 @@ These two options are available in GPy under the name ``prod`` and ``prod_orthog
k_prod.plot() k_prod.plot()
pb.title('prod') pb.title('prod')
pb.subplot(2,2,2) pb.subplot(2,2,2)
k_prodorth.plot() k_prodtens.plot()
pb.title('prod_orthogonal') pb.title('tensor prod')
pb.subplot(2,2,3) pb.subplot(2,2,3)
k_add.plot() k_add.plot()
pb.title('add') pb.title('sum')
pb.subplot(2,2,4) pb.subplot(2,2,4)
k_addorth.plot() k_addtens.plot()
pb.title('add_orthogonal') pb.title('tensor sum')
pb.subplots_adjust(wspace=0.3, hspace=0.3) pb.subplots_adjust(wspace=0.3, hspace=0.3)
.. figure:: Figures/tuto_kern_overview_multadd.png .. figure:: Figures/tuto_kern_overview_multadd.png
:align: center :align: center
:height: 500px :height: 500px
A shortcut for ``add`` and ``prod`` is provided by the usual ``+`` and ``*`` operators. Here is another example where we create a periodic kernel with some decay :: A shortcut for ``add`` and ``prod`` (with default flag ``tensor=False``) is provided by the usual ``+`` and ``*`` operators. Here is another example where we create a periodic kernel with some decay ::
k1 = GPy.kern.rbf(1,1.,2) k1 = GPy.kern.rbf(1,1.,2)
k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5) k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5)
@ -113,7 +113,7 @@ A shortcut for ``add`` and ``prod`` is provided by the usual ``+`` and ``*`` ope
:align: center :align: center
:height: 300px :height: 300px
In general, ``kern`` objects can be seen as a sum of ``kernparts`` objects, where the later are covariance functions denied on the same space. For example, the following code :: In general, ``kern`` objects can be seen as a sum of ``kernparts`` objects, where the later are covariance functions defined on the same space. For example, the following code ::
k = (k1+k2)*(k1+k2) k = (k1+k2)*(k1+k2)
print k.parts[0].name, '\n', k.parts[1].name, '\n', k.parts[2].name, '\n', k.parts[3].name print k.parts[0].name, '\n', k.parts[1].name, '\n', k.parts[2].name, '\n', k.parts[3].name
@ -184,7 +184,7 @@ Let us assume that we want to define an ANOVA kernel with a Matern 3/2 kernel fo
k_cst = GPy.kern.bias(1,variance=1.) k_cst = GPy.kern.bias(1,variance=1.)
k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3) k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
Kanova = (k_cst + k_mat).prod_orthogonal(k_cst + k_mat) Kanova = (k_cst + k_mat).prod(k_cst + k_mat,tensor=True)
print Kanova print Kanova
Printing the resulting kernel outputs the following :: Printing the resulting kernel outputs the following ::
@ -236,14 +236,14 @@ The submodels can be represented with the option ``which_function`` of ``plot``:
pb.subplot(1,5,2) pb.subplot(1,5,2)
pb.ylabel("= ",rotation='horizontal',fontsize='30') pb.ylabel("= ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,3) pb.subplot(1,5,3)
m.plot(which_functions=[False,True,False,False]) m.plot(which_parts=[False,True,False,False])
pb.ylabel("cst +",rotation='horizontal',fontsize='30') pb.ylabel("cst +",rotation='horizontal',fontsize='30')
pb.subplot(1,5,4) pb.subplot(1,5,4)
m.plot(which_functions=[False,False,True,False]) m.plot(which_parts=[False,False,True,False])
pb.ylabel("+ ",rotation='horizontal',fontsize='30') pb.ylabel("+ ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,5) pb.subplot(1,5,5)
pb.ylabel("+ ",rotation='horizontal',fontsize='30') pb.ylabel("+ ",rotation='horizontal',fontsize='30')
m.plot(which_functions=[False,False,False,True]) m.plot(which_parts=[False,False,False,True])
.. pb.savefig('tuto_kern_overview_mANOVAdec.png',bbox_inches='tight') .. pb.savefig('tuto_kern_overview_mANOVAdec.png',bbox_inches='tight')
@ -252,7 +252,8 @@ The submodels can be represented with the option ``which_function`` of ``plot``:
:height: 250px :height: 250px
.. import pylab as pb .. # code
import pylab as pb
import numpy as np import numpy as np
import GPy import GPy
pb.ion() pb.ion()