added BGPLVM in parameterized

This commit is contained in:
Max Zwiessele 2013-11-06 15:15:13 +00:00
parent 3316d29341
commit 8c02e4af36
6 changed files with 103 additions and 72 deletions

View file

@ -13,6 +13,9 @@ from domains import _POSITIVE, _REAL
from numpy.linalg.linalg import LinAlgError from numpy.linalg.linalg import LinAlgError
from index_operations import ParameterIndexOperations from index_operations import ParameterIndexOperations
import itertools import itertools
from GPy.kern.parts.rbf import RBF
from GPy.kern.parts.rbf_inv import RBFInv
from GPy.kern.parts.linear import Linear
# import numdifftools as ndt # import numdifftools as ndt
class Model(Parameterized): class Model(Parameterized):
@ -273,7 +276,7 @@ class Model(Parameterized):
these terms are present in the name the parameter is these terms are present in the name the parameter is
constrained positive. constrained positive.
""" """
positive_strings = ['variance', 'lengthscale', 'precision', 'kappa'] positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
# param_names = self._get_param_names() # param_names = self._get_param_names()
for s in positive_strings: for s in positive_strings:
paramlist = self.grep_param_names(".*"+s) paramlist = self.grep_param_names(".*"+s)
@ -308,7 +311,7 @@ class Model(Parameterized):
raise e raise e
self._fail_count += 1 self._fail_count += 1
return np.inf return np.inf
return -self.log_likelihood() - self.log_prior() return -float(self.log_likelihood()) - self.log_prior()
def objective_function_gradients(self, x): def objective_function_gradients(self, x):
""" """
@ -329,6 +332,7 @@ class Model(Parameterized):
if self._fail_count >= self._allowed_failures: if self._fail_count >= self._allowed_failures:
raise e raise e
self._fail_count += 1 self._fail_count += 1
import ipdb;ipdb.set_trace()
obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100) obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100)
return obj_grads return obj_grads
@ -342,7 +346,7 @@ class Model(Parameterized):
try: try:
self._set_params_transformed(x) self._set_params_transformed(x)
obj_f = -self.log_likelihood() - self.log_prior() obj_f = -float(self.log_likelihood()) - self.log_prior()
self._fail_count = 0 self._fail_count = 0
obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
except (LinAlgError, ZeroDivisionError, ValueError) as e: except (LinAlgError, ZeroDivisionError, ValueError) as e:
@ -544,16 +548,16 @@ class Model(Parameterized):
if not hasattr(self, 'kern'): if not hasattr(self, 'kern'):
raise ValueError, "this model has no kernel" raise ValueError, "this model has no kernel"
k = [p for p in self.kern._parameters_ if p.name in ['rbf', 'linear', 'rbf_inv']] k = [p for p in self.kern._parameters_ if hasattr(p, "ARD") and p.ARD]
if (not len(k) == 1) or (not k[0].ARD): if (not len(k) == 1):
raise ValueError, "cannot determine sensitivity for this kernel" raise ValueError, "cannot determine sensitivity for this kernel"
k = k[0] k = k[0]
if k.name == 'rbf': if isinstance(k, RBF):
return 1. / k.lengthscale return 1. / k.lengthscale
elif k.name == 'rbf_inv': elif isinstance(k, RBFInv):
return k.inv_lengthscale return k.inv_lengthscale
elif k.name == 'linear': elif isinstance(k, Linear):
return k.variances return k.variances

View file

@ -202,18 +202,19 @@ class Parameterized(Nameable, Pickleable, Observable):
""" """
[self.add_parameter(p) for p in parameters] [self.add_parameter(p) for p in parameters]
# def remove_parameter(self, *names_params_indices): def remove_parameter(self, *names_params_indices):
# """ """
# :param names_params_indices: mix of parameter_names, parameter objects, or indices :param names_params_indices: mix of parameter_names, parameter objects, or indices
# to remove from being a parameter of this parameterized object. to remove from being a parameter of this parameterized object.
#
# note: if it is a string object it will be regexp-matched automatically note: if it is a string object it will be regexp-matched automatically
# """ """
# self._parameters_ = [p for p in self._parameters_ self._parameters_ = [p for p in self._parameters_
# if not (p._parent_index_ in names_params_indices if not (p._parent_index_ in names_params_indices
# or p.name in names_params_indices or p.name in names_params_indices
# or p in names_params_indices)] or p in names_params_indices)]
# self._connect_parameters() self._connect_parameters()
def parameters_changed(self): def parameters_changed(self):
""" """
This method gets called when parameters have changed. This method gets called when parameters have changed.
@ -471,7 +472,7 @@ class Parameterized(Nameable, Pickleable, Observable):
return self._parameters_[param._parent_index_] return self._parameters_[param._parent_index_]
def hirarchy_name(self): def hirarchy_name(self):
if self.has_parent(): if self.has_parent():
return self._direct_parent_.hirarchy_name() + self.name + "." return self._direct_parent_.hirarchy_name() + _adjust_name_for_printing(self.name) + "."
return '' return ''
#=========================================================================== #===========================================================================
# Constraint Handling: # Constraint Handling:
@ -534,7 +535,12 @@ class Parameterized(Nameable, Pickleable, Observable):
if paramlist is None: if paramlist is None:
paramlist = self.grep_param_names(name) paramlist = self.grep_param_names(name)
if len(paramlist) < 1: raise AttributeError, name if len(paramlist) < 1: raise AttributeError, name
if len(paramlist) == 1: return paramlist[-1] if len(paramlist) == 1:
if isinstance(paramlist[-1], Parameterized):
paramlist = paramlist[-1].flattened_parameters
if len(paramlist) != 1:
return ParamConcatenation(paramlist)
return paramlist[-1]
return ParamConcatenation(paramlist) return ParamConcatenation(paramlist)
def __setitem__(self, name, value, paramlist=None): def __setitem__(self, name, value, paramlist=None):
try: param = self.__getitem__(name, paramlist) try: param = self.__getitem__(name, paramlist)

View file

@ -25,9 +25,9 @@ def BGPLVM(seed=default_seed):
Y = np.random.multivariate_normal(np.zeros(N), K, D).T Y = np.random.multivariate_normal(np.zeros(N), K, D).T
lik = Gaussian(Y, normalize=True) lik = Gaussian(Y, normalize=True)
k = GPy.kern.rbf_inv(input_dim, .5, np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) # k = GPy.kern.rbf_inv(input_dim, .5, np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
# k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim, ARD=1)
# k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001) # k = GPy.kern.rbf(input_dim, ARD = False)
m = GPy.models.BayesianGPLVM(lik, input_dim, kernel=k, num_inducing=num_inducing) m = GPy.models.BayesianGPLVM(lik, input_dim, kernel=k, num_inducing=num_inducing)
m.lengthscales = lengthscales m.lengthscales = lengthscales
@ -158,15 +158,15 @@ def BGPLVM_oil(optimize=True, N=200, input_dim=7, num_inducing=40, max_iters=100
# m.constrain('variance|leng', LogexpClipped()) # m.constrain('variance|leng', LogexpClipped())
# m['.*lengt'] = m.X.var(0).max() / m.X.var(0) # m['.*lengt'] = m.X.var(0).max() / m.X.var(0)
m['noise'] = Yn.Y.var() / 100. m['gaussian'] = Yn.Y.var() / 100.
# optimize # optimize
if optimize: if optimize:
m.constrain_fixed('noise') m.gaussian.variance.fix() # m.constrain_fixed('noise')
m.optimize('scg', messages=1, max_iters=200, gtol=.05) m.optimize('scg', messages=1, max_iters=200, gtol=.05)
m.constrain_positive('noise') m.gaussian.variance.constrain_positive() # m.constrain_positive('noise')
m.constrain_bounded('white', 1e-7, 1) #m.constrain_bounded('white', 1e-7, 1)
m.optimize('scg', messages=1, max_iters=max_iters, gtol=.05) m.optimize('scg', messages=1, max_iters=max_iters, gtol=.05)
if plot: if plot:
@ -265,7 +265,7 @@ def bgplvm_simulation_matlab_compare():
# X_variance=S, # X_variance=S,
_debug=False) _debug=False)
m.auto_scale_factor = True m.auto_scale_factor = True
m['noise'] = Y.var() / 100. m['gaussian'] = Y.var() / 100.
m['linear_variance'] = .01 m['linear_variance'] = .01
return m return m
@ -287,7 +287,7 @@ def bgplvm_simulation(optimize='scg',
m = BayesianGPLVM(Y, input_dim, init="PCA", num_inducing=num_inducing, kernel=k) m = BayesianGPLVM(Y, input_dim, init="PCA", num_inducing=num_inducing, kernel=k)
# m.constrain('variance|noise', LogexpClipped()) # m.constrain('variance|noise', LogexpClipped())
m['noise'] = Y.var() / 100. m['gaussian'] = Y.var() / 100.
if optimize: if optimize:
print "Optimizing model:" print "Optimizing model:"

View file

@ -56,7 +56,7 @@ class RBF(Kernpart):
self.add_parameters(self.variance, self.lengthscale) self.add_parameters(self.variance, self.lengthscale)
self.parameters_changed() self.parameters_changed()
#self.update_lengthscale(self.lengthscale) #self.update_inv_lengthscale(self.lengthscale)
#self.parameters_changed() #self.parameters_changed()
# initialize cache # initialize cache
#self._Z, self._mu, self._S = np.empty(shape=(3, 1)) #self._Z, self._mu, self._S = np.empty(shape=(3, 1))

View file

@ -7,6 +7,7 @@ import numpy as np
import hashlib import hashlib
from scipy import weave from scipy import weave
from ...util.linalg import tdot from ...util.linalg import tdot
from GPy.core.parameter import Param
class RBFInv(RBF): class RBFInv(RBF):
""" """
@ -33,8 +34,9 @@ class RBFInv(RBF):
""" """
def __init__(self, input_dim, variance=1., inv_lengthscale=None, ARD=False): def __init__(self, input_dim, variance=1., inv_lengthscale=None, ARD=False):
self.input_dim = input_dim #self.input_dim = input_dim
self.name = 'rbf_inv' #self.name = 'rbf_inv'
super(RBFInv, self).__init__(input_dim, variance=variance, lengthscale=1./np.array(inv_lengthscale), ARD=ARD, name='inverse rbf')
self.ARD = ARD self.ARD = ARD
if not ARD: if not ARD:
self.num_params = 2 self.num_params = 2
@ -50,8 +52,13 @@ class RBFInv(RBF):
assert inv_lengthscale.size == self.input_dim, "bad number of lengthscales" assert inv_lengthscale.size == self.input_dim, "bad number of lengthscales"
else: else:
inv_lengthscale = np.ones(self.input_dim) inv_lengthscale = np.ones(self.input_dim)
self._set_params(np.hstack((variance, inv_lengthscale.flatten()))) self.variance = Param('variance', variance)
self.inv_lengthscale = Param('sensitivity', inv_lengthscale)
self.inv_lengthscale.add_observer(self, self.update_inv_lengthscale)
self.remove_parameter(self.lengthscale)
self.add_parameters(self.variance, self.inv_lengthscale)
#self._set_params(np.hstack((variance, inv_lengthscale.flatten())))
# initialize cache # initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) self._Z, self._mu, self._S = np.empty(shape=(3, 1))
@ -64,26 +71,29 @@ class RBFInv(RBF):
def _get_params(self): # def _get_params(self):
return np.hstack((self.variance, self.inv_lengthscale)) # return np.hstack((self.variance, self.inv_lengthscale))
def _set_params(self, x): def update_inv_lengthscale(self, il):
assert x.size == (self.num_params)
self.variance = x[0]
self.inv_lengthscale = x[1:]
self.inv_lengthscale2 = np.square(self.inv_lengthscale) self.inv_lengthscale2 = np.square(self.inv_lengthscale)
# TODO: We can rewrite everything with inv_lengthscale and never need to do the below # TODO: We can rewrite everything with inv_lengthscale and never need to do the below
self.lengthscale = 1. / self.inv_lengthscale self.lengthscale = 1. / self.inv_lengthscale
self.lengthscale2 = np.square(self.lengthscale) self.lengthscale2 = np.square(self.lengthscale)
#def _set_params(self, x):
def parameters_changed(self):
#assert x.size == (self.num_params)
#self.variance = x[0]
#self.inv_lengthscale = x[1:]
# reset cached results # reset cached results
self._X, self._X2, self._params = np.empty(shape=(3, 1)) self._X, self._X2, self._params = np.empty(shape=(3, 1))
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
def _get_param_names(self): # def _get_param_names(self):
if self.num_params == 2: # if self.num_params == 2:
return ['variance', 'inv_lengthscale'] # return ['variance', 'inv_lengthscale']
else: # else:
return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)] # return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)]
# TODO: Rewrite computations so that lengthscale is not needed (but only inv. lengthscale) # TODO: Rewrite computations so that lengthscale is not needed (but only inv. lengthscale)
def dK_dtheta(self, dL_dK, X, X2, target): def dK_dtheta(self, dL_dK, X, X2, target):

View file

@ -12,6 +12,7 @@ from GPy.util import plot_latent, linalg
from GPy.models.gplvm import GPLVM from GPy.models.gplvm import GPLVM
from GPy.util.plot_latent import most_significant_input_dimensions from GPy.util.plot_latent import most_significant_input_dimensions
from matplotlib import pyplot from matplotlib import pyplot
from GPy.core.variational import Normal
class BayesianGPLVM(SparseGP, GPLVM): class BayesianGPLVM(SparseGP, GPLVM):
""" """
@ -47,6 +48,9 @@ class BayesianGPLVM(SparseGP, GPLVM):
kernel = kern.rbf(input_dim) # + kern.white(input_dim) kernel = kern.rbf(input_dim) # + kern.white(input_dim)
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs) SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs)
self.q = Normal('latent space', self.X, self.X_variance)
self.add_parameter(self.q, gradient=self._dbound_dmuS, index=0)
self.ensure_default_constraints() self.ensure_default_constraints()
def getstate(self): def getstate(self):
@ -61,32 +65,32 @@ class BayesianGPLVM(SparseGP, GPLVM):
self.init = state.pop() self.init = state.pop()
SparseGP.setstate(self, state) SparseGP.setstate(self, state)
def _get_param_names(self): # def _get_param_names(self):
X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) # X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) # S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
return (X_names + S_names + SparseGP._get_param_names(self)) # return (X_names + S_names + SparseGP._get_param_names(self))
#def _get_print_names(self): #def _get_print_names(self):
# return SparseGP._get_print_names(self) # return SparseGP._get_print_names(self)
def _get_params(self): # def _get_params(self):
""" # """
Horizontally stacks the parameters in order to present them to the optimizer. # Horizontally stacks the parameters in order to present them to the optimizer.
The resulting 1-input_dim array has this structure: # The resulting 1-input_dim array has this structure:
#
# ===============================================================
# | mu | S | Z | theta | beta |
# ===============================================================
#
# """
# x = np.hstack((self.X.flatten(), self.X_variance.flatten(), SparseGP._get_params(self)))
# return x
=============================================================== # def _set_params(self, x, save_old=True, save_count=0):
| mu | S | Z | theta | beta | # N, input_dim = self.num_data, self.input_dim
=============================================================== # self.X = x[:self.X.size].reshape(N, input_dim).copy()
# self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy()
""" # SparseGP._set_params(self, x[(2 * N * input_dim):])
x = np.hstack((self.X.flatten(), self.X_variance.flatten(), SparseGP._get_params(self)))
return x
def _set_params(self, x, save_old=True, save_count=0):
N, input_dim = self.num_data, self.input_dim
self.X = x[:self.X.size].reshape(N, input_dim).copy()
self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy()
SparseGP._set_params(self, x[(2 * N * input_dim):])
def dKL_dmuS(self): def dKL_dmuS(self):
dKL_dS = (1. - (1. / (self.X_variance))) * 0.5 dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
@ -112,14 +116,21 @@ class BayesianGPLVM(SparseGP, GPLVM):
kl = self.KL_divergence() kl = self.KL_divergence()
return ll - kl return ll - kl
def _log_likelihood_gradients(self): def _dbound_dmuS(self):
dKL_dmu, dKL_dS = self.dKL_dmuS() dKL_dmu, dKL_dS = self.dKL_dmuS()
dL_dmu, dL_dS = self.dL_dmuS() dL_dmu, dL_dS = self.dL_dmuS()
d_dmu = (dL_dmu - dKL_dmu).flatten() d_dmu = (dL_dmu - dKL_dmu).flatten()
d_dS = (dL_dS - dKL_dS).flatten() d_dS = (dL_dS - dKL_dS).flatten()
self.dbound_dmuS = np.hstack((d_dmu, d_dS)) return np.hstack((d_dmu, d_dS))
self.dbound_dZtheta = SparseGP._log_likelihood_gradients(self)
return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta)) # def _log_likelihood_gradients(self):
# dKL_dmu, dKL_dS = self.dKL_dmuS()
# dL_dmu, dL_dS = self.dL_dmuS()
# d_dmu = (dL_dmu - dKL_dmu).flatten()
# d_dS = (dL_dS - dKL_dS).flatten()
# self.dbound_dmuS = np.hstack((d_dmu, d_dS))
# self.dbound_dZtheta = SparseGP._log_likelihood_gradients(self)
# return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))
def plot_latent(self, plot_inducing=True, *args, **kwargs): def plot_latent(self, plot_inducing=True, *args, **kwargs):
return plot_latent.plot_latent(self, plot_inducing=plot_inducing, *args, **kwargs) return plot_latent.plot_latent(self, plot_inducing=plot_inducing, *args, **kwargs)