diff --git a/GPy/core/model.py b/GPy/core/model.py
index 4ff864e2..e8194957 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -13,6 +13,9 @@ from domains import _POSITIVE, _REAL
 from numpy.linalg.linalg import LinAlgError
 from index_operations import ParameterIndexOperations
 import itertools
+from GPy.kern.parts.rbf import RBF
+from GPy.kern.parts.rbf_inv import RBFInv
+from GPy.kern.parts.linear import Linear
 # import numdifftools as ndt
 
 class Model(Parameterized):
@@ -273,7 +276,7 @@ class Model(Parameterized):
         these terms are present in the name the parameter is constrained positive.
         """
-        positive_strings = ['variance', 'lengthscale', 'precision', 'kappa']
+        positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
         # param_names = self._get_param_names()
         for s in positive_strings:
             paramlist = self.grep_param_names(".*"+s)
@@ -308,7 +311,7 @@ class Model(Parameterized):
                 raise e
             self._fail_count += 1
             return np.inf
-        return -self.log_likelihood() - self.log_prior()
+        return -float(self.log_likelihood()) - self.log_prior()
 
     def objective_function_gradients(self, x):
         """
@@ -342,7 +346,7 @@ class Model(Parameterized):
 
         try:
             self._set_params_transformed(x)
-            obj_f = -self.log_likelihood() - self.log_prior()
+            obj_f = -float(self.log_likelihood()) - self.log_prior()
             self._fail_count = 0
             obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
         except (LinAlgError, ZeroDivisionError, ValueError) as e:
@@ -544,16 +548,16 @@ class Model(Parameterized):
         if not hasattr(self, 'kern'):
             raise ValueError, "this model has no kernel"
 
-        k = [p for p in self.kern._parameters_ if p.name in ['rbf', 'linear', 'rbf_inv']]
-        if (not len(k) == 1) or (not k[0].ARD):
+        k = [p for p in self.kern._parameters_ if hasattr(p, "ARD") and p.ARD]
+        if (not len(k) == 1):
             raise ValueError, "cannot determine sensitivity for this kernel"
         k = k[0]
 
-        if k.name == 'rbf':
+        if isinstance(k, RBF):
             return 1. / k.lengthscale
-        elif k.name == 'rbf_inv':
+        elif isinstance(k, RBFInv):
             return k.inv_lengthscale
-        elif k.name == 'linear':
+        elif isinstance(k, Linear):
             return k.variances
 
diff --git a/GPy/core/parameterized.py b/GPy/core/parameterized.py
index 6ec81914..db367824 100644
--- a/GPy/core/parameterized.py
+++ b/GPy/core/parameterized.py
@@ -202,18 +202,19 @@ class Parameterized(Nameable, Pickleable, Observable):
         """
         [self.add_parameter(p) for p in parameters]
 
-#     def remove_parameter(self, *names_params_indices):
-#         """
-#         :param names_params_indices: mix of parameter_names, parameter objects, or indices
-#             to remove from being a parameter of this parameterized object.
-#
-#         note: if it is a string object it will be regexp-matched automatically
-#         """
-#         self._parameters_ = [p for p in self._parameters_
-#                             if not (p._parent_index_ in names_params_indices
-#                                     or p.name in names_params_indices
-#                                     or p in names_params_indices)]
-#         self._connect_parameters()
+    def remove_parameter(self, *names_params_indices):
+        """
+        :param names_params_indices: mix of parameter_names, parameter objects, or indices
+            to remove from being a parameter of this parameterized object.
+
+        note: if it is a string object it will be regexp-matched automatically
+        """
+        self._parameters_ = [p for p in self._parameters_
+                             if not (p._parent_index_ in names_params_indices
+                                     or p.name in names_params_indices
+                                     or p in names_params_indices)]
+        self._connect_parameters()
+
     def parameters_changed(self):
         """
         This method gets called when parameters have changed.
@@ -471,7 +472,7 @@ class Parameterized(Nameable, Pickleable, Observable):
         return self._parameters_[param._parent_index_]
     def hirarchy_name(self):
         if self.has_parent():
-            return self._direct_parent_.hirarchy_name() + self.name + "."
+            return self._direct_parent_.hirarchy_name() + _adjust_name_for_printing(self.name) + "."
         return ''
     #===========================================================================
     # Constraint Handling:
@@ -534,7 +535,12 @@ class Parameterized(Nameable, Pickleable, Observable):
         if paramlist is None:
             paramlist = self.grep_param_names(name)
         if len(paramlist) < 1: raise AttributeError, name
-        if len(paramlist) == 1: return paramlist[-1]
+        if len(paramlist) == 1:
+            if isinstance(paramlist[-1], Parameterized):
+                paramlist = paramlist[-1].flattened_parameters
+            if len(paramlist) != 1:
+                return ParamConcatenation(paramlist)
+            return paramlist[-1]
         return ParamConcatenation(paramlist)
     def __setitem__(self, name, value, paramlist=None):
         try: param = self.__getitem__(name, paramlist)
diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index e53b677b..859ae1dc 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -25,9 +25,9 @@ def BGPLVM(seed=default_seed):
     Y = np.random.multivariate_normal(np.zeros(N), K, D).T
     lik = Gaussian(Y, normalize=True)
 
-    k = GPy.kern.rbf_inv(input_dim, .5, np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
-    # k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
-    # k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
+#     k = GPy.kern.rbf_inv(input_dim, .5, np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
+    k = GPy.kern.rbf(input_dim, ARD=1)
+#     k = GPy.kern.rbf(input_dim, ARD = False)
     m = GPy.models.BayesianGPLVM(lik, input_dim, kernel=k, num_inducing=num_inducing)
     m.lengthscales = lengthscales
@@ -158,15 +158,15 @@ def BGPLVM_oil(optimize=True, N=200, input_dim=7, num_inducing=40, max_iters=100
     # m.constrain('variance|leng', LogexpClipped())
     # m['.*lengt'] = m.X.var(0).max() / m.X.var(0)
 
-    m['noise'] = Yn.Y.var() / 100.
+    m['gaussian'] = Yn.Y.var() / 100.
 
     # optimize
     if optimize:
-        m.constrain_fixed('noise')
+        m.gaussian.variance.fix() # m.constrain_fixed('noise')
         m.optimize('scg', messages=1, max_iters=200, gtol=.05)
-        m.constrain_positive('noise')
-        m.constrain_bounded('white', 1e-7, 1)
+        m.gaussian.variance.constrain_positive() # m.constrain_positive('noise')
+        #m.constrain_bounded('white', 1e-7, 1)
         m.optimize('scg', messages=1, max_iters=max_iters, gtol=.05)
 
     if plot:
@@ -265,7 +265,7 @@ def bgplvm_simulation_matlab_compare():
 #                      X_variance=S, _debug=False)
 
     m.auto_scale_factor = True
-    m['noise'] = Y.var() / 100.
+    m['gaussian'] = Y.var() / 100.
     m['linear_variance'] = .01
 
     return m
@@ -287,7 +287,7 @@ def bgplvm_simulation(optimize='scg',
     m = BayesianGPLVM(Y, input_dim, init="PCA", num_inducing=num_inducing, kernel=k)
 
     # m.constrain('variance|noise', LogexpClipped())
-    m['noise'] = Y.var() / 100.
+    m['gaussian'] = Y.var() / 100.
 
     if optimize:
         print "Optimizing model:"
diff --git a/GPy/kern/parts/rbf.py b/GPy/kern/parts/rbf.py
index dd5cacd3..6668b1b5 100644
--- a/GPy/kern/parts/rbf.py
+++ b/GPy/kern/parts/rbf.py
@@ -56,7 +56,7 @@ class RBF(Kernpart):
         self.add_parameters(self.variance, self.lengthscale)
         self.parameters_changed()
 
-        #self.update_lengthscale(self.lengthscale)
+        #self.update_inv_lengthscale(self.lengthscale)
         #self.parameters_changed()
         # initialize cache
         #self._Z, self._mu, self._S = np.empty(shape=(3, 1))
diff --git a/GPy/kern/parts/rbf_inv.py b/GPy/kern/parts/rbf_inv.py
index 0433e96c..e4285d33 100644
--- a/GPy/kern/parts/rbf_inv.py
+++ b/GPy/kern/parts/rbf_inv.py
@@ -7,6 +7,7 @@ import numpy as np
 import hashlib
 from scipy import weave
 from ...util.linalg import tdot
+from GPy.core.parameter import Param
 
 class RBFInv(RBF):
     """
@@ -33,8 +34,9 @@ class RBFInv(RBF):
     """
 
     def __init__(self, input_dim, variance=1., inv_lengthscale=None, ARD=False):
-        self.input_dim = input_dim
-        self.name = 'rbf_inv'
+        #self.input_dim = input_dim
+        #self.name = 'rbf_inv'
+        super(RBFInv, self).__init__(input_dim, variance=variance, lengthscale=1./np.array(inv_lengthscale), ARD=ARD, name='inverse rbf')
         self.ARD = ARD
         if not ARD:
             self.num_params = 2
@@ -50,8 +52,13 @@ class RBFInv(RBF):
             assert inv_lengthscale.size == self.input_dim, "bad number of lengthscales"
         else:
             inv_lengthscale = np.ones(self.input_dim)
-
-        self._set_params(np.hstack((variance, inv_lengthscale.flatten())))
+
+        self.variance = Param('variance', variance)
+        self.inv_lengthscale = Param('sensitivity', inv_lengthscale)
+        self.inv_lengthscale.add_observer(self, self.update_inv_lengthscale)
+        self.remove_parameter(self.lengthscale)
+        self.add_parameters(self.variance, self.inv_lengthscale)
+        #self._set_params(np.hstack((variance, inv_lengthscale.flatten())))
 
         # initialize cache
         self._Z, self._mu, self._S = np.empty(shape=(3, 1))
@@ -64,26 +71,29 @@ class RBFInv(RBF):
 
 
 
-    def _get_params(self):
-        return np.hstack((self.variance, self.inv_lengthscale))
+#     def _get_params(self):
+#         return np.hstack((self.variance, self.inv_lengthscale))
 
-    def _set_params(self, x):
-        assert x.size == (self.num_params)
-        self.variance = x[0]
-        self.inv_lengthscale = x[1:]
+    def update_inv_lengthscale(self, il):
         self.inv_lengthscale2 = np.square(self.inv_lengthscale)
         # TODO: We can rewrite everything with inv_lengthscale and never need to do the below
         self.lengthscale = 1. / self.inv_lengthscale
         self.lengthscale2 = np.square(self.lengthscale)
+
+    #def _set_params(self, x):
+    def parameters_changed(self):
+        #assert x.size == (self.num_params)
+        #self.variance = x[0]
+        #self.inv_lengthscale = x[1:]
         # reset cached results
         self._X, self._X2, self._params = np.empty(shape=(3, 1))
         self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
 
-    def _get_param_names(self):
-        if self.num_params == 2:
-            return ['variance', 'inv_lengthscale']
-        else:
-            return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)]
+#     def _get_param_names(self):
+#         if self.num_params == 2:
+#             return ['variance', 'inv_lengthscale']
+#         else:
+#             return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)]
 
     # TODO: Rewrite computations so that lengthscale is not needed (but only inv. lengthscale)
     def dK_dtheta(self, dL_dK, X, X2, target):
diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py
index d4d29711..9f19393b 100644
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@@ -12,6 +12,7 @@ from GPy.util import plot_latent, linalg
 from GPy.models.gplvm import GPLVM
 from GPy.util.plot_latent import most_significant_input_dimensions
 from matplotlib import pyplot
+from GPy.core.variational import Normal
 
 class BayesianGPLVM(SparseGP, GPLVM):
     """
@@ -47,6 +48,9 @@ class BayesianGPLVM(SparseGP, GPLVM):
             kernel = kern.rbf(input_dim) # + kern.white(input_dim)
 
         SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs)
+
+        self.q = Normal('latent space', self.X, self.X_variance)
+        self.add_parameter(self.q, gradient=self._dbound_dmuS, index=0)
         self.ensure_default_constraints()
 
     def getstate(self):
@@ -61,32 +65,32 @@ class BayesianGPLVM(SparseGP, GPLVM):
         self.init = state.pop()
         SparseGP.setstate(self, state)
 
-    def _get_param_names(self):
-        X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
-        S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
-        return (X_names + S_names + SparseGP._get_param_names(self))
+#     def _get_param_names(self):
+#         X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
+#         S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
+#         return (X_names + S_names + SparseGP._get_param_names(self))
 
     #def _get_print_names(self):
     #    return SparseGP._get_print_names(self)
 
-    def _get_params(self):
-        """
-        Horizontally stacks the parameters in order to present them to the optimizer.
-        The resulting 1-input_dim array has this structure:
+#     def _get_params(self):
+#         """
+#         Horizontally stacks the parameters in order to present them to the optimizer.
+#         The resulting 1-input_dim array has this structure:
+#
+#         ===============================================================
+#         | mu | S | Z | theta | beta |
+#         ===============================================================
+#
+#         """
+#         x = np.hstack((self.X.flatten(), self.X_variance.flatten(), SparseGP._get_params(self)))
+#         return x
 
-        ===============================================================
-        | mu | S | Z | theta | beta |
-        ===============================================================
-
-        """
-        x = np.hstack((self.X.flatten(), self.X_variance.flatten(), SparseGP._get_params(self)))
-        return x
-
-    def _set_params(self, x, save_old=True, save_count=0):
-        N, input_dim = self.num_data, self.input_dim
-        self.X = x[:self.X.size].reshape(N, input_dim).copy()
-        self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy()
-        SparseGP._set_params(self, x[(2 * N * input_dim):])
+#     def _set_params(self, x, save_old=True, save_count=0):
+#         N, input_dim = self.num_data, self.input_dim
+#         self.X = x[:self.X.size].reshape(N, input_dim).copy()
+#         self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy()
+#         SparseGP._set_params(self, x[(2 * N * input_dim):])
 
     def dKL_dmuS(self):
         dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
@@ -112,14 +116,21 @@ class BayesianGPLVM(SparseGP, GPLVM):
         kl = self.KL_divergence()
         return ll - kl
 
-    def _log_likelihood_gradients(self):
+    def _dbound_dmuS(self):
         dKL_dmu, dKL_dS = self.dKL_dmuS()
         dL_dmu, dL_dS = self.dL_dmuS()
         d_dmu = (dL_dmu - dKL_dmu).flatten()
         d_dS = (dL_dS - dKL_dS).flatten()
-        self.dbound_dmuS = np.hstack((d_dmu, d_dS))
-        self.dbound_dZtheta = SparseGP._log_likelihood_gradients(self)
-        return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))
+        return np.hstack((d_dmu, d_dS))
+
+#     def _log_likelihood_gradients(self):
+#         dKL_dmu, dKL_dS = self.dKL_dmuS()
+#         dL_dmu, dL_dS = self.dL_dmuS()
+#         d_dmu = (dL_dmu - dKL_dmu).flatten()
+#         d_dS = (dL_dS - dKL_dS).flatten()
+#         self.dbound_dmuS = np.hstack((d_dmu, d_dS))
+#         self.dbound_dZtheta = SparseGP._log_likelihood_gradients(self)
+#         return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))
 
     def plot_latent(self, plot_inducing=True, *args, **kwargs):
         return plot_latent.plot_latent(self, plot_inducing=plot_inducing, *args, **kwargs)
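
Usage sketch for the parameter interface exercised by this change. This is illustrative only: the toy data, kernel dimensions, inducing-point count and iteration counts are placeholders, the "from GPy.likelihoods import Gaussian" import path is assumed, and the last call assumes the edited sensitivity helper is Model.input_sensitivity() as in mainline GPy. Python 2 syntax, matching the examples above.

import numpy as np
import GPy
from GPy.likelihoods import Gaussian  # assumed import path for the Gaussian likelihood

np.random.seed(0)
Y = np.random.randn(100, 12)  # placeholder observations, N x D
input_dim = 3

# ARD rbf kernel plus bias and white noise, as in the updated BGPLVM example
k = GPy.kern.rbf(input_dim, ARD=1) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
m = GPy.models.BayesianGPLVM(Gaussian(Y, normalize=True), input_dim, kernel=k, num_inducing=10)

# the noise variance is now addressed as 'gaussian' rather than 'noise'
m['gaussian'] = Y.var() / 100.

# attribute-style constraints replace the old string-based constrain_* calls
m.gaussian.variance.fix()
m.optimize('scg', messages=1, max_iters=200, gtol=.05)
m.gaussian.variance.constrain_positive()
m.optimize('scg', messages=1, max_iters=1000, gtol=.05)

# per-dimension sensitivities, resolved via isinstance() checks on the ARD kernel part
print m.input_sensitivity()

Note the design choice in the model.py hunk: input sensitivity is now dispatched on the kernel part's type and ARD flag rather than on its name string, so a new ARD part only needs to expose the appropriate attribute (lengthscale, inv_lengthscale or variances) to participate.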