From c4f6b0dbe7e391256a5ae7f729ea649ce48efcf1 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Wed, 19 Feb 2014 15:32:16 +0000 Subject: [PATCH] copy and missing data --- GPy/core/__init__.py | 4 +- GPy/core/model.py | 6 +- GPy/core/parameterization/param.py | 13 ++- GPy/core/parameterization/parameter_core.py | 82 +++++++++++++++++-- GPy/core/parameterization/parameterized.py | 57 ++----------- GPy/core/sparse_gp.py | 17 ++-- GPy/examples/dimensionality_reduction.py | 11 ++- .../latent_function_inference/__init__.py | 16 +++- .../latent_function_inference/var_dtc.py | 54 +++++++----- GPy/models/mrd.py | 16 ++++ 10 files changed, 179 insertions(+), 97 deletions(-) diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py index 839529d6..a42d76ed 100644 --- a/GPy/core/__init__.py +++ b/GPy/core/__init__.py @@ -2,7 +2,9 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) from model import * -from parameterization.parameterized import * +from parameterization.parameterized import adjust_name_for_printing, Parameterizable +from parameterization.param import Param, ParamConcatenation + from gp import GP from sparse_gp import SparseGP from svigp import SVIGP diff --git a/GPy/core/model.py b/GPy/core/model.py index 55083aaf..c067d51d 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -4,12 +4,8 @@ from .. import likelihoods from ..inference import optimization -from ..util.linalg import jitchol from ..util.misc import opt_wrapper from parameterization import Parameterized -from parameterization.parameterized import UNFIXED -from parameterization.domains import _POSITIVE, _REAL -from parameterization.index_operations import ParameterIndexOperations import multiprocessing as mp import numpy as np from numpy.linalg.linalg import LinAlgError @@ -240,7 +236,7 @@ class Model(Parameterized): constrained positive. """ raise DeprecationWarning, 'parameters now have default constraints' - positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity'] + #positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity'] # param_names = self._get_param_names() # for s in positive_strings: diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index f54c0117..49d6682c 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -3,7 +3,7 @@ import itertools import numpy -from parameter_core import Constrainable, Gradcheckable, Indexable, Parameterizable, adjust_name_for_printing +from parameter_core import Constrainable, Gradcheckable, Indexable, Parentable, adjust_name_for_printing from array_core import ObservableArray, ParamList ###### printing @@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision __print_threshold__ = 5 ###### -class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameterizable): +class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable): """ Parameter object for GPy models. 
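The `GPy/core/__init__.py` hunk above replaces the star import of `parameterization.parameterized` with explicit names. A minimal sketch of what user code can still import from `GPy.core` after this change (assuming GPy at this commit is on the path; the `Param` constructor call mirrors the one used in `Param.copy()` further down and is otherwise an assumption):

    # Sketch of the narrowed GPy.core namespace after this change: the star
    # import is gone, these names are re-exported explicitly.
    import numpy as np
    from GPy.core import Param, ParamConcatenation
    from GPy.core import Parameterizable, adjust_name_for_printing
    from GPy.core import GP, SparseGP, SVIGP   # imported directly in the same __init__.py

    # Param wraps an ndarray with a name (and optional default constraint),
    # matching the constructor used in Param.copy() below.
    lengthscale = Param('lengthscale', np.ones(3))
    print(adjust_name_for_printing(lengthscale.name))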
@@ -114,7 +114,14 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri self._parent_index_ = state.pop() self._direct_parent_ = state.pop() self.name = state.pop() - + + def copy(self, *args): + constr = self.constraints.copy() + priors = self.priors.copy() + p = Param(self.name, self.view(numpy.ndarray).copy(), self._default_constraint_) + p.constraints = constr + p.priors = priors + return p #=========================================================================== # get/set parameters #=========================================================================== diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 275198b2..9002adc3 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -68,6 +68,10 @@ class Parentable(object): return self return self._direct_parent_._highest_parent_ + def _notify_parameters_changed(self): + if self.has_parent(): + self._direct_parent_._notify_parameters_changed() + class Nameable(Parentable): _name = None def __init__(self, name, direct_parent=None, parent_index=None): @@ -80,22 +84,47 @@ class Nameable(Parentable): @name.setter def name(self, name): from_name = self.name + assert isinstance(name, str) self._name = name if self.has_parent(): - self._direct_parent_._name_changed(self, from_name) - + self._direct_parent_._name_changed(self, from_name) class Parameterizable(Parentable): def __init__(self, *args, **kwargs): super(Parameterizable, self).__init__(*args, **kwargs) from GPy.core.parameterization.array_core import ParamList _parameters_ = ParamList() + self._added_names_ = set() def parameter_names(self, add_name=False): if add_name: return [adjust_name_for_printing(self.name) + "." 
+ xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)] return [xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)] + + def _add_parameter_name(self, param): + pname = adjust_name_for_printing(param.name) + # and makes sure to not delete programmatically added parameters + if pname in self.__dict__: + if not (param is self.__dict__[pname]): + if pname in self._added_names_: + del self.__dict__[pname] + self._add_parameter_name(param) + else: + self.__dict__[pname] = param + self._added_names_.add(pname) + + def _remove_parameter_name(self, param=None, pname=None): + assert param is None or pname is None, "can only delete either param by name, or the name of a param" + pname = adjust_name_for_printing(pname) or adjust_name_for_printing(param.name) + if pname in self._added_names_: + del self.__dict__[pname] + self._added_names_.remove(pname) + self._connect_parameters() + def _name_changed(self, param, old_name): + self._remove_parameter_name(None, old_name) + self._add_parameter_name(param) + def _collect_gradient(self, target): import itertools [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] @@ -113,6 +142,38 @@ class Parameterizable(Parentable): [p._set_params(params[s], update=update) for p, s in itertools.izip(self._parameters_, self._param_slices_)] self.parameters_changed() + def copy(self): + """Returns a (deep) copy of the current model""" + import copy + from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView + from .array_core import ParamList + dc = dict() + for k, v in self.__dict__.iteritems(): + if k not in ['_direct_parent_', '_parameters_', '_parent_index_'] + self.parameter_names(): + if isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)): + dc[k] = v.copy() + else: + dc[k] = copy.deepcopy(v) + if k == '_parameters_': + params = [p.copy() for p in v] + #dc = copy.deepcopy(self.__dict__) + dc['_direct_parent_'] = None + dc['_parent_index_'] = None + dc['_parameters_'] = ParamList() + s = self.__new__(self.__class__) + s.__dict__ = dc + #import ipdb;ipdb.set_trace() + for p in params: + s.add_parameter(p) + #dc._notify_parent_change() + return s + #return copy.deepcopy(self) + + def _notify_parameters_changed(self): + self.parameters_changed() + if self.has_parent(): + self._direct_parent_._notify_parameters_changed() + def parameters_changed(self): """ This method gets called when parameters have changed. @@ -122,11 +183,6 @@ class Parameterizable(Parentable): """ pass - def _notify_parameters_changed(self): - self.parameters_changed() - if self.has_parent(): - self._direct_parent_._notify_parameters_changed() - class Gradcheckable(Parentable): #=========================================================================== @@ -157,7 +213,7 @@ class Indexable(object): """ raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?" 
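The new `Parameterizable.copy()` above deep-copies everything except the parent links, copies each child `Param` (constraints and priors included, via the `Param.copy()` added in param.py), and re-adds the copies to the fresh object. A hedged sketch of the intended behaviour; the model `m` and the parameter path `m.rbf.lengthscale` are hypothetical stand-ins for any GPy model built with this patch applied:

    import numpy as np

    # `m` is some existing model, e.g. a GP regression with an 'rbf' kernel part.
    m2 = m.copy()                        # new Param objects; constraints and priors copied

    # attribute access by parameter name comes from _add_parameter_name() above
    m2.rbf.lengthscale[:] = m.rbf.lengthscale * 10.
    assert not np.all(m.rbf.lengthscale == m2.rbf.lengthscale)   # no shared memory (assuming non-zero lengthscale)
    assert m2.parameter_names() == m.parameter_names()           # same structure, same names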
-class Constrainable(Nameable, Indexable, Parameterizable): +class Constrainable(Nameable, Indexable, Parentable): def __init__(self, name, default_constraint=None): super(Constrainable,self).__init__(name) self._default_constraint_ = default_constraint @@ -167,6 +223,16 @@ class Constrainable(Nameable, Indexable, Parameterizable): if self._default_constraint_ is not None: self.constrain(self._default_constraint_) + def _disconnect_parent(self, constr=None): + if constr is None: + constr = self.constraints.copy() + self.constraints.clear() + self.constraints = constr + self._direct_parent_ = None + self._parent_index_ = None + self._connect_fixes() + self._notify_parent_change() + #=========================================================================== # Fixing Parameters: #=========================================================================== diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index c8a841c0..cef1daa2 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -3,16 +3,15 @@ import numpy; np = numpy -import copy import cPickle import itertools from re import compile, _pattern_type -from param import ParamConcatenation, Param -from parameter_core import Constrainable, Pickleable, Observable, adjust_name_for_printing, Gradcheckable -from transformations import __fixed__, FIXED, UNFIXED +from param import ParamConcatenation +from parameter_core import Constrainable, Pickleable, Observable, Parameterizable, adjust_name_for_printing, Gradcheckable +from transformations import __fixed__ from array_core import ParamList -class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): +class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable, Parameterizable): """ Parameterized class @@ -63,7 +62,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): self._fixes_ = None self._param_slices_ = [] self._connect_parameters() - self._added_names_ = set() del self._in_init_ def add_parameter(self, param, index=None): @@ -117,17 +115,10 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): raise RuntimeError, "Parameter {} does not belong to this object, remove parameters directly from their respective parents".format(param._short()) del self._parameters_[param._parent_index_] self.size -= param.size - constr = param.constraints.copy() - param.constraints.clear() - param.constraints = constr - param._direct_parent_ = None - param._parent_index_ = None - param._connect_fixes() - param._notify_parent_change() - pname = adjust_name_for_printing(param.name) - if pname in self._added_names_: - del self.__dict__[pname] - self._connect_parameters() + + param._disconnect_parent() + self._remove_parameter_name(param) + #self._notify_parent_change() self._connect_fixes() @@ -145,19 +136,9 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): for i, p in enumerate(self._parameters_): p._direct_parent_ = self p._parent_index_ = i - not_unique = [] sizes.append(p.size + sizes[-1]) self._param_slices_.append(slice(sizes[-2], sizes[-1])) - pname = adjust_name_for_printing(p.name) - # and makes sure to not delete programmatically added parameters - if pname in self.__dict__: - if isinstance(self.__dict__[pname], (Parameterized, Param)): - if not p is self.__dict__[pname]: - not_unique.append(pname) - del self.__dict__[pname] - elif not (pname in not_unique): - self.__dict__[pname] = p - 
self._added_names_.add(pname) + self._add_parameter_name(p) #=========================================================================== # Pickling operations @@ -174,19 +155,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): cPickle.dump(self, f, protocol) else: cPickle.dump(self, f, protocol) - def copy(self): - """Returns a (deep) copy of the current model """ - # dc = dict() - # for k, v in self.__dict__.iteritems(): - # if k not in ['_highest_parent_', '_direct_parent_']: - # dc[k] = copy.deepcopy(v) - # dc = copy.deepcopy(self.__dict__) - # dc['_highest_parent_'] = None - # dc['_direct_parent_'] = None - # s = self.__class__.new() - # s.__dict__ = dc - return copy.deepcopy(self) def __getstate__(self): if self._has_get_set_state(): return self._getstate() @@ -265,14 +234,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp [numpy.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] return p - def _name_changed(self, param, old_name): - if hasattr(self, old_name) and old_name in self._added_names_: - delattr(self, old_name) - self._added_names_.remove(old_name) - pname = adjust_name_for_printing(param.name) - if pname not in self.__dict__: - self._added_names_.add(pname) - self.__dict__[pname] = param #=========================================================================== # Indexable Handling #=========================================================================== diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index edb8d8f6..1d436c53 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -33,12 +33,12 @@ class SparseGP(GP): def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, X_variance=None, name='sparse gp'): - #pick a sensible inference method + # pick a sensible inference method if inference_method is None: if isinstance(likelihood, likelihoods.Gaussian): inference_method = var_dtc.VarDTC() else: - #inference_method = ?? + # inference_method = ?? raise NotImplementedError, "what to do what to do?" 
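With the refactor above, `remove_parameter` delegates the cleanup to the new `Constrainable._disconnect_parent()` and `Parameterizable._remove_parameter_name()`. A hedged sketch of the add/remove round trip this is meant to preserve; `par` stands for any existing `Parameterized` container and the parameter name is illustrative:

    import numpy as np
    from GPy.core import Param

    p = Param('noise_variance', np.array([1.]))
    par.add_parameter(p)                      # `par`: any existing Parameterized instance
    assert hasattr(par, 'noise_variance')     # exposed as an attribute by _add_parameter_name()

    par.remove_parameter(p)                   # now calls p._disconnect_parent() and par._remove_parameter_name(p)
    assert p._direct_parent_ is None          # parent links are cleared, constraints stay on the Param
    assert not hasattr(par, 'noise_variance') # the attribute shortcut is removed again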
print "defaulting to ", inference_method, "for latent function inference" @@ -54,7 +54,7 @@ class SparseGP(GP): self.parameters_changed() def _update_gradients_Z(self, add=False): - #The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed) + # The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed) if not self.Z.is_fixed: if add: self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) else: self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) @@ -77,13 +77,14 @@ class SparseGP(GP): mu = np.dot(Kx.T, self.posterior.woodbury_vector) if full_cov: Kxx = self.kern.K(Xnew, which_parts=which_parts) - var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) # NOTE this won't work for plotting + var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx), Kx, [1,0]).T else: - Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts) - var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0) + Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)[:, None] + #import ipdb;ipdb.set_trace() + var = Kxx - (np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T * Kx.T[:,:,None]).sum(1) else: # assert which_parts=='all', "swithching out parts of variational kernels is not implemented" - Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts + Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts mu = np.dot(Kx, self.Cpsi1V) if full_cov: raise NotImplementedError, "TODO" @@ -91,7 +92,7 @@ class SparseGP(GP): Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new) psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new) var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1) - return mu, var[:,None] + return mu, var def _getstate(self): diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index a7eb0adb..2924386f 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -1,9 +1,9 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as _np -default_seed = _np.random.seed(123344) +#default_seed = _np.random.seed(123344) -def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False, output_dim=200, nan=False): +def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan=False): """ model for testing purposes. Samples from a GP with rbf kernel and learns the samples with a new kernel. 
Normally not for optimization, just model checking
@@ -41,7 +41,7 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False,
 
     if nan:
         m.inference_method = GPy.inference.latent_function_inference.var_dtc.VarDTCMissingData()
-        m.Y[_np.random.binomial(1,p,size=(Y.shape))] = _np.nan
+        m.Y[_np.random.binomial(1,p,size=(Y.shape)).astype(bool)] = _np.nan
         m.parameters_changed()
 
     #===========================================================================
@@ -186,6 +186,8 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
     return m
 
 def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
+    _np.random.seed(1234)
+
     x = _np.linspace(0, 4 * _np.pi, N)[:, None]
     s1 = _np.vectorize(lambda x: _np.sin(x))
     s2 = _np.vectorize(lambda x: _np.cos(x))
@@ -293,10 +295,11 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
     Y = Ylist[0]
 
     k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
-    inan = _np.random.binomial(1, .3, size=Y.shape)
+    inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool)
     m = BayesianGPLVM(Y, Q, init="random", num_inducing=num_inducing, kernel=k)
     m.inference_method = VarDTCMissingData()
     m.Y[inan] = _np.nan
+    m.q.variance *= .1
     m.parameters_changed()
 
     if optimize:
diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py
index 337a8477..a633c381 100644
--- a/GPy/inference/latent_function_inference/__init__.py
+++ b/GPy/inference/latent_function_inference/__init__.py
@@ -16,7 +16,9 @@ If the likelihood object is something other than Gaussian, then exact inference
 is not tractable. We then resort to a Laplace approximation (laplace.py) or
 expectation propagation (ep.py).
 
-The inference methods return a "Posterior" instance, which is a simple
+The inference methods return a
+:class:`~GPy.inference.latent_function_inference.posterior.Posterior`
+instance, which is a simple
 structure which contains a summary of the posterior. The model classes can then
 use this posterior object for making predictions, optimizing hyper-parameters,
 etc.
@@ -29,3 +31,15 @@ expectation_propagation = 'foo' # TODO
 from GPy.inference.latent_function_inference.var_dtc import VarDTC
 from dtc import DTC
 from fitc import FITC
+
+# class FullLatentFunctionData(object):
+#
+#
+# class LatentFunctionInference(object):
+#     def inference(self, kern, X, likelihood, Y, Y_metadata=None):
+#         """
+#         Do inference on the latent functions given a covariance function `kern`,
+#         inputs and outputs `X` and `Y`, and a likelihood `likelihood`.
+#         Additional metadata for the outputs `Y` can be given in `Y_metadata`.
+# """ +# raise NotImplementedError, "Abstract base class for full inference" \ No newline at end of file diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 264f7fc3..2f11cb08 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -139,7 +139,8 @@ class VarDTCMissingData(object): dL_dpsi2_all = np.zeros((X.shape[0], num_inducing, num_inducing)) partial_for_likelihood = 0 - LB_all = Cpsi1Vf_all = 0 + woodbury_vector = np.zeros((num_inducing, Y.shape[1])) + woodbury_inv_all = np.zeros((num_inducing, num_inducing, Y.shape[1])) dL_dKmm = 0 log_marginal = 0 @@ -153,6 +154,8 @@ class VarDTCMissingData(object): VVT_factor_all = np.empty(Y.shape) full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1] + if not full_VVT_factor: + psi1V = np.dot(Y.T*beta_all, psi1_all).T for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices): if het_noise: beta = beta_all[ind] @@ -185,8 +188,7 @@ class VarDTCMissingData(object): psi1Vf = psi1.T.dot(VVT_factor) _LBi_Lmi_psi1Vf, Cpsi1Vf = _compute_psi1Vf(Lm, LB, psi1Vf) - if full_VVT_factor: Cpsi1Vf_all += Cpsi1Vf - LB_all += LB + #LB_all[ind, :,:] = LB # data fit and derivative of L w.r.t. Kmm delit = tdot(_LBi_Lmi_psi1Vf) @@ -219,6 +221,21 @@ class VarDTCMissingData(object): psi0, psi1, beta, data_fit, num_data, output_dim, trYYT) + if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf + else: + print 'foobar' + tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) + tmp, _ = dpotrs(LB, tmp, lower=1) + woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0] + + #import ipdb;ipdb.set_trace() + Bi, _ = dpotri(LB, lower=1) + symmetrify(Bi) + Bi = -dpotri(LB, lower=1)[0] + from ...util import diag + diag.add(Bi, 1) + woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None] + # gradients: likelihood.update_gradients(partial_for_likelihood) @@ -231,23 +248,22 @@ class VarDTCMissingData(object): #get sufficient things for posterior prediction #TODO: do we really want to do this in the loop? 
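With missing data, `woodbury_inv_all` above is built once per output dimension, so the posterior carries an (M, M, D) Woodbury inverse instead of a single (M, M) matrix; the matching prediction code is the tensordot-based variance in sparse_gp.py earlier in this patch. A self-contained numpy sketch of that diagonal-variance computation (shapes and values are illustrative only, not a valid covariance):

    import numpy as np

    M, N, D = 5, 4, 3                     # inducing points, test points, output dimensions
    Kx = np.random.randn(M, N)            # K(Z, Xnew)
    Kxx_diag = np.random.rand(N)          # diag of K(Xnew, Xnew)
    Wi = np.random.randn(M, M, D)         # woodbury_inv_all: one M x M block per output

    # var[n, d] = Kxx_diag[n] - sum_{i,j} Kx[i, n] * Wi[i, j, d] * Kx[j, n],
    # i.e. the same quantity as
    #   Kxx[:, None] - (np.dot(np.atleast_3d(Wi).T, Kx).T * Kx.T[:, :, None]).sum(1)
    # in SparseGP._raw_predict above.
    var = Kxx_diag[:, None] - np.einsum('in,ijd,jn->nd', Kx, Wi, Kx)
    assert var.shape == (N, D)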
- if full_VVT_factor: - woodbury_vector = Cpsi1Vf_all # == Cpsi1V - else: - print 'foobar' - psi1V = np.dot(Y.T*beta_all, psi1_all).T - tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) - tmp, _ = dpotrs(LB_all, tmp, lower=1) - woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1) - - Bi, _ = dpotri(LB_all, lower=1) - symmetrify(Bi) - Bi = -dpotri(LB_all, lower=1)[0] - from ...util import diag - diag.add(Bi, 1) + #if not full_VVT_factor: + # print 'foobar' + # psi1V = np.dot(Y.T*beta_all, psi1_all).T + # tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) + # tmp, _ = dpotrs(LB_all, tmp, lower=1) + # woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1) + #import ipdb;ipdb.set_trace() + #Bi, _ = dpotri(LB_all, lower=1) + #symmetrify(Bi) + #Bi = -dpotri(LB_all, lower=1)[0] + #from ...util import diag + #diag.add(Bi, 1) - woodbury_inv = backsub_both_sides(Lm, Bi) - post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) + #woodbury_inv = backsub_both_sides(Lm, Bi) + + post = Posterior(woodbury_inv=woodbury_inv_all, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) return post, log_marginal, grad_dict diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 3e105785..511ce5aa 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -10,6 +10,22 @@ import pylab from GPy.kern.kern import kern from GPy.models.bayesian_gplvm import BayesianGPLVM +class MRD2(Model): + """ + Apply MRD to all given datasets Y in Ylist. + + Y_i in [n x p_i] + + The samples n in the datasets need + to match up, whereas the dimensionality p_d can differ. + + :param [array-like] Ylist: List of datasets to apply MRD on + :param array-like q_mean: mean of starting latent space q in [n x q] + :param array-like q_variance: variance of starting latent space q in [n x q] + :param :class:`~GPy.inference.latent_function_inference + """ + + class MRD(Model): """ Do MRD on given Datasets in Ylist.
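Taken together, the per-output posterior above and the example changes earlier in this patch allow a Bayesian GPLVM to be trained with missing outputs. A hedged end-to-end sketch following `bgplvm_simulation_missing_data` from the examples file (assumes GPy with this patch applied; data sizes and the kernel constructor are illustrative):

    import numpy as np
    import GPy
    from GPy.models.bayesian_gplvm import BayesianGPLVM
    from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData

    Q = 5
    Y = np.random.randn(100, 8)                                  # toy data: 100 samples, 8 output dimensions
    k = GPy.kern.linear(Q, ARD=True)                             # kernel constructor as used in the examples file
    m = BayesianGPLVM(Y, Q, init="random", num_inducing=10, kernel=k)

    inan = np.random.binomial(1, .3, size=Y.shape).astype(bool)  # boolean mask, as fixed in the examples above
    m.Y[inan] = np.nan                                           # mark the missing outputs
    m.inference_method = VarDTCMissingData()                     # switch to the missing-data inference
    m.parameters_changed()                                       # recompute with the new settings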