diff --git a/GPy/core/gp.py b/GPy/core/gp.py index c38820f3..e0f5755c 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -12,6 +12,10 @@ from .. import likelihoods from ..likelihoods.gaussian import Gaussian from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation, LatentFunctionInference from parameterization.variational import VariationalPosterior +from scipy.sparse.base import issparse + +import logging +logger = logging.getLogger("GP") class GP(Model): """ @@ -34,12 +38,14 @@ class GP(Model): assert X.ndim == 2 if isinstance(X, (ObsAr, VariationalPosterior)): self.X = X.copy() - else: self.X = ObsAr(X.copy()) + else: self.X = ObsAr(X) self.num_data, self.input_dim = self.X.shape assert Y.ndim == 2 - self.Y = ObsAr(Y.copy()) + logger.info("initializing Y") + if issparse(Y): self.Y = Y + else: self.Y = ObsAr(Y) assert Y.shape[0] == self.num_data _, self.output_dim = self.Y.shape @@ -54,6 +60,7 @@ class GP(Model): self.likelihood = likelihood #find a sensible inference method + logger.info("initializing inference method") if inference_method is None: if isinstance(likelihood, likelihoods.Gaussian) or isinstance(likelihood, likelihoods.MixedNoise): inference_method = exact_gaussian_inference.ExactGaussianInference() @@ -62,6 +69,7 @@ class GP(Model): print "defaulting to ", inference_method, "for latent function inference" self.inference_method = inference_method + logger.info("adding kernel and likelihood as parameters") self.add_parameter(self.kern) self.add_parameter(self.likelihood) @@ -199,9 +207,9 @@ class GP(Model): if fillcol is not None: kw['fillcol'] = fillcol return models_plots.plot_fit(self, plot_limits, which_data_rows, - which_data_ycols, fixed_inputs, - levels, samples, fignum, ax, resolution, - plot_raw=plot_raw, Y_metadata=Y_metadata, + which_data_ycols, fixed_inputs, + levels, samples, fignum, ax, resolution, + plot_raw=plot_raw, Y_metadata=Y_metadata, data_symbol=data_symbol, **kw) def plot(self, plot_limits=None, which_data_rows='all', @@ -250,9 +258,9 @@ class GP(Model): if fillcol is not None: kw['fillcol'] = fillcol return models_plots.plot_fit(self, plot_limits, which_data_rows, - which_data_ycols, fixed_inputs, - levels, samples, fignum, ax, resolution, - plot_raw=plot_raw, Y_metadata=Y_metadata, + which_data_ycols, fixed_inputs, + levels, samples, fignum, ax, resolution, + plot_raw=plot_raw, Y_metadata=Y_metadata, data_symbol=data_symbol, **kw) def input_sensitivity(self): @@ -281,4 +289,4 @@ class GP(Model): except KeyboardInterrupt: print "KeyboardInterrupt caught, calling on_optimization_end() to round things up" self.inference_method.on_optimization_end() - raise \ No newline at end of file + raise diff --git a/GPy/core/model.py b/GPy/core/model.py index 8152dae1..3acb64b9 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -118,12 +118,12 @@ class Model(Parameterized): """ The objective function for the given algorithm. - This function is the true objective, which wants to be minimized. - Note that all parameters are already set and in place, so you just need + This function is the true objective, which wants to be minimized. + Note that all parameters are already set and in place, so you just need to return the objective function here. For probabilistic models this is the negative log_likelihood - (including the MAP prior), so we return it here. If your model is not + (including the MAP prior), so we return it here. If your model is not probabilistic, just return your objective to minimize here! 
""" return -float(self.log_likelihood()) - self.log_prior() @@ -131,18 +131,18 @@ class Model(Parameterized): def objective_function_gradients(self): """ The gradients for the objective function for the given algorithm. - The gradients are w.r.t. the *negative* objective function, as + The gradients are w.r.t. the *negative* objective function, as this framework works with *negative* log-likelihoods as a default. You can find the gradient for the parameters in self.gradient at all times. This is the place, where gradients get stored for parameters. - This function is the true objective, which wants to be minimized. - Note that all parameters are already set and in place, so you just need + This function is the true objective, which wants to be minimized. + Note that all parameters are already set and in place, so you just need to return the gradient here. For probabilistic models this is the gradient of the negative log_likelihood - (including the MAP prior), so we return it here. If your model is not + (including the MAP prior), so we return it here. If your model is not probabilistic, just return your *negative* gradient here! """ return -(self._log_likelihood_gradients() + self._log_prior_gradients()) @@ -225,14 +225,18 @@ class Model(Parameterized): if self.size == 0: raise RuntimeError, "Model without parameters cannot be optimized" - if optimizer is None: - optimizer = self.preferred_optimizer - if start == None: start = self.optimizer_array - optimizer = optimization.get_optimizer(optimizer) - opt = optimizer(start, model=self, **kwargs) + if optimizer is None: + optimizer = self.preferred_optimizer + + if isinstance(optimizer, optimization.Optimizer): + opt = optimizer + opt.model = self + else: + optimizer = optimization.get_optimizer(optimizer) + opt = optimizer(start, model=self, **kwargs) opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads) @@ -249,7 +253,7 @@ class Model(Parameterized): def _checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3): """ Check the gradient of the ,odel by comparing to a numerical - estimate. If the verbose flag is passed, invividual + estimate. If the verbose flag is passed, individual components are tested (and printed) :param verbose: If True, print a "full" checking of each parameter diff --git a/GPy/core/parameterization/observable_array.py b/GPy/core/parameterization/observable_array.py index 24fad7b6..09450b08 100644 --- a/GPy/core/parameterization/observable_array.py +++ b/GPy/core/parameterization/observable_array.py @@ -33,7 +33,7 @@ class ObsAr(np.ndarray, Pickleable, Observable): def _setup_observers(self): # do not setup anything, as observable arrays do not have default observers pass - + def copy(self): from lists_and_dicts import ObserverList memo = {} diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 0357eb39..e359409e 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -751,8 +751,6 @@ class OptimizationHandlable(Indexable): Transform the gradients by multiplying the gradient factor for each constraint to it. 
""" - if self.has_parent(): - return g [np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__] if self._has_fixes(): return g[self._fixes_] return g @@ -793,7 +791,7 @@ class OptimizationHandlable(Indexable): #=========================================================================== # Randomizeable #=========================================================================== - def randomize(self, rand_gen=np.random.normal, loc=0, scale=1, *args, **kwargs): + def randomize(self, rand_gen=np.random.normal, *args, **kwargs): """ Randomize the model. Make this draw from the prior if one exists, else draw from given random generator @@ -804,7 +802,7 @@ class OptimizationHandlable(Indexable): :param args, kwargs: will be passed through to random number generator """ # first take care of all parameters (from N(0,1)) - x = rand_gen(loc=loc, scale=scale, size=self._size_transformed(), *args, **kwargs) + x = rand_gen(size=self._size_transformed(), *args, **kwargs) # now draw from prior where possible [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None] self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...) @@ -835,6 +833,11 @@ class OptimizationHandlable(Indexable): 1.) connect param_array of children to self.param_array 2.) tell all children to propagate further """ + if self.param_array.size != self.size: + self._param_array_ = np.empty(self.size, dtype=np.float64) + if self.gradient.size != self.size: + self._gradient_array_ = np.empty(self.size, dtype=np.float64) + pi_old_size = 0 for pi in self.parameters: pislice = slice(pi_old_size, pi_old_size + pi.size) @@ -848,6 +851,9 @@ class OptimizationHandlable(Indexable): pi._propagate_param_grad(parray[pislice], garray[pislice]) pi_old_size += pi.size + def _connect_parameters(self): + pass + class Parameterizable(OptimizationHandlable): """ A parameterisable class. @@ -874,6 +880,9 @@ class Parameterizable(OptimizationHandlable): """ Array representing the parameters of this class. There is only one copy of all parameters in memory, two during optimization. + + !WARNING!: setting the parameter array MUST always be done in memory: + m.param_array[:] = m_copy.param_array """ if self.__dict__.get('_param_array_', None) is None: self._param_array_ = np.empty(self.size, dtype=np.float64) @@ -986,6 +995,11 @@ class Parameterizable(OptimizationHandlable): # notification system #=========================================================================== def _parameters_changed_notification(self, me, which=None): + """ + In parameterizable we just need to make sure, that the next call to optimizer_array + will update the optimizer_array to the latest parameters + """ + self._optimizer_copy_transformed = False # tells the optimizer array to update on next request self.parameters_changed() def _pass_through_notify_observers(self, me, which=None): self.notify_observers(which=which) @@ -1017,4 +1031,3 @@ class Parameterizable(OptimizationHandlable): updates get passed through. 
See :py:function:``GPy.core.param.Observable.add_observer`` """ pass - diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 54065d8f..eabd5a9c 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -8,11 +8,23 @@ from re import compile, _pattern_type from param import ParamConcatenation from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing +import logging +logger = logging.getLogger("parameters changed meta") + class ParametersChangedMeta(type): def __call__(self, *args, **kw): - instance = super(ParametersChangedMeta, self).__call__(*args, **kw) - instance.parameters_changed() - return instance + self._in_init_ = True + #import ipdb;ipdb.set_trace() + self = super(ParametersChangedMeta, self).__call__(*args, **kw) + logger.debug("finished init") + self._in_init_ = False + logger.debug("connecting parameters") + self._highest_parent_._connect_parameters() + self._highest_parent_._notify_parent_change() + self._highest_parent_._connect_fixes() + logger.debug("calling parameters changed") + self.parameters_changed() + return self class Parameterized(Parameterizable): """ @@ -57,21 +69,19 @@ class Parameterized(Parameterizable): and concatenate them. Printing m[''] will result in printing of all parameters in detail. """ #=========================================================================== - # Metaclass for parameters changed after init. + # Metaclass for parameters changed after init. # This makes sure, that parameters changed will always be called after __init__ - # **Never** call parameters_changed() yourself + # **Never** call parameters_changed() yourself __metaclass__ = ParametersChangedMeta #=========================================================================== def __init__(self, name=None, parameters=[], *a, **kw): super(Parameterized, self).__init__(name=name, *a, **kw) - self._in_init_ = True self.size = sum(p.size for p in self.parameters) self.add_observer(self, self._parameters_changed_notification, -100) if not self._has_fixes(): self._fixes_ = None self._param_slices_ = [] - self._connect_parameters() - del self._in_init_ + #self._connect_parameters() self.add_parameters(*parameters) def build_pydot(self, G=None): @@ -125,6 +135,9 @@ class Parameterized(Parameterizable): param._parent_.remove_parameter(param) # make sure the size is set if index is None: + start = sum(p.size for p in self.parameters) + self.constraints.shift_right(start, param.size) + self.priors.shift_right(start, param.size) self.constraints.update(param.constraints, self.size) self.priors.update(param.priors, self.size) self.parameters.append(param) @@ -143,14 +156,16 @@ class Parameterized(Parameterizable): parent.size += param.size parent = parent._parent_ - self._connect_parameters() + if not self._in_init_: + self._connect_parameters() + self._notify_parent_change() - self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names) - self._highest_parent_._notify_parent_change() - self._highest_parent_._connect_fixes() + self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names) + self._highest_parent_._notify_parent_change() + self._highest_parent_._connect_fixes() else: - raise HierarchyError, """Parameter exists already and no copy made""" + raise HierarchyError, """Parameter exists already, try making a copy""" def add_parameters(self, *parameters): @@ -198,26 +213,28 @@ class Parameterized(Parameterizable): # no 
parameters for this class return if self.param_array.size != self.size: - self.param_array = np.empty(self.size, dtype=np.float64) + self._param_array_ = np.empty(self.size, dtype=np.float64) if self.gradient.size != self.size: self._gradient_array_ = np.empty(self.size, dtype=np.float64) old_size = 0 self._param_slices_ = [] for i, p in enumerate(self.parameters): + if not p.param_array.flags['C_CONTIGUOUS']: + raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS" + p._parent_ = self p._parent_index_ = i pslice = slice(old_size, old_size + p.size) + # first connect all children p._propagate_param_grad(self.param_array[pslice], self.gradient_full[pslice]) + # then connect children to self self.param_array[pslice] = p.param_array.flat # , requirements=['C', 'W']).ravel(order='C') self.gradient_full[pslice] = p.gradient_full.flat # , requirements=['C', 'W']).ravel(order='C') - if not p.param_array.flags['C_CONTIGUOUS']: - raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS" - p.param_array.data = self.param_array[pslice].data p.gradient_full.data = self.gradient_full[pslice].data @@ -332,7 +349,7 @@ class Parameterized(Parameterizable): def __str__(self, header=True): name = adjust_name_for_printing(self.name) + "." - constrs = self._constraints_str; + constrs = self._constraints_str; ts = self._ties_str prirs = self._priors_str desc = self._description_str; names = self.parameter_names() diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py index 29adc923..ddc4d02f 100644 --- a/GPy/core/parameterization/priors.py +++ b/GPy/core/parameterization/priors.py @@ -76,11 +76,11 @@ class Uniform(Prior): o = super(Prior, cls).__new__(cls, lower, upper) cls._instances.append(weakref.ref(o)) return cls._instances[-1]() - + def __init__(self, lower, upper): self.lower = float(lower) self.upper = float(upper) - + def __str__(self): return "[" + str(np.round(self.lower)) + ', ' + str(np.round(self.upper)) + ']' @@ -93,7 +93,7 @@ class Uniform(Prior): def rvs(self, n): return np.random.uniform(self.lower, self.upper, size=n) - + class LogGaussian(Prior): """ Implementation of the univariate *log*-Gaussian probability function, coupled with random variables. @@ -246,7 +246,7 @@ class Gamma(Prior): """ Creates an instance of a Gamma Prior by specifying the Expected value(s) and Variance(s) of the distribution. - + :param E: expected value :param V: variance """ diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index ac752c36..301d4b49 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -8,6 +8,9 @@ from ..inference.latent_function_inference import var_dtc from .. 
import likelihoods from parameterization.variational import VariationalPosterior +import logging +logger = logging.getLogger("sparse gp") + class SparseGP(GP): """ A general purpose Sparse GP model @@ -46,7 +49,7 @@ class SparseGP(GP): self.num_inducing = Z.shape[0] GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata) - + logger.info("Adding Z as parameter") self.add_parameter(self.Z, index=0) def has_uncertain_inputs(self): @@ -66,10 +69,10 @@ class SparseGP(GP): #gradients wrt Z self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z) self.Z.gradient += self.kern.gradients_Z_expectations( - self.grad_dict['dL_dpsi0'], - self.grad_dict['dL_dpsi1'], - self.grad_dict['dL_dpsi2'], - Z=self.Z, + self.grad_dict['dL_dpsi0'], + self.grad_dict['dL_dpsi1'], + self.grad_dict['dL_dpsi2'], + Z=self.Z, variational_posterior=self.X) else: #gradients wrt kernel diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index d0e35b71..1932691c 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -37,7 +37,7 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True) p = .3 - + m = GPy.models.BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) if nan: @@ -144,7 +144,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=25, Q=4 m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel) m.data_colors = c m.data_t = t - + if optimize: m.optimize('bfgs', messages=verbose, max_iters=2e3) @@ -169,7 +169,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, Y = data['X'][:N] m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k) m.data_labels = data['Y'][:N].argmax(axis=1) - + if optimize: m.optimize('bfgs', messages=verbose, max_iters=max_iters, gtol=.05) @@ -296,15 +296,16 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1, from GPy.models import BayesianGPLVM from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData - D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 7, 9 + D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4 _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) Y = Ylist[0] k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) - inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool) - Y[inan] = _np.nan + inan = _np.random.binomial(1, .8, size=Y.shape).astype(bool) # 80% missing data + Ymissing = Y.copy() + Ymissing[inan] = _np.nan - m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, + m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing, inference_method=VarDTCMissingData(inan=inan), kernel=k) m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape) @@ -364,7 +365,7 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim for inan in inanlist: imlist.append(VarDTCMissingData(limit=1, inan=inan)) - m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, + m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, inference_method=imlist, initx="random", initz='permute', **kw) @@ -410,11 +411,11 @@ def olivetti_faces(optimize=True, verbose=True, plot=True): Yn /= Yn.std() m = GPy.models.BayesianGPLVM(Yn, Q, 
num_inducing=20) - + if optimize: m.optimize('bfgs', messages=verbose, max_iters=1000) if plot: ax = m.plot_latent(which_indices=(0, 1)) - y = m.likelihood.Y[0, :] + y = m.Y[0, :] data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False) lvm = GPy.plotting.matplot_dep.visualize.lvm(m.X.mean[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') @@ -514,7 +515,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True): data = GPy.util.datasets.osu_run1() Q = 6 - kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True) + kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True) m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel) m.data = data @@ -566,7 +567,7 @@ def ssgplvm_simulation_linear(): import GPy N, D, Q = 1000, 20, 5 pi = 0.2 - + def sample_X(Q, pi): x = np.empty(Q) dies = np.random.rand(Q) @@ -576,7 +577,7 @@ def ssgplvm_simulation_linear(): else: x[q] = 0. return x - + Y = np.empty((N,D)) X = np.empty((N,Q)) # Generate data from random sampled weight matrices @@ -584,4 +585,4 @@ def ssgplvm_simulation_linear(): X[n] = sample_X(Q,pi) w = np.random.randn(D,Q) Y[n] = np.dot(w,X[n]) - + diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index a9a137dc..78f4b6f7 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -9,6 +9,8 @@ import numpy as np from ...util.misc import param_to_array from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) +import logging, itertools +logger = logging.getLogger('vardtc') class VarDTC(LatentFunctionInference): """ @@ -36,11 +38,11 @@ class VarDTC(LatentFunctionInference): return param_to_array(np.sum(np.square(Y))) def __getstate__(self): - # has to be overridden, as Cacher objects cannot be pickled. + # has to be overridden, as Cacher objects cannot be pickled. return self.limit def __setstate__(self, state): - # has to be overridden, as Cacher objects cannot be pickled. + # has to be overridden, as Cacher objects cannot be pickled. self.limit = state from ...util.caching import Cacher self.get_trYYT = Cacher(self._get_trYYT, self.limit) @@ -196,18 +198,19 @@ class VarDTCMissingData(LatentFunctionInference): def __init__(self, limit=1, inan=None): from ...util.caching import Cacher self._Y = Cacher(self._subarray_computations, limit) - self._inan = inan + if inan is not None: self._inan = ~inan + else: self._inan = None pass def set_limit(self, limit): self._Y.limit = limit def __getstate__(self): - # has to be overridden, as Cacher objects cannot be pickled. + # has to be overridden, as Cacher objects cannot be pickled. return self._Y.limit, self._inan def __setstate__(self, state): - # has to be overridden, as Cacher objects cannot be pickled. + # has to be overridden, as Cacher objects cannot be pickled. 
from ...util.caching import Cacher self.limit = state[0] self._inan = state[1] @@ -217,21 +220,35 @@ if self._inan is None: inan = np.isnan(Y) has_none = inan.any() + self._inan = inan = ~inan else: inan = self._inan has_none = True if has_none: - from ...util.subarray_and_sorting import common_subarrays - self._subarray_indices = [] - for v,ind in common_subarrays(inan, 1).iteritems(): - if not np.all(v): - v = ~np.array(v, dtype=bool) - ind = np.array(ind, dtype=int) - if ind.size == Y.shape[1]: - ind = slice(None) - self._subarray_indices.append([v,ind]) - Ys = [Y[v, :][:, ind] for v, ind in self._subarray_indices] - traces = [(y**2).sum() for y in Ys] + #print "caching missing data slices, this can take several minutes depending on the number of unique dimensions of the data..." + #csa = common_subarrays(inan, 1) + size = Y.shape[1] + #logger.info('preparing subarrays {:3.3%}'.format((i+1.)/size)) + Ys = [] + next_ten = [0.] + count = itertools.count() + for v, y in itertools.izip(inan.T, Y.T[:,:,None]): + i = count.next() + if ((i+1.)/size) >= next_ten[0]: + logger.info('preparing subarrays {:>6.1%}'.format((i+1.)/size)) + next_ten[0] += .1 + Ys.append(y[v,:]) + + next_ten = [0.] + count = itertools.count() + def trace(y): + i = count.next() + if ((i+1.)/size) >= next_ten[0]: + logger.info('preparing traces {:>6.1%}'.format((i+1.)/size)) + next_ten[0] += .1 + y = y[inan[:,i],i:i+1] + return np.einsum('ij,ij->', y,y) + traces = [trace(Y) for _ in xrange(size)] return Ys, traces else: self._subarray_indices = [[slice(None),slice(None)]] @@ -253,7 +270,6 @@ beta_all = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6) het_noise = beta_all.size != 1 - import itertools num_inducing = Z.shape[0] dL_dpsi0_all = np.zeros(Y.shape[0]) @@ -273,22 +289,24 @@ Lm = jitchol(Kmm) if uncertain_inputs: LmInv = dtrtri(Lm) - VVT_factor_all = np.empty(Y.shape) - full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1] - if not full_VVT_factor: - psi1V = np.dot(Y.T*beta_all, psi1_all).T - for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices): - if het_noise: beta = beta_all[ind] + #VVT_factor_all = np.empty(Y.shape) + #full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1] + #if not full_VVT_factor: + # psi1V = np.dot(Y.T*beta_all, psi1_all).T + #logger.info('computing dimension-wise likelihood and derivatives') + #size = len(Ys) + size = Y.shape[1] + next_ten = 0 + for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)): + if ((i+1.)/size) >= next_ten: + logger.info('inference {:> 6.1%}'.format((i+1.)/size)) + next_ten += .1 + if het_noise: beta = beta_all[i] else: beta = beta_all - VVT_factor = (beta*y) - try: - VVT_factor_all[v, ind].flat = VVT_factor.flat - except ValueError: - mult = np.ravel_multi_index((v.nonzero()[0][:,None],ind[None,:]), VVT_factor_all.shape) - VVT_factor_all.flat[mult] = VVT_factor - output_dim = y.shape[1] + VVT_factor = (y*beta) + output_dim = 1#len(ind) psi0 = psi0_all[v] psi1 = psi1_all[v, :] @@ -347,19 +365,20 @@ psi0, psi1, beta, data_fit, num_data, output_dim, trYYT, Y) - if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf - else: - print 'foobar' - tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) - tmp, _ = dpotrs(LB, tmp, lower=1) - woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0] + #if full_VVT_factor: +
woodbury_vector[:, i:i+1] = Cpsi1Vf + #else: + # print 'foobar' + # tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) + # tmp, _ = dpotrs(LB, tmp, lower=1) + # woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0] #import ipdb;ipdb.set_trace() Bi, _ = dpotri(LB, lower=1) symmetrify(Bi) Bi = -dpotri(LB, lower=1)[0] diag.add(Bi, 1) - woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None] + woodbury_inv_all[:, :, i:i+1] = backsub_both_sides(Lm, Bi)[:,:,None] dL_dthetaL = likelihood.exact_inference_gradients(dL_dR) @@ -376,23 +395,6 @@ class VarDTCMissingData(LatentFunctionInference): 'dL_dKnm':dL_dpsi1_all, 'dL_dthetaL':dL_dthetaL} - #get sufficient things for posterior prediction - #TODO: do we really want to do this in the loop? - #if not full_VVT_factor: - # print 'foobar' - # psi1V = np.dot(Y.T*beta_all, psi1_all).T - # tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) - # tmp, _ = dpotrs(LB_all, tmp, lower=1) - # woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1) - #import ipdb;ipdb.set_trace() - #Bi, _ = dpotri(LB_all, lower=1) - #symmetrify(Bi) - #Bi = -dpotri(LB_all, lower=1)[0] - #from ...util import diag - #diag.add(Bi, 1) - - #woodbury_inv = backsub_both_sides(Lm, Bi) - post = Posterior(woodbury_inv=woodbury_inv_all, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) return post, log_marginal, grad_dict diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py index 11d03413..e9a40cbb 100644 --- a/GPy/inference/latent_function_inference/var_dtc_parallel.py +++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py @@ -22,21 +22,21 @@ class VarDTC_minibatch(LatentFunctionInference): """ const_jitter = 1e-6 def __init__(self, batchsize, limit=1): - + self.batchsize = batchsize - + # Cache functions from ...util.caching import Cacher self.get_trYYT = Cacher(self._get_trYYT, limit) self.get_YYTfactor = Cacher(self._get_YYTfactor, limit) - + self.midRes = {} self.batch_pos = 0 # the starting position of the current mini-batch def set_limit(self, limit): self.get_trYYT.limit = limit self.get_YYTfactor.limit = limit - + def _get_trYYT(self, Y): return param_to_array(np.sum(np.square(Y))) @@ -51,23 +51,23 @@ class VarDTC_minibatch(LatentFunctionInference): return param_to_array(Y) else: return jitchol(tdot(Y)) - + def inference_likelihood(self, kern, X, Z, likelihood, Y): """ The first phase of inference: Compute: log-likelihood, dL_dKmm - + Cached intermediate results: Kmm, KmmInv, """ - - num_inducing = Z.shape[0] + + num_inducing = Z.shape[0] num_data, output_dim = Y.shape if isinstance(X, VariationalPosterior): uncertain_inputs = True else: uncertain_inputs = False - + #see whether we've got a different noise variance for each datum beta = 1./np.fmax(likelihood.variance, 1e-6) het_noise = beta.size > 1 @@ -77,19 +77,19 @@ class VarDTC_minibatch(LatentFunctionInference): #self.YYTfactor = beta*self.get_YYTfactor(Y) YYT_factor = Y trYYT = self.get_trYYT(Y) - + psi2_full = np.zeros((num_inducing,num_inducing)) psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM psi0_full = 0 YRY_full = 0 - + for n_start in xrange(0,num_data,self.batchsize): - + n_end = min(self.batchsize+n_start, num_data) - + Y_slice = YYT_factor[n_start:n_end] X_slice = X[n_start:n_end] - + if uncertain_inputs: psi0 = kern.psi0(Z, X_slice) psi1 = kern.psi1(Z, X_slice) @@ -98,7 +98,7 @@ class VarDTC_minibatch(LatentFunctionInference): psi0 = kern.Kdiag(X_slice) psi1 = kern.K(X_slice, Z) psi2 = None - 
+ if het_noise: beta_slice = beta[n_start:n_end] psi0_full += (beta_slice*psi0).sum() @@ -106,33 +106,33 @@ class VarDTC_minibatch(LatentFunctionInference): YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum() else: psi0_full += psi0.sum() - psi1Y_full += np.dot(Y_slice.T,psi1) # DxM - + psi1Y_full += np.dot(Y_slice.T,psi1) # DxM + if uncertain_inputs: if het_noise: psi2_full += beta_slice*psi2 else: - psi2_full += psi2 + psi2_full += psi2.sum(0) else: if het_noise: psi2_full += beta_slice*np.outer(psi1,psi1) else: - psi2_full += np.outer(psi1,psi1) - + psi2_full += np.einsum('nm,nk->mk',psi1,psi1) + if not het_noise: psi0_full *= beta psi1Y_full *= beta psi2_full *= beta YRY_full = trYYT*beta - + #====================================================================== # Compute Common Components #====================================================================== - + self.psi1Y = psi1Y_full Kmm = kern.K(Z).copy() diag.add(Kmm, self.const_jitter) Lm = jitchol(Kmm) - + Lambda = Kmm+psi2_full LL = jitchol(Lambda) b,_ = dtrtrs(LL, psi1Y_full.T) @@ -140,18 +140,18 @@ class VarDTC_minibatch(LatentFunctionInference): v,_ = dtrtrs(LL.T,b,lower=False) vvt = np.einsum('md,od->mo',v,v) LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right') - + Psi2LLInvT = dtrtrs(LL,psi2_full)[0].T LmInvPsi2LLInvT= dtrtrs(Lm,Psi2LLInvT)[0] KmmInvPsi2LLInvT = dtrtrs(Lm,LmInvPsi2LLInvT,trans=True)[0] KmmInvPsi2P = dtrtrs(LL,KmmInvPsi2LLInvT.T, trans=True)[0].T - + dL_dpsi2R = (output_dim*KmmInvPsi2P - vvt)/2. # dL_dpsi2 with R inside psi2 - + # Cache intermediate results self.midRes['dL_dpsi2R'] = dL_dpsi2R self.midRes['v'] = v - + #====================================================================== # Compute log-likelihood #====================================================================== @@ -159,30 +159,33 @@ class VarDTC_minibatch(LatentFunctionInference): if het_noise: logL_R = -np.log(beta).sum() else: logL_R = -num_data*np.log(beta) - logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum()) + logL = ( + -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2. + -output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum()) + ) #====================================================================== # Compute dL_dKmm #====================================================================== - + dL_dKmm = -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2.
#====================================================================== # Compute the Posterior distribution of inducing points p(u|Y) #====================================================================== - + # phi_u_mean = np.dot(Kmm,v) # LLInvKmm,_ = dtrtrs(LL,Kmm) # # phi_u_var = np.einsum('ma,mb->ab',LLInvKmm,LLInvKmm) # phi_u_var = Kmm - np.dot(LLInvKmm.T,LLInvKmm) - + post = Posterior(woodbury_inv=KmmInvPsi2P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm) return logL, dL_dKmm, post def inference_minibatch(self, kern, X, Z, likelihood, Y): """ - The second phase of inference: Computing the derivatives over a minibatch of Y + The second phase of inference: Computing the derivatives over a minibatch of Y Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL return a flag showing whether it reached the end of Y (isEnd) """ @@ -193,14 +196,14 @@ class VarDTC_minibatch(LatentFunctionInference): uncertain_inputs = True else: uncertain_inputs = False - + #see whether we've got a different noise variance for each datum beta = 1./np.fmax(likelihood.variance, 1e-6) het_noise = beta.size > 1 # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency! #self.YYTfactor = beta*self.get_YYTfactor(Y) YYT_factor = Y - + n_start = self.batch_pos n_end = min(self.batchsize+n_start, num_data) if n_end==num_data: @@ -209,11 +212,11 @@ class VarDTC_minibatch(LatentFunctionInference): else: isEnd = False self.batch_pos = n_end - + num_slice = n_end-n_start Y_slice = YYT_factor[n_start:n_end] X_slice = X[n_start:n_end] - + if uncertain_inputs: psi0 = kern.psi0(Z, X_slice) psi1 = kern.psi1(Z, X_slice) @@ -222,51 +225,51 @@ class VarDTC_minibatch(LatentFunctionInference): psi0 = kern.Kdiag(X_slice) psi1 = kern.K(X_slice, Z) psi2 = None - + if het_noise: beta = beta[n_start] # assuming batchsize==1 betaY = beta*Y_slice betapsi1 = np.einsum('n,nm->nm',beta,psi1) - + #====================================================================== # Load Intermediate Results #====================================================================== - + dL_dpsi2R = self.midRes['dL_dpsi2R'] v = self.midRes['v'] #====================================================================== # Compute dL_dpsi #====================================================================== - + dL_dpsi0 = -0.5 * output_dim * (beta * np.ones((n_end-n_start,))) - + dL_dpsi1 = np.dot(betaY,v.T) - + if uncertain_inputs: dL_dpsi2 = beta* dL_dpsi2R else: dL_dpsi1 += np.dot(betapsi1,dL_dpsi2R)*2. dL_dpsi2 = None - + #====================================================================== # Compute dL_dthetaL #====================================================================== if het_noise: if uncertain_inputs: - psiR = np.einsum('mo,nmo->n',dL_dpsi2R,psi2) + psiR = np.einsum('mo,nmo->',dL_dpsi2R,psi2) else: - psiR = np.einsum('nm,no,mo->n',psi1,psi1,dL_dpsi2R) - + psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R) + dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1) else: if uncertain_inputs: - psiR = np.einsum('mo,mo->',dL_dpsi2R,psi2) + psiR = np.einsum('mo,nmo->',dL_dpsi2R,psi2) else: psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R) - + dL_dthetaL = ((np.square(betaY)).sum() + beta*beta*output_dim*(psi0.sum())-num_slice*output_dim*beta)/2. 
- beta*beta*psiR- (betaY*np.dot(betapsi1,v)).sum() if uncertain_inputs: @@ -278,15 +281,15 @@ class VarDTC_minibatch(LatentFunctionInference): grad_dict = {'dL_dKdiag':dL_dpsi0, 'dL_dKnm':dL_dpsi1, 'dL_dthetaL':dL_dthetaL} - + return isEnd, (n_start,n_end), grad_dict def update_gradients(model): model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y) - + het_noise = model.likelihood.variance.size > 1 - + if het_noise: dL_dthetaL = np.empty((model.Y.shape[0],)) else: @@ -295,40 +298,54 @@ def update_gradients(model): #gradients w.r.t. kernel model.kern.update_gradients_full(dL_dKmm, model.Z, None) kern_grad = model.kern.gradient.copy() - + #gradients w.r.t. Z model.Z.gradient = model.kern.gradients_X(dL_dKmm, model.Z) - + isEnd = False while not isEnd: isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y) if isinstance(model.X, VariationalPosterior): X_slice = model.X[n_range[0]:n_range[1]] - + + dL_dpsi1 = grad_dict['dL_dpsi1']#[None, :] + dL_dpsi2 = grad_dict['dL_dpsi2'][None, :, :] #gradients w.r.t. kernel - model.kern.update_gradients_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) + model.kern.update_gradients_expectations(variational_posterior=X_slice,Z=model.Z,dL_dpsi0=grad_dict['dL_dpsi0'],dL_dpsi1=dL_dpsi1,dL_dpsi2=dL_dpsi2) kern_grad += model.kern.gradient - + #gradients w.r.t. Z model.Z.gradient += model.kern.gradients_Z_expectations( - dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice) - + dL_dpsi0=grad_dict['dL_dpsi0'], + dL_dpsi1=dL_dpsi1, + dL_dpsi2=dL_dpsi2, + Z=model.Z, variational_posterior=X_slice) + #gradients w.r.t. posterior parameters of X - X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) - model.set_X_gradients(X_slice, X_grad) - + X_grad = model.kern.gradients_qX_expectations( + variational_posterior=X_slice, + Z=model.Z, + dL_dpsi0=grad_dict['dL_dpsi0'], + dL_dpsi1=dL_dpsi1, + dL_dpsi2=dL_dpsi2) + + model.X.mean[n_range[0]:n_range[1]].gradient = X_grad[0] + model.X.variance[n_range[0]:n_range[1]].gradient = X_grad[1] + if het_noise: dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL'] else: dL_dthetaL += grad_dict['dL_dthetaL'] - + #import ipdb;ipdb.set_trace() + model.grad_dict = grad_dict + if isinstance(model.X, VariationalPosterior): + # Update Log-likelihood + model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X) + # update for the KL divergence + model.variational_prior.update_gradients_KL(model.X) + # Set the gradients w.r.t. 
kernel model.kern.gradient = kern_grad - # Update Log-likelihood - model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X) - # update for the KL divergence - model.variational_prior.update_gradients_KL(model.X) - # dL_dthetaL model.likelihood.update_gradients(dL_dthetaL) diff --git a/GPy/inference/optimization/scg.py b/GPy/inference/optimization/scg.py index 503c19be..e183b7a8 100644 --- a/GPy/inference/optimization/scg.py +++ b/GPy/inference/optimization/scg.py @@ -56,13 +56,13 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True, if gtol is None: gtol = 1e-5 - sigma0 = 1.0e-8 + sigma0 = 1.0e-7 fold = f(x, *optargs) # Initial function value. function_eval = 1 fnow = fold gradnew = gradf(x, *optargs) # Initial gradient. - if any(np.isnan(gradnew)): - raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value" + #if any(np.isnan(gradnew)): + # raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value" current_grad = np.dot(gradnew, gradnew) gradold = gradnew.copy() d = -gradnew # Initial search direction. @@ -168,13 +168,13 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True, if Delta < 0.25: beta = min(4.0 * beta, betamax) if Delta > 0.75: - beta = max(0.5 * beta, betamin) + beta = max(0.25 * beta, betamin) # Update search direction using Polak-Ribiere formula, or re-start # in direction of negative gradient after nparams steps. if nsuccess == x.size: d = -gradnew -# beta = 1. # TODO: betareset!! + beta = 1. # This is not in the original paper nsuccess = 0 elif success: Gamma = np.dot(gradold - gradnew, gradnew) / (mu) diff --git a/GPy/installation.cfg b/GPy/installation.cfg index 867a15bf..901a7ef5 100644 --- a/GPy/installation.cfg +++ b/GPy/installation.cfg @@ -1,2 +1,2 @@ -# This is the local configuration file for GPy +# This is the local installation configuration file for GPy diff --git a/GPy/kern/_src/independent_outputs.py b/GPy/kern/_src/independent_outputs.py index f07db692..21958267 100644 --- a/GPy/kern/_src/independent_outputs.py +++ b/GPy/kern/_src/independent_outputs.py @@ -20,6 +20,8 @@ def index_to_slices(index): returns >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]] """ + if len(index)==0: + return [] #contruct the return structure ind = np.asarray(index,dtype=np.int) diff --git a/GPy/kern/_src/periodic.py b/GPy/kern/_src/periodic.py index a8573a05..9f232ab0 100644 --- a/GPy/kern/_src/periodic.py +++ b/GPy/kern/_src/periodic.py @@ -101,6 +101,7 @@ class PeriodicExponential(Periodic): Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None] return(self.lengthscale/(2*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T)) + @silence_errors def update_gradients_full(self, dL_dK, X, X2=None): """derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)""" if X2 is None: X2 = X @@ -213,7 +214,7 @@ class PeriodicMatern32(Periodic): return(self.lengthscale**3/(12*np.sqrt(3)*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T)) - #@silence_errors + @silence_errors def update_gradients_full(self,dL_dK,X,X2): """derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)""" if X2 is None: X2 = X
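The new empty-index guard in index_to_slices above is easiest to see with a short sketch (illustrative only, not part of the patch; it assumes index_to_slices is importable from GPy.kern._src.independent_outputs as laid out in this tree, and the expected outputs follow the docstring above):

import numpy as np
from GPy.kern._src.independent_outputs import index_to_slices

# a grouped index column, as used by the independent-output kernels
print index_to_slices(np.array([0, 0, 1, 1, 0, 2, 2, 2, 1, 1]))
# [[slice(0, 2, None), slice(4, 5, None)], [slice(2, 4, None), slice(8, 10, None)], [slice(5, 8, None)]]

# with the guard added above, an empty index now short-circuits to []
print index_to_slices(np.array([]))  # []

diff --git a/GPy/kern/_src/splitKern.py b/GPy/kern/_src/splitKern.py index dfaf5108..27e4f76b 100644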
--- a/GPy/kern/_src/splitKern.py +++ b/GPy/kern/_src/splitKern.py @@ -20,6 +20,9 @@ class DiffGenomeKern(Kern): assert X2==None K = self.kern.K(X,X2) + if self.idx_p<=0 or self.idx_p>X.shape[0]/2: + return K + slices = index_to_slices(X[:,self.index_dim]) idx_start = slices[1][0].start idx_end = idx_start+self.idx_p @@ -33,6 +36,9 @@ class DiffGenomeKern(Kern): def Kdiag(self,X): Kdiag = self.kern.Kdiag(X) + if self.idx_p<=0 or self.idx_p>X.shape[0]/2: + return Kdiag + slices = index_to_slices(X[:,self.index_dim]) idx_start = slices[1][0].start idx_end = idx_start+self.idx_p @@ -42,6 +48,10 @@ class DiffGenomeKern(Kern): def update_gradients_full(self,dL_dK,X,X2=None): assert X2==None + if self.idx_p<=0 or self.idx_p>X.shape[0]/2: + self.kern.update_gradients_full(dL_dK, X) + return + slices = index_to_slices(X[:,self.index_dim]) idx_start = slices[1][0].start idx_end = idx_start+self.idx_p diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 9bcbfac1..6354f13d 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -37,19 +37,21 @@ class BayesianGPLVM(SparseGP): self.init = init if X_variance is None: + self.logger.info("initializing latent space variance ~ uniform(0,.1)") X_variance = np.random.uniform(0,.1,X.shape) if Z is None: + self.logger.info("initializing inducing inputs") Z = np.random.permutation(X.copy())[:num_inducing] assert Z.shape[1] == X.shape[1] if kernel is None: + self.logger.info("initializing kernel RBF") kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) # + kern.white(input_dim) if likelihood is None: likelihood = Gaussian() - self.variational_prior = NormalPrior() X = NormalPosterior(X, X_variance) @@ -65,6 +67,7 @@ class BayesianGPLVM(SparseGP): inference_method = VarDTC() SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) + self.logger.info("Adding X as parameter") self.add_parameter(self.X, index=0) def set_X_gradients(self, X, X_grad): @@ -75,7 +78,7 @@ class BayesianGPLVM(SparseGP): if isinstance(self.inference_method, VarDTC_GPU): update_gradients(self) return - + super(BayesianGPLVM, self).parameters_changed() self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) @@ -87,7 +90,7 @@ class BayesianGPLVM(SparseGP): def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40, fignum=None, plot_inducing=True, legend=True, - plot_limits=None, + plot_limits=None, aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}): import sys assert "matplotlib" in sys.modules, "matplotlib package has not been imported." @@ -107,10 +110,10 @@ class BayesianGPLVM(SparseGP): """ N_test = Y.shape[0] input_dim = self.Z.shape[1] - + means = np.zeros((N_test, input_dim)) covars = np.zeros((N_test, input_dim)) - + dpsi0 = -0.5 * self.input_dim / self.likelihood.variance dpsi2 = self.grad_dict['dL_dpsi2'][0][None, :, :] # TODO: this may change if we ignore het. 
likelihoods V = Y/self.likelihood.variance @@ -125,7 +128,7 @@ class BayesianGPLVM(SparseGP): dpsi1 = np.dot(self.posterior.woodbury_vector, V.T) #start = np.zeros(self.input_dim * 2) - + from scipy.optimize import minimize @@ -139,7 +142,7 @@ class BayesianGPLVM(SparseGP): X = NormalPosterior(means, covars) - return X + return X def dmu_dX(self, Xnew): """ @@ -169,7 +172,7 @@ class BayesianGPLVM(SparseGP): from ..plotting.matplot_dep import dim_reduction_plots return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs) - + def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2): """ @@ -187,10 +190,10 @@ def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2) psi2 = kern.psi2(Z, X) lik = dL_dpsi0 * psi0.sum() + np.einsum('ij,kj->...', dL_dpsi1, psi1) + np.einsum('ijk,lkj->...', dL_dpsi2, psi2) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S) - - dLdmu, dLdS = kern.gradients_qX_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, X) + + dLdmu, dLdS = kern.gradients_qX_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, X) dmu = dLdmu - mu # dS = S0 + S1 + S2 -0.5 + .5/S dlnS = S * (dLdS - 0.5) + .5 - + return -lik, -np.hstack((dmu.flatten(), dlnS.flatten())) diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index 8f3e55b0..7926410e 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -8,7 +8,7 @@ from base_plots import gpplot, x_frame1D, x_frame2D from ...util.misc import param_to_array from ...models.gp_coregionalized_regression import GPCoregionalizedRegression from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression - +from scipy import sparse def plot_fit(model, plot_limits=None, which_data_rows='all', which_data_ycols='all', fixed_inputs=[], @@ -61,11 +61,14 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs(): X = model.X.mean - X_variance = param_to_array(model.X.variance) + X_variance = model.X.variance else: X = model.X - X, Y = param_to_array(X, model.Y) - if hasattr(model, 'Z'): Z = param_to_array(model.Z) + #X, Y = param_to_array(X, model.Y) + Y = model.Y + if sparse.issparse(Y): Y = Y.todense().view(np.ndarray) + + if hasattr(model, 'Z'): Z = model.Z #work out what the inputs are for plotting (1D or 2D) fixed_dims = np.array([i for i,v in fixed_inputs]) diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py index fa15d66d..c647c6eb 100644 --- a/GPy/testing/parameterized_tests.py +++ b/GPy/testing/parameterized_tests.py @@ -8,6 +8,7 @@ import GPy import numpy as np from GPy.core.parameterization.parameter_core import HierarchyError from GPy.core.parameterization.observable_array import ObsAr +from GPy.core.parameterization.transformations import NegativeLogexp class ArrayCoreTest(unittest.TestCase): def setUp(self): @@ -38,10 +39,25 @@ class ParameterizedTest(unittest.TestCase): self.test1.kern = self.rbf+self.white self.test1.add_parameter(self.test1.kern) self.test1.add_parameter(self.param, 0) + # print self.test1: + #============================================================================= + # test_model. 
| Value | Constraint | Prior | Tied to + # param | (25L, 2L) | {0.0,1.0} | | + # add.rbf.variance | 1.0 | 0.0,1.0 +ve | | + # add.rbf.lengthscale | 1.0 | 0.0,1.0 +ve | | + # add.white.variance | 1.0 | 0.0,1.0 +ve | | + #============================================================================= x = np.linspace(-2,6,4)[:,None] y = np.sin(x) self.testmodel = GPy.models.GPRegression(x,y) + # print self.testmodel: + #============================================================================= + # GP_regression. | Value | Constraint | Prior | Tied to + # rbf.variance | 1.0 | +ve | | + # rbf.lengthscale | 1.0 | +ve | | + # Gaussian_noise.variance | 1.0 | +ve | | + #============================================================================= def test_add_parameter(self): self.assertEquals(self.rbf._parent_index_, 0) @@ -142,8 +158,13 @@ class ParameterizedTest(unittest.TestCase): self.testmodel.randomize() self.assertEqual(val, self.testmodel.kern.lengthscale) - - + def test_add_parameter_in_hierarchy(self): + from GPy.core import Param + self.test1.kern.rbf.add_parameter(Param("NEW", np.random.rand(2), NegativeLogexp()), 1) + self.assertListEqual(self.test1.constraints[NegativeLogexp()].tolist(), range(self.param.size+1, self.param.size+1 + 2)) + self.assertListEqual(self.test1.constraints[GPy.transformations.Logistic(0,1)].tolist(), range(self.param.size)) + self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp(0,1)].tolist(), np.r_[50, 53:55].tolist()) + def test_regular_expression_misc(self): self.testmodel.kern.lengthscale.fix() val = float(self.testmodel.kern.lengthscale) @@ -174,4 +195,4 @@ class ParameterizedTest(unittest.TestCase): if __name__ == "__main__": #import sys;sys.argv = ['', 'Test.test_add_parameter'] - unittest.main() \ No newline at end of file + unittest.main() diff --git a/GPy/util/caching.py b/GPy/util/caching.py index d54b3a0b..bce51067 100644 --- a/GPy/util/caching.py +++ b/GPy/util/caching.py @@ -18,13 +18,12 @@ class Cacher(object): self.operation = operation self.order = collections.deque() self.cached_inputs = {} # point from cache_ids to a list of [ind_ids], which where used in cache cache_id - self.logger = logging.getLogger("cache") #======================================================================= # point from each ind_id to [ref(obj), cache_ids] # 0: a weak reference to the object itself # 1: the cache_ids in which this ind_id is used (len will be how many times we have seen this ind_id) - self.cached_input_ids = {} + self.cached_input_ids = {} #======================================================================= self.cached_outputs = {} # point from cache_ids to outputs @@ -36,23 +35,18 @@ class Cacher(object): def combine_inputs(self, args, kw): "Combines the args and kw in a unique way, such that ordering of kwargs does not lead to recompute" - self.logger.debug("combining args and kw") return args + tuple(c[1] for c in sorted(kw.items(), key=lambda x: x[0])) def prepare_cache_id(self, combined_args_kw, ignore_args): "get the cacheid (conc. 
string of argument self.ids in order) ignoring ignore_args" cache_id = "".join(self.id(a) for i, a in enumerate(combined_args_kw) if i not in ignore_args) - self.logger.debug("cache_id={} was created".format(cache_id)) return cache_id def ensure_cache_length(self, cache_id): "Ensures the cache is within its limits and has one place free" - self.logger.debug("cache length gets ensured") if len(self.order) == self.limit: - self.logger.debug("cache limit of l={} was reached".format(self.limit)) # we have reached the limit, so lets release one element cache_id = self.order.popleft() - self.logger.debug("cach_id '{}' gets removed".format(cache_id)) combined_args_kw = self.cached_inputs[cache_id] for ind in combined_args_kw: if ind is not None: @@ -66,7 +60,6 @@ class Cacher(object): else: cache_ids.remove(cache_id) self.cached_input_ids[ind_id] = [ref, cache_ids] - self.logger.debug("removing caches") del self.cached_outputs[cache_id] del self.inputs_changed[cache_id] del self.cached_inputs[cache_id] @@ -81,10 +74,8 @@ class Cacher(object): if a is not None: ind_id = self.id(a) v = self.cached_input_ids.get(ind_id, [weakref.ref(a), []]) - self.logger.debug("cache_id '{}' gets stored".format(cache_id)) v[1].append(cache_id) if len(v[1]) == 1: - self.logger.debug("adding observer to object {}".format(repr(a))) a.add_observer(self, self.on_cache_changed) self.cached_input_ids[ind_id] = v @@ -108,28 +99,21 @@ class Cacher(object): cache_id = self.prepare_cache_id(inputs, self.ignore_args) # 2: if anything is not cachable, we will just return the operation, without caching if reduce(lambda a, b: a or (not (isinstance(b, Observable) or b is None)), inputs, False): - self.logger.info("some inputs are not observable: returning without caching") - self.logger.debug(str(map(lambda x: isinstance(x, Observable) or x is None, inputs))) - self.logger.debug(str(map(repr, inputs))) return self.operation(*args, **kw) # 3&4: check whether this cache_id has been cached, then has it changed? try: if(self.inputs_changed[cache_id]): - self.logger.debug("{} already seen, but inputs changed. 
refreshing cacher".format(cache_id)) # 4: This happens, when elements have changed for this cache self.id self.inputs_changed[cache_id] = False self.cached_outputs[cache_id] = self.operation(*args, **kw) except KeyError: - self.logger.info("{} never seen, creating cache entry".format(cache_id)) # 3: This is when we never saw this chache_id: self.ensure_cache_length(cache_id) self.add_to_cache(cache_id, inputs, self.operation(*args, **kw)) except: - self.logger.error("an error occurred while trying to run caching for {}, resetting".format(cache_id)) self.reset() raise # 5: We have seen this cache_id and it is cached: - self.logger.info("returning cache {}".format(cache_id)) return self.cached_outputs[cache_id] def on_cache_changed(self, direct, which=None): @@ -143,7 +127,6 @@ class Cacher(object): ind_id = self.id(what) _, cache_ids = self.cached_input_ids.get(ind_id, [None, []]) for cache_id in cache_ids: - self.logger.info("callback from {} changed inputs from {}".format(ind_id, self.inputs_changed[cache_id])) self.inputs_changed[cache_id] = True def reset(self): diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 44c9a930..36c1a481 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -51,7 +51,7 @@ if not (on_rtd): json_data=open(path).read() football_dict = json.loads(json_data) - + def prompt_user(prompt): """Ask user for agreeing to data set licenses.""" @@ -128,14 +128,14 @@ def download_url(url, store_directory, save_name = None, messages = True, suffix f.write(buff) sys.stdout.write(" "*(len(status)) + "\r") if file_size: - status = r"[{perc: <{ll}}] {dl:7.3f}/{full:.3f}MB".format(dl=file_size_dl/(1048576.), - full=file_size/(1048576.), ll=line_length, + status = r"[{perc: <{ll}}] {dl:7.3f}/{full:.3f}MB".format(dl=file_size_dl/(1048576.), + full=file_size/(1048576.), ll=line_length, perc="="*int(line_length*float(file_size_dl)/file_size)) else: - status = r"[{perc: <{ll}}] {dl:7.3f}MB".format(dl=file_size_dl/(1048576.), - ll=line_length, + status = r"[{perc: <{ll}}] {dl:7.3f}MB".format(dl=file_size_dl/(1048576.), + ll=line_length, perc="."*int(line_length*float(file_size_dl/(10*1048576.)))) - + sys.stdout.write(status) sys.stdout.flush() sys.stdout.write(" "*(len(status)) + "\r") @@ -320,7 +320,7 @@ def della_gatta_TRP63_gene_expression(data_set='della_gatta', gene_number=None): Y = Y[:, None] return data_details_return({'X': X, 'Y': Y, 'gene_number' : gene_number}, data_set) - + def football_data(season='1314', data_set='football_data'): """Football data from English games since 1993. This downloads data from football-data.co.uk for the given season. 
""" @@ -385,7 +385,7 @@ def spellman_yeast(data_set='spellman_yeast'): Y = read_csv(filename, header=0, index_col=0, sep='\t') return data_details_return({'Y': Y}, data_set) -def spellman_yeast_cdc(data_set='spellman_yeast'): +def spellman_yeast_cdc15(data_set='spellman_yeast'): if not data_available(data_set): download_data(data_set) from pandas import read_csv @@ -405,12 +405,13 @@ def lee_yeast_ChIP(data_set='lee_yeast_ChIP'): import zipfile dir_path = os.path.join(data_path, data_set) filename = os.path.join(dir_path, 'binding_by_gene.tsv') - X = read_csv(filename, header=1, index_col=0, sep='\t') - transcription_factors = [col for col in X.columns if col[:7] != 'Unnamed'] - annotations = X[['Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3']] - X = X[transcription_factors] - return data_details_return({'annotations' : annotations, 'X' : X, 'transcription_factors': transcription_factors}, data_set) - + S = read_csv(filename, header=1, index_col=0, sep='\t') + transcription_factors = [col for col in S.columns if col[:7] != 'Unnamed'] + annotations = S[['Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3']] + S = S[transcription_factors] + return data_details_return({'annotations' : annotations, 'Y' : S, 'transcription_factors': transcription_factors}, data_set) + + def fruitfly_tomancak(data_set='fruitfly_tomancak', gene_number=None): if not data_available(data_set): @@ -424,7 +425,7 @@ def fruitfly_tomancak(data_set='fruitfly_tomancak', gene_number=None): xt = np.linspace(0, num_time-1, num_time) xr = np.linspace(0, num_repeats-1, num_repeats) xtime, xrepeat = np.meshgrid(xt, xr) - X = np.vstack((xtime.flatten(), xrepeat.flatten())).T + X = np.vstack((xtime.flatten(), xrepeat.flatten())).T return data_details_return({'X': X, 'Y': Y, 'gene_number' : gene_number}, data_set) def drosophila_protein(data_set='drosophila_protein'): @@ -466,7 +467,7 @@ def google_trends(query_terms=['big data', 'machine learning', 'data science'], """Data downloaded from Google trends for given query terms. Warning, if you use this function multiple times in a row you get blocked due to terms of service violations. The function will cache the result of your query, if you wish to refresh an old query set refresh_data to True. 
The function is inspired by this notebook: http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%20meet%20Notebook.ipynb""" query_terms.sort() import pandas - + # Create directory name for data dir_path = os.path.join(data_path,'google_trends') if not os.path.isdir(dir_path): @@ -513,9 +514,9 @@ def google_trends(query_terms=['big data', 'machine learning', 'data science'], X = np.asarray([(row, i) for i in range(terms) for row in df.index]) Y = np.asarray([[df.ix[row][query_terms[i]]] for i in range(terms) for row in df.index ]) output_info = columns[1:] - + return data_details_return({'data frame' : df, 'X': X, 'Y': Y, 'query_terms': output_info, 'info': "Data downloaded from google trends with query terms: " + ', '.join(output_info) + '.'}, data_set) - + # The data sets def oil(data_set='three_phase_oil_flow'): """The three phase oil data from Bishop and James (1993).""" @@ -646,7 +647,7 @@ def decampos_digits(data_set='decampos_characters', which_digits=[0,1,2,3,4,5,6, lbls = np.array([[l]*num_samples for l in which_digits]).reshape(Y.shape[0], 1) str_lbls = np.array([[str(l)]*num_samples for l in which_digits]) return data_details_return({'Y': Y, 'lbls': lbls, 'str_lbls' : str_lbls, 'info': 'Digits data set from the de Campos characters data'}, data_set) - + def ripley_synth(data_set='ripley_prnn_data'): if not data_available(data_set): download_data(data_set) @@ -673,7 +674,7 @@ def mauna_loa(data_set='mauna_loa', num_train=545, refresh_data=False): Y = allY[:num_train, 0:1] Ytest = allY[num_train:, 0:1] return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Mauna Loa data with " + str(num_train) + " values used as training points."}, data_set) - + def boxjenkins_airline(data_set='boxjenkins_airline', num_train=96): path = os.path.join(data_path, data_set) @@ -685,7 +686,7 @@ def boxjenkins_airline(data_set='boxjenkins_airline', num_train=96): Xtest = data[num_train:, 0:1] Ytest = data[num_train:, 1:2] return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Monthly airline passenger data from Box & Jenkins 1976."}, data_set) - + def osu_run1(data_set='osu_run1', sample_every=4): path = os.path.join(data_path, data_set) @@ -724,7 +725,7 @@ def hapmap3(data_set='hapmap3'): \ -1, iff SNPij==(B2,B2) The SNP data and the meta information (such as iid, sex and phenotype) are - stored in the dataframe datadf, index is the Individual ID, + stored in the dataframe datadf, index is the Individual ID, with the following columns for metainfo: * family_id -> Family ID @@ -797,7 +798,7 @@ def hapmap3(data_set='hapmap3'): status=write_status('unpacking...', curr, status) os.remove(filepath) status=write_status('reading .ped...', 25, status) - # Preprocess data: + # Preprocess data: snpstrnp = np.loadtxt(unpacked_files[0], dtype=str) status=write_status('reading .map...', 33, status) mapnp = np.loadtxt(unpacked_files[1], dtype=str) @@ -958,7 +959,7 @@ def olivetti_glasses(data_set='olivetti_glasses', num_training=200, seed=default Y = y[index[:num_training],:] Ytest = y[index[num_training:]] return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'seed' : seed, 'info': "ORL Faces with labels identifying who is wearing glasses and who isn't. Data is randomly partitioned according to given seed.
Presence or absence of glasses was labelled by James Hensman."}, 'olivetti_faces') - + def olivetti_faces(data_set='olivetti_faces'): path = os.path.join(data_path, data_set) if not data_available(data_set): @@ -971,7 +972,8 @@ def olivetti_faces(data_set='olivetti_faces'): for subject in range(40): for image in range(10): image_path = os.path.join(path, 'orl_faces', 's'+str(subject+1), str(image+1) + '.pgm') - Y.append(GPy.util.netpbmfile.imread(image_path).flatten()) + from GPy.util import netpbmfile + Y.append(netpbmfile.imread(image_path).flatten()) lbls.append(subject) Y = np.asarray(Y) lbls = np.asarray(lbls)[:, None] @@ -1194,7 +1196,7 @@ def cifar10_patches(data_set='cifar-10'): for x in range(0,32-5,5): for y in range(0,32-5,5): patches = np.concatenate((patches, images[:,x:x+5,y:y+5,:]), axis=0) - patches = patches.reshape((patches.shape[0],-1)) + patches = patches.reshape((patches.shape[0],-1)) return data_details_return({'Y': patches, "info" : "32x32 pixel patches extracted from the CIFAR-10 data by Boris Babenko to demonstrate k-means features."}, data_set) def cmu_mocap_49_balance(data_set='cmu_mocap'): diff --git a/GPy/util/initialization.py b/GPy/util/initialization.py index a90ea8f4..908da023 100644 --- a/GPy/util/initialization.py +++ b/GPy/util/initialization.py @@ -16,8 +16,8 @@ def initialize_latent(init, input_dim, Y): var = p.fracs[:input_dim] else: var = Xr.var(0) - + Xr -= Xr.mean(0) - Xr /= Xr.var(0) - + Xr /= Xr.std(0) + return Xr, var/var.max() diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index 661a2b47..bb381665 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -16,13 +16,17 @@ import warnings import os from config import * -if np.all(np.float64((scipy.__version__).split('.')[:2]) >= np.array([0, 12])): +_scipyversion = np.float64((scipy.__version__).split('.')[:2]) +_fix_dpotri_scipy_bug = True +if np.all(_scipyversion >= np.array([0, 14])): + from scipy.linalg import lapack + _fix_dpotri_scipy_bug = False +elif np.all(_scipyversion >= np.array([0, 12])): #import scipy.linalg.lapack.clapack as lapack from scipy.linalg import lapack else: from scipy.linalg.lapack import flapack as lapack - if config.getboolean('anaconda', 'installed') and config.getboolean('anaconda', 'MKL'): try: anaconda_path = str(config.get('anaconda', 'location')) @@ -30,6 +34,7 @@ if config.getboolean('anaconda', 'installed') and config.getboolean('anaconda', dsyrk = mkl_rt.dsyrk dsyr = mkl_rt.dsyr _blas_available = True + print 'anaconda installed and mkl is loaded' except: _blas_available = False else: @@ -141,16 +146,23 @@ def dpotrs(A, B, lower=1): def dpotri(A, lower=1): """ Wrapper for lapack dpotri function - + + DPOTRI - compute the inverse of a real symmetric positive + definite matrix A using the Cholesky factorization A = + U**T*U or A = L*L**T computed by DPOTRF + :param A: Matrix A :param lower: is matrix lower (true) or upper (false) :returns: A inverse """ - assert lower==1, "scipy linalg behaviour is very weird. please use lower, fortran ordered arrays" - + if _fix_dpotri_scipy_bug: + assert lower==1, "scipy linalg behaviour is very weird. 
please use lower, fortran ordered arrays" + lower = 0 + A = force_F_ordered(A) - R, info = lapack.dpotri(A, lower=0) + R, info = lapack.dpotri(A, lower=lower) #needs to be zero here, seems to be a scipy bug + symmetrify(R) return R, info @@ -217,7 +229,7 @@ def pdinv(A, *args): L = jitchol(A, *args) logdet = 2.*np.sum(np.log(np.diag(L))) Li = dtrtri(L) - Ai, _ = lapack.dpotri(L) + Ai, _ = dpotri(L, lower=1) # Ai = np.tril(Ai) + np.tril(Ai,-1).T symmetrify(Ai) diff --git a/GPy/util/netpbmfile.py b/GPy/util/netpbmfile.py new file mode 100644 index 00000000..030bd574 --- /dev/null +++ b/GPy/util/netpbmfile.py @@ -0,0 +1,331 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# netpbmfile.py + +# Copyright (c) 2011-2013, Christoph Gohlke +# Copyright (c) 2011-2013, The Regents of the University of California +# Produced at the Laboratory for Fluorescence Dynamics. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the copyright holders nor the names of any +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +"""Read image data from and write image data to Netpbm files. + +This implementation follows the Netpbm format specifications at +http://netpbm.sourceforge.net/doc/. No gamma correction is performed. + +The following image formats are supported: PBM (bi-level), PGM (grayscale), +PPM (color), PAM (arbitrary), XV thumbnail (RGB332, read-only). + +:Author: + `Christoph Gohlke <http://www.lfd.uci.edu/~gohlke/>`_ + +:Organization: + Laboratory for Fluorescence Dynamics, University of California, Irvine + +:Version: 2013.01.18 + +Requirements +------------ +* `CPython 2.7, 3.2 or 3.3 <http://www.python.org>`_ +* `Numpy 1.7 <http://www.numpy.org>`_ +* `Matplotlib 1.2 <http://www.matplotlib.org>`_ (optional for plotting) + +Examples +-------- +>>> im1 = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16) +>>> imsave('_tmp.pgm', im1) +>>> im2 = imread('_tmp.pgm') +>>> assert numpy.all(im1 == im2) + +""" + +from __future__ import division, print_function + +import sys +import re +import math +from copy import deepcopy + +import numpy + +__version__ = '2013.01.18' +__docformat__ = 'restructuredtext en' +__all__ = ['imread', 'imsave', 'NetpbmFile'] + + +def imread(filename, *args, **kwargs): + """Return image data from Netpbm file as numpy array.
+ + `args` and `kwargs` are arguments to NetpbmFile.asarray(). + + Examples + -------- + >>> image = imread('_tmp.pgm') + + """ + netpbm = NetpbmFile(filename) + try: + image = netpbm.asarray() + finally: + netpbm.close() + return image + + +def imsave(filename, data, maxval=None, pam=False): + """Write image data to Netpbm file. + + Examples + -------- + >>> image = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16) + >>> imsave('_tmp.pgm', image) + + """ + netpbm = NetpbmFile(data, maxval=maxval) + try: + netpbm.write(filename, pam=pam) + finally: + netpbm.close() + + +class NetpbmFile(object): + """Read and write Netpbm PAM, PBM, PGM, PPM files.""" + + _types = {b'P1': b'BLACKANDWHITE', b'P2': b'GRAYSCALE', b'P3': b'RGB', + b'P4': b'BLACKANDWHITE', b'P5': b'GRAYSCALE', b'P6': b'RGB', + b'P7 332': b'RGB', b'P7': b'RGB_ALPHA'} + + def __init__(self, arg=None, **kwargs): + """Initialize instance from filename, open file, or numpy array.""" + for attr in ('header', 'magicnum', 'width', 'height', 'maxval', + 'depth', 'tupltypes', '_filename', '_fh', '_data'): + setattr(self, attr, None) + if arg is None: + self._fromdata([], **kwargs) + elif isinstance(arg, basestring): + self._fh = open(arg, 'rb') + self._filename = arg + self._fromfile(self._fh, **kwargs) + elif hasattr(arg, 'seek'): + self._fromfile(arg, **kwargs) + self._fh = arg + else: + self._fromdata(arg, **kwargs) + + def asarray(self, copy=True, cache=False, **kwargs): + """Return image data from file as numpy array.""" + data = self._data + if data is None: + data = self._read_data(self._fh, **kwargs) + if cache: + self._data = data + else: + return data + return deepcopy(data) if copy else data + + def write(self, arg, **kwargs): + """Write instance to file.""" + if hasattr(arg, 'seek'): + self._tofile(arg, **kwargs) + else: + with open(arg, 'wb') as fid: + self._tofile(fid, **kwargs) + + def close(self): + """Close open file.
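Only a file handle that this instance opened itself (i.e. when constructed from a filename) is closed; caller-supplied file objects are left untouched.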
Future asarray calls might fail.""" + if self._filename and self._fh: + self._fh.close() + self._fh = None + + def __del__(self): + self.close() + + def _fromfile(self, fh): + """Initialize instance from open file.""" + fh.seek(0) + data = fh.read(4096) + if (len(data) < 7) or not (b'0' < data[1:2] < b'8'): + raise ValueError("Not a Netpbm file:\n%s" % data[:32]) + try: + self._read_pam_header(data) + except Exception: + try: + self._read_pnm_header(data) + except Exception: + raise ValueError("Not a Netpbm file:\n%s" % data[:32]) + + def _read_pam_header(self, data): + """Read PAM header and initialize instance.""" + regroups = re.search( + b"(^P7[\n\r]+(?:(?:[\n\r]+)|(?:#.*)|" + b"(HEIGHT\s+\d+)|(WIDTH\s+\d+)|(DEPTH\s+\d+)|(MAXVAL\s+\d+)|" + b"(?:TUPLTYPE\s+\w+))*ENDHDR\n)", data).groups() + self.header = regroups[0] + self.magicnum = b'P7' + for group in regroups[1:]: + key, value = group.split() + setattr(self, unicode(key).lower(), int(value)) + matches = re.findall(b"(TUPLTYPE\s+\w+)", self.header) + self.tupltypes = [s.split(None, 1)[1] for s in matches] + + def _read_pnm_header(self, data): + """Read PNM header and initialize instance.""" + bpm = data[1:2] in b"14" + regroups = re.search(b"".join(( + b"(^(P[123456]|P7 332)\s+(?:#.*[\r\n])*", + b"\s*(\d+)\s+(?:#.*[\r\n])*", + b"\s*(\d+)\s+(?:#.*[\r\n])*" * (not bpm), + b"\s*(\d+)\s(?:\s*#.*[\r\n]\s)*)")), data).groups() + (1, ) * bpm + self.header = regroups[0] + self.magicnum = regroups[1] + self.width = int(regroups[2]) + self.height = int(regroups[3]) + self.maxval = int(regroups[4]) + self.depth = 3 if self.magicnum in b"P3P6P7 332" else 1 + self.tupltypes = [self._types[self.magicnum]] + + def _read_data(self, fh, byteorder='>'): + """Return image data from open file as numpy array.""" + fh.seek(len(self.header)) + data = fh.read() + dtype = 'u1' if self.maxval < 256 else byteorder + 'u2' + depth = 1 if self.magicnum == b"P7 332" else self.depth + shape = [-1, self.height, self.width, depth] + size = numpy.prod(shape[1:]) + if self.magicnum in b"P1P2P3": + data = numpy.array(data.split(None, size)[:size], dtype) + data = data.reshape(shape) + elif self.maxval == 1: + shape[2] = int(math.ceil(self.width / 8)) + data = numpy.frombuffer(data, dtype).reshape(shape) + data = numpy.unpackbits(data, axis=-2)[:, :, :self.width, :] + else: + data = numpy.frombuffer(data, dtype) + data = data[:size * (data.size // size)].reshape(shape) + if data.shape[0] < 2: + data = data.reshape(data.shape[1:]) + if data.shape[-1] < 2: + data = data.reshape(data.shape[:-1]) + if self.magicnum == b"P7 332": + rgb332 = numpy.array(list(numpy.ndindex(8, 8, 4)), numpy.uint8) + rgb332 *= [36, 36, 85] + data = numpy.take(rgb332, data, axis=0) + return data + + def _fromdata(self, data, maxval=None): + """Initialize instance from numpy array.""" + data = numpy.array(data, ndmin=2, copy=True) + if data.dtype.kind not in "uib": + raise ValueError("not an integer type: %s" % data.dtype) + if data.dtype.kind == 'i' and numpy.min(data) < 0: + raise ValueError("data out of range: %i" % numpy.min(data)) + if maxval is None: + maxval = numpy.max(data) + maxval = 255 if maxval < 256 else 65535 + if maxval < 0 or maxval > 65535: + raise ValueError("data out of range: %i" % maxval) + data = data.astype('u1' if maxval < 256 else '>u2') + self._data = data + if data.ndim > 2 and data.shape[-1] in (3, 4): + self.depth = data.shape[-1] + self.width = data.shape[-2] + self.height = data.shape[-3] + self.magicnum = b'P7' if self.depth == 4 else b'P6' + else: + self.depth = 1 
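+ # single-channel (greyscale or bi-level) data: height and width are the trailing axes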
+ self.width = data.shape[-1] + self.height = data.shape[-2] + self.magicnum = b'P5' if maxval > 1 else b'P4' + self.maxval = maxval + self.tupltypes = [self._types[self.magicnum]] + self.header = self._header() + + def _tofile(self, fh, pam=False): + """Write Netpbm file.""" + fh.seek(0) + fh.write(self._header(pam)) + data = self.asarray(copy=False) + if self.maxval == 1: + data = numpy.packbits(data, axis=-1) + data.tofile(fh) + + def _header(self, pam=False): + """Return file header as byte string.""" + if pam or self.magicnum == b'P7': + header = "\n".join(( + "P7", + "HEIGHT %i" % self.height, + "WIDTH %i" % self.width, + "DEPTH %i" % self.depth, + "MAXVAL %i" % self.maxval, + "\n".join("TUPLTYPE %s" % unicode(i) for i in self.tupltypes), + "ENDHDR\n")) + elif self.maxval == 1: + header = "P4 %i %i\n" % (self.width, self.height) + elif self.depth == 1: + header = "P5 %i %i %i\n" % (self.width, self.height, self.maxval) + else: + header = "P6 %i %i %i\n" % (self.width, self.height, self.maxval) + if sys.version_info[0] > 2: + header = bytes(header, 'ascii') + return header + + def __str__(self): + """Return information about instance.""" + return unicode(self.header) + + +if sys.version_info[0] > 2: + basestring = str + unicode = lambda x: str(x, 'ascii') + +if __name__ == "__main__": + # Show images specified on command line or all images in current directory + from glob import glob + from matplotlib import pyplot + files = sys.argv[1:] if len(sys.argv) > 1 else glob('*.p*m') + for fname in files: + try: + pam = NetpbmFile(fname) + img = pam.asarray(copy=False) + if False: + pam.write('_tmp.pgm.out', pam=True) + img2 = imread('_tmp.pgm.out') + assert numpy.all(img == img2) + imsave('_tmp.pgm.out', img) + img2 = imread('_tmp.pgm.out') + assert numpy.all(img == img2) + pam.close() + except ValueError as e: + print(fname, e) + continue + _shape = img.shape + if img.ndim > 3 or (img.ndim > 2 and img.shape[-1] not in (3, 4)): + img = img[0] + cmap = 'gray' if pam.maxval > 1 else 'binary' + pyplot.imshow(img, cmap, interpolation='nearest') + pyplot.title("%s %s %s %s" % (fname, unicode(pam.magicnum), + _shape, img.dtype)) + pyplot.show() diff --git a/GPy/util/subarray_and_sorting.py b/GPy/util/subarray_and_sorting.py index 33901851..0966084c 100644 --- a/GPy/util/subarray_and_sorting.py +++ b/GPy/util/subarray_and_sorting.py @@ -16,13 +16,13 @@ def common_subarrays(X, axis=0): for the subarray in X, where index is the index to the remaining axis. :param :class:`np.ndarray` X: 2d array to check for common subarrays in - :param int axis: axis to apply subarray detection over. - When the index is 0, compare rows -- columns, otherwise. + :param int axis: axis to apply subarray detection over. + When axis is 0 rows are compared, otherwise columns.
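Each distinct subarray is accumulated as a tuple key, collecting the indices along the chosen axis at which it occurs (see accumulate below), so identical rows or columns end up grouped together.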
Examples: ========= - In a 2d array: + In a 2d array: >>> import numpy as np >>> X = np.zeros((3,6), dtype=bool) >>> X[[1,1,1],[0,4,5]] = 1; X[1:,[2,3]] = 1 @@ -48,14 +48,10 @@ def common_subarrays(X, axis=0): assert X.ndim == 2 and axis in (0,1), "Only implemented for 2D arrays" subarrays = defaultdict(list) cnt = count() - logger = logging.getLogger("common_subarrays") def accumulate(x, s, c): - logger.debug("creating tuple") t = tuple(x) - logger.debug("tuple done") col = c.next() iadd(s[t], [col]) - logger.debug("added col {}".format(col)) return None if axis == 0: [accumulate(x, subarrays, cnt) for x in X] else: [accumulate(x, subarrays, cnt) for x in X.T] @@ -63,4 +59,4 @@ if __name__ == '__main__': import doctest - doctest.testmod() \ No newline at end of file + doctest.testmod() diff --git a/GPy/util/univariate_Gaussian.py b/GPy/util/univariate_Gaussian.py index 702ab25c..b5472e0a 100644 --- a/GPy/util/univariate_Gaussian.py +++ b/GPy/util/univariate_Gaussian.py @@ -40,6 +40,37 @@ def std_norm_cdf(x): weave.inline(code, arg_names=['x', 'cdf_x', 'N'], support_code=support_code) return cdf_x +def std_norm_cdf_np(x): + """ + Cumulative standard Gaussian distribution + Based on Abramowitz, M. and Stegun, I. (1970) + Around 3 times slower than the weave-based std_norm_cdf when x is a scalar, otherwise quite a lot slower + """ + x_shape = np.asarray(x).shape + + if len(x_shape) == 0 or x_shape[0] == 1: + sign = np.sign(x) + x = x * sign # work on a copy so a caller-supplied array is not modified in place + x = x / np.sqrt(2.) + t = 1.0/(1.0 + 0.3275911*x) + erf = 1. - np.exp(-x**2)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429))))) + cdf_x = 0.5*(1.0 + sign*erf) + return cdf_x + else: + x = np.atleast_1d(x).copy() + cdf_x = np.zeros_like(x) + sign = np.ones_like(x) + neg_x_ind = x<0 + sign[neg_x_ind] = -1.0 + x[neg_x_ind] = -x[neg_x_ind] + x /= np.sqrt(2.) + t = 1.0/(1.0 + 0.3275911*x) + erf = 1. - np.exp(-x**2)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429))))) + cdf_x = 0.5*(1.0 + sign*erf) + cdf_x = cdf_x.reshape(x_shape) + return cdf_x + + def inv_std_norm_cdf(x): """ Inverse cumulative standard Gaussian distribution