Merge branch 'devel' of https://github.com/SheffieldML/GPy into devel

This commit is contained in:
Neil Lawrence 2014-07-24 20:55:23 +01:00
commit 1c9eb270bf
21 changed files with 693 additions and 279 deletions

View file

@ -12,6 +12,10 @@ from .. import likelihoods
from ..likelihoods.gaussian import Gaussian
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation, LatentFunctionInference
from parameterization.variational import VariationalPosterior
from scipy.sparse.base import issparse
import logging
logger = logging.getLogger("GP")
class GP(Model):
"""
@ -34,12 +38,14 @@ class GP(Model):
assert X.ndim == 2
if isinstance(X, (ObsAr, VariationalPosterior)):
self.X = X.copy()
else: self.X = ObsAr(X.copy())
else: self.X = ObsAr(X)
self.num_data, self.input_dim = self.X.shape
assert Y.ndim == 2
self.Y = ObsAr(Y.copy())
logger.info("initializing Y")
if issparse(Y): self.Y = Y
else: self.Y = ObsAr(Y)
assert Y.shape[0] == self.num_data
_, self.output_dim = self.Y.shape
@ -54,6 +60,7 @@ class GP(Model):
self.likelihood = likelihood
#find a sensible inference method
logger.info("initializing inference method")
if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian) or isinstance(likelihood, likelihoods.MixedNoise):
inference_method = exact_gaussian_inference.ExactGaussianInference()
@ -62,6 +69,7 @@ class GP(Model):
print "defaulting to ", inference_method, "for latent function inference"
self.inference_method = inference_method
logger.info("adding kernel and likelihood as parameters")
self.add_parameter(self.kern)
self.add_parameter(self.likelihood)

View file

@ -61,7 +61,7 @@ class Model(Parameterized):
on the current machine.
"""
initial_parameters = self.optimizer_array
initial_parameters = self.optimizer_array.copy()
if parallel:
try:
@ -97,9 +97,9 @@ class Model(Parameterized):
if len(self.optimization_runs):
i = np.argmin([o.f_opt for o in self.optimization_runs])
self._set_params_transformed(self.optimization_runs[i].x_opt)
self.optimizer_array = self.optimization_runs[i].x_opt
else:
self._set_params_transformed(initial_parameters)
self.optimizer_array = initial_parameters
def ensure_default_constraints(self, warning=True):
"""
@ -225,12 +225,16 @@ class Model(Parameterized):
if self.size == 0:
raise RuntimeError, "Model without parameters cannot be optimized"
if optimizer is None:
optimizer = self.preferred_optimizer
if start == None:
start = self.optimizer_array
if optimizer is None:
optimizer = self.preferred_optimizer
if isinstance(optimizer, optimization.Optimizer):
opt = optimizer
opt.model = self
else:
optimizer = optimization.get_optimizer(optimizer)
opt = optimizer(start, model=self, **kwargs)
@ -249,7 +253,7 @@ class Model(Parameterized):
def _checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
"""
Check the gradient of the model by comparing to a numerical
estimate. If the verbose flag is passed, invividual
estimate. If the verbose flag is passed, individual
components are tested (and printed)
:param verbose: If True, print a "full" checking of each parameter

View file

@ -751,8 +751,6 @@ class OptimizationHandlable(Indexable):
Transform the gradients by multiplying the gradient factor for each
constraint to it.
"""
if self.has_parent():
return g
[np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
if self._has_fixes(): return g[self._fixes_]
return g
@ -793,7 +791,7 @@ class OptimizationHandlable(Indexable):
#===========================================================================
# Randomizable
#===========================================================================
def randomize(self, rand_gen=np.random.normal, loc=0, scale=1, *args, **kwargs):
def randomize(self, rand_gen=np.random.normal, *args, **kwargs):
"""
Randomize the model.
Make this draw from the prior if one exists, else draw from given random generator
@ -804,7 +802,7 @@ class OptimizationHandlable(Indexable):
:param args, kwargs: will be passed through to random number generator
"""
# first take care of all parameters (from N(0,1))
x = rand_gen(loc=loc, scale=scale, size=self._size_transformed(), *args, **kwargs)
x = rand_gen(size=self._size_transformed(), *args, **kwargs)
# now draw from prior where possible
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...)
@ -835,6 +833,11 @@ class OptimizationHandlable(Indexable):
1.) connect param_array of children to self.param_array
2.) tell all children to propagate further
"""
if self.param_array.size != self.size:
self._param_array_ = np.empty(self.size, dtype=np.float64)
if self.gradient.size != self.size:
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
pi_old_size = 0
for pi in self.parameters:
pislice = slice(pi_old_size, pi_old_size + pi.size)
@ -848,6 +851,9 @@ class OptimizationHandlable(Indexable):
pi._propagate_param_grad(parray[pislice], garray[pislice])
pi_old_size += pi.size
def _connect_parameters(self):
pass
class Parameterizable(OptimizationHandlable):
"""
A parameterisable class.
@ -874,6 +880,9 @@ class Parameterizable(OptimizationHandlable):
"""
Array representing the parameters of this class.
There is only one copy of all parameters in memory, two during optimization.
!WARNING!: setting the parameter array MUST always be done in memory:
m.param_array[:] = m_copy.param_array
"""
if self.__dict__.get('_param_array_', None) is None:
self._param_array_ = np.empty(self.size, dtype=np.float64)
@ -986,6 +995,11 @@ class Parameterizable(OptimizationHandlable):
# notification system
#===========================================================================
def _parameters_changed_notification(self, me, which=None):
"""
In parameterizable we just need to make sure, that the next call to optimizer_array
will update the optimizer_array to the latest parameters
"""
self._optimizer_copy_transformed = False # tells the optimizer array to update on next request
self.parameters_changed()
def _pass_through_notify_observers(self, me, which=None):
self.notify_observers(which=which)
@ -1017,4 +1031,3 @@ class Parameterizable(OptimizationHandlable):
updates get passed through. See :py:func:`GPy.core.param.Observable.add_observer`
"""
pass

View file

@ -8,11 +8,23 @@ from re import compile, _pattern_type
from param import ParamConcatenation
from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
import logging
logger = logging.getLogger("parameters changed meta")
class ParametersChangedMeta(type):
def __call__(self, *args, **kw):
instance = super(ParametersChangedMeta, self).__call__(*args, **kw)
instance.parameters_changed()
return instance
self._in_init_ = True
#import ipdb;ipdb.set_trace()
self = super(ParametersChangedMeta, self).__call__(*args, **kw)
logger.debug("finished init")
self._in_init_ = False
logger.debug("connecting parameters")
self._highest_parent_._connect_parameters()
self._highest_parent_._notify_parent_change()
self._highest_parent_._connect_fixes()
logger.debug("calling parameters changed")
self.parameters_changed()
return self
class Parameterized(Parameterizable):
"""
@ -64,14 +76,12 @@ class Parameterized(Parameterizable):
#===========================================================================
def __init__(self, name=None, parameters=[], *a, **kw):
super(Parameterized, self).__init__(name=name, *a, **kw)
self._in_init_ = True
self.size = sum(p.size for p in self.parameters)
self.add_observer(self, self._parameters_changed_notification, -100)
if not self._has_fixes():
self._fixes_ = None
self._param_slices_ = []
self._connect_parameters()
del self._in_init_
#self._connect_parameters()
self.add_parameters(*parameters)
def build_pydot(self, G=None):
@ -125,6 +135,9 @@ class Parameterized(Parameterizable):
param._parent_.remove_parameter(param)
# make sure the size is set
if index is None:
start = sum(p.size for p in self.parameters)
self.constraints.shift_right(start, param.size)
self.priors.shift_right(start, param.size)
self.constraints.update(param.constraints, self.size)
self.priors.update(param.priors, self.size)
self.parameters.append(param)
@ -143,14 +156,16 @@ class Parameterized(Parameterizable):
parent.size += param.size
parent = parent._parent_
if not self._in_init_:
self._connect_parameters()
self._notify_parent_change()
self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names)
self._highest_parent_._notify_parent_change()
self._highest_parent_._connect_fixes()
else:
raise HierarchyError, """Parameter exists already and no copy made"""
raise HierarchyError, """Parameter exists already, try making a copy"""
def add_parameters(self, *parameters):
@ -198,26 +213,28 @@ class Parameterized(Parameterizable):
# no parameters for this class
return
if self.param_array.size != self.size:
self.param_array = np.empty(self.size, dtype=np.float64)
self._param_array_ = np.empty(self.size, dtype=np.float64)
if self.gradient.size != self.size:
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
old_size = 0
self._param_slices_ = []
for i, p in enumerate(self.parameters):
if not p.param_array.flags['C_CONTIGUOUS']:
raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"
p._parent_ = self
p._parent_index_ = i
pslice = slice(old_size, old_size + p.size)
# first connect all children
p._propagate_param_grad(self.param_array[pslice], self.gradient_full[pslice])
# then connect children to self
self.param_array[pslice] = p.param_array.flat # , requirements=['C', 'W']).ravel(order='C')
self.gradient_full[pslice] = p.gradient_full.flat # , requirements=['C', 'W']).ravel(order='C')
if not p.param_array.flags['C_CONTIGUOUS']:
raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"
p.param_array.data = self.param_array[pslice].data
p.gradient_full.data = self.gradient_full[pslice].data

View file

@ -8,6 +8,9 @@ from ..inference.latent_function_inference import var_dtc
from .. import likelihoods
from parameterization.variational import VariationalPosterior
import logging
logger = logging.getLogger("sparse gp")
class SparseGP(GP):
"""
A general purpose Sparse GP model
@ -46,7 +49,7 @@ class SparseGP(GP):
self.num_inducing = Z.shape[0]
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata)
logger.info("Adding Z as parameter")
self.add_parameter(self.Z, index=0)
def has_uncertain_inputs(self):
@ -57,10 +60,14 @@ class SparseGP(GP):
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
if isinstance(self.X, VariationalPosterior):
#gradients wrt kernel
dL_dKmm = self.grad_dict.pop('dL_dKmm')
dL_dKmm = self.grad_dict['dL_dKmm']
self.kern.update_gradients_full(dL_dKmm, self.Z, None)
target = self.kern.gradient.copy()
self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
self.kern.update_gradients_expectations(variational_posterior=self.X,
Z=self.Z,
dL_dpsi0=self.grad_dict['dL_dpsi0'],
dL_dpsi1=self.grad_dict['dL_dpsi1'],
dL_dpsi2=self.grad_dict['dL_dpsi2'])
self.kern.gradient += target
#gradients wrt Z

View file

@ -296,15 +296,16 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
from GPy.models import BayesianGPLVM
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 7, 9
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool)
Y[inan] = _np.nan
inan = _np.random.binomial(1, .8, size=Y.shape).astype(bool) # 80% missing data
Ymissing = Y.copy()
Ymissing[inan] = _np.nan
m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing,
m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
inference_method=VarDTCMissingData(inan=inan), kernel=k)
m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
@ -414,7 +415,7 @@ def olivetti_faces(optimize=True, verbose=True, plot=True):
if optimize: m.optimize('bfgs', messages=verbose, max_iters=1000)
if plot:
ax = m.plot_latent(which_indices=(0, 1))
y = m.likelihood.Y[0, :]
y = m.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False)
lvm = GPy.plotting.matplot_dep.visualize.lvm(m.X.mean[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')

View file

@ -9,6 +9,8 @@ import numpy as np
from ...util.misc import param_to_array
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
import logging, itertools
logger = logging.getLogger('vardtc')
class VarDTC(LatentFunctionInference):
"""
@ -192,11 +194,12 @@ class VarDTC(LatentFunctionInference):
return post, log_marginal, grad_dict
class VarDTCMissingData(LatentFunctionInference):
const_jitter = 1e-6
const_jitter = 1e-10
def __init__(self, limit=1, inan=None):
from ...util.caching import Cacher
self._Y = Cacher(self._subarray_computations, limit)
self._inan = inan
if inan is not None: self._inan = ~inan
else: self._inan = None
pass
def set_limit(self, limit):
@ -217,21 +220,35 @@ class VarDTCMissingData(LatentFunctionInference):
if self._inan is None:
inan = np.isnan(Y)
has_none = inan.any()
self._inan = ~inan
else:
inan = self._inan
has_none = True
if has_none:
from ...util.subarray_and_sorting import common_subarrays
self._subarray_indices = []
for v,ind in common_subarrays(inan, 1).iteritems():
if not np.all(v):
v = ~np.array(v, dtype=bool)
ind = np.array(ind, dtype=int)
if ind.size == Y.shape[1]:
ind = slice(None)
self._subarray_indices.append([v,ind])
Ys = [Y[v, :][:, ind] for v, ind in self._subarray_indices]
traces = [(y**2).sum() for y in Ys]
#print "caching missing data slices, this can take several minutes depending on the number of unique dimensions of the data..."
#csa = common_subarrays(inan, 1)
size = Y.shape[1]
#logger.info('preparing subarrays {:3.3%}'.format((i+1.)/size))
Ys = []
next_ten = [0.]
count = itertools.count()
for v, y in itertools.izip(inan.T, Y.T[:,:,None]):
i = count.next()
if ((i+1.)/size) >= next_ten[0]:
logger.info('preparing subarrays {:>6.1%}'.format((i+1.)/size))
next_ten[0] += .1
Ys.append(y[v,:])
next_ten = [0.]
count = itertools.count()
def trace(y):
i = count.next()
if ((i+1.)/size) >= next_ten[0]:
logger.info('preparing traces {:>6.1%}'.format((i+1.)/size))
next_ten[0] += .1
y = y[inan[:,i],i:i+1]
return np.einsum('ij,ij->', y,y)
traces = [trace(Y) for _ in xrange(size)]
return Ys, traces
else:
self._subarray_indices = [[slice(None),slice(None)]]
@ -253,7 +270,6 @@ class VarDTCMissingData(LatentFunctionInference):
beta_all = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
het_noise = beta_all.size != 1
import itertools
num_inducing = Z.shape[0]
dL_dpsi0_all = np.zeros(Y.shape[0])
@ -273,22 +289,17 @@ class VarDTCMissingData(LatentFunctionInference):
Lm = jitchol(Kmm)
if uncertain_inputs: LmInv = dtrtri(Lm)
VVT_factor_all = np.empty(Y.shape)
full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1]
if not full_VVT_factor:
psi1V = np.dot(Y.T*beta_all, psi1_all).T
for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices):
if het_noise: beta = beta_all[ind]
size = Y.shape[1]
next_ten = 0
for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)):
if ((i+1.)/size) >= next_ten:
logger.info('inference {:> 6.1%}'.format((i+1.)/size))
next_ten += .1
if het_noise: beta = beta_all[i]
else: beta = beta_all
VVT_factor = (beta*y)
try:
VVT_factor_all[v, ind].flat = VVT_factor.flat
except ValueError:
mult = np.ravel_multi_index((v.nonzero()[0][:,None],ind[None,:]), VVT_factor_all.shape)
VVT_factor_all.flat[mult] = VVT_factor
output_dim = y.shape[1]
VVT_factor = (y*beta)
output_dim = 1#len(ind)
psi0 = psi0_all[v]
psi1 = psi1_all[v, :]
@ -330,7 +341,6 @@ class VarDTCMissingData(LatentFunctionInference):
VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
psi1, het_noise, uncertain_inputs)
#import ipdb;ipdb.set_trace()
dL_dpsi0_all[v] += dL_dpsi0
dL_dpsi1_all[v, :] += dL_dpsi1
if uncertain_inputs:
@ -347,19 +357,20 @@ class VarDTCMissingData(LatentFunctionInference):
psi0, psi1, beta,
data_fit, num_data, output_dim, trYYT, Y)
if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf
else:
print 'foobar'
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
tmp, _ = dpotrs(LB, tmp, lower=1)
woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0]
#if full_VVT_factor:
woodbury_vector[:, i:i+1] = Cpsi1Vf
#else:
# print 'foobar'
# tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
# tmp, _ = dpotrs(LB, tmp, lower=1)
# woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0]
#import ipdb;ipdb.set_trace()
Bi, _ = dpotri(LB, lower=1)
symmetrify(Bi)
Bi = -dpotri(LB, lower=1)[0]
diag.add(Bi, 1)
woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None]
woodbury_inv_all[:, :, i:i+1] = backsub_both_sides(Lm, Bi)[:,:,None]
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
@ -376,23 +387,6 @@ class VarDTCMissingData(LatentFunctionInference):
'dL_dKnm':dL_dpsi1_all,
'dL_dthetaL':dL_dthetaL}
#get sufficient things for posterior prediction
#TODO: do we really want to do this in the loop?
#if not full_VVT_factor:
# print 'foobar'
# psi1V = np.dot(Y.T*beta_all, psi1_all).T
# tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
# tmp, _ = dpotrs(LB_all, tmp, lower=1)
# woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
#import ipdb;ipdb.set_trace()
#Bi, _ = dpotri(LB_all, lower=1)
#symmetrify(Bi)
#Bi = -dpotri(LB_all, lower=1)[0]
#from ...util import diag
#diag.add(Bi, 1)
#woodbury_inv = backsub_both_sides(Lm, Bi)
post = Posterior(woodbury_inv=woodbury_inv_all, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
return post, log_marginal, grad_dict

View file

@ -112,12 +112,12 @@ class VarDTC_minibatch(LatentFunctionInference):
if het_noise:
psi2_full += beta_slice*psi2
else:
psi2_full += psi2
psi2_full += psi2.sum(0)
else:
if het_noise:
psi2_full += beta_slice*np.outer(psi1,psi1)
else:
psi2_full += np.outer(psi1,psi1)
psi2_full += np.einsum('nm,jk->mk',psi1,psi1)
if not het_noise:
psi0_full *= beta
@ -128,7 +128,7 @@ class VarDTC_minibatch(LatentFunctionInference):
#======================================================================
# Compute Common Components
#======================================================================
self.psi1Y = psi1Y_full
Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm)
@ -159,7 +159,10 @@ class VarDTC_minibatch(LatentFunctionInference):
logL_R = -np.log(beta).sum()
else:
logL_R = -num_data*np.log(beta)
logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum())
logL = (
-(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.
-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum())
)
#======================================================================
# Compute dL_dKmm
@ -256,14 +259,14 @@ class VarDTC_minibatch(LatentFunctionInference):
if het_noise:
if uncertain_inputs:
psiR = np.einsum('mo,nmo->n',dL_dpsi2R,psi2)
psiR = np.einsum('mo,nmo->',dL_dpsi2R,psi2)
else:
psiR = np.einsum('nm,no,mo->n',psi1,psi1,dL_dpsi2R)
psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R)
dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1)
else:
if uncertain_inputs:
psiR = np.einsum('mo,mo->',dL_dpsi2R,psi2)
psiR = np.einsum('mo,nmo->',dL_dpsi2R,psi2)
else:
psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R)
@ -305,30 +308,44 @@ def update_gradients(model):
if isinstance(model.X, VariationalPosterior):
X_slice = model.X[n_range[0]:n_range[1]]
dL_dpsi1 = grad_dict['dL_dpsi1']#[None, :]
dL_dpsi2 = grad_dict['dL_dpsi2'][None, :, :]
#gradients w.r.t. kernel
model.kern.update_gradients_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
model.kern.update_gradients_expectations(variational_posterior=X_slice,Z=model.Z,dL_dpsi0=grad_dict['dL_dpsi0'],dL_dpsi1=dL_dpsi1,dL_dpsi2=dL_dpsi2)
kern_grad += model.kern.gradient
#gradients w.r.t. Z
model.Z.gradient += model.kern.gradients_Z_expectations(
dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice)
dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=dL_dpsi1,
dL_dpsi2=dL_dpsi2,
Z=model.Z, variational_posterior=X_slice)
#gradients w.r.t. posterior parameters of X
X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
model.set_X_gradients(X_slice, X_grad)
X_grad = model.kern.gradients_qX_expectations(
variational_posterior=X_slice,
Z=model.Z,
dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=dL_dpsi1,
dL_dpsi2=dL_dpsi2)
model.X.mean[n_range[0]:n_range[1]].gradient = X_grad[0]
model.X.variance[n_range[0]:n_range[1]].gradient = X_grad[1]
if het_noise:
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
else:
dL_dthetaL += grad_dict['dL_dthetaL']
# Set the gradients w.r.t. kernel
model.kern.gradient = kern_grad
#import ipdb;ipdb.set_trace()
model.grad_dict = grad_dict
if isinstance(model.X, VariationalPosterior):
# Update Log-likelihood
model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X)
# update for the KL divergence
model.variational_prior.update_gradients_KL(model.X)
# Set the gradients w.r.t. kernel
model.kern.gradient = kern_grad
# dL_dthetaL
model.likelihood.update_gradients(dL_dthetaL)

View file

@ -56,13 +56,13 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
if gtol is None:
gtol = 1e-5
sigma0 = 1.0e-8
sigma0 = 1.0e-7
fold = f(x, *optargs) # Initial function value.
function_eval = 1
fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient.
if any(np.isnan(gradnew)):
raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
#if any(np.isnan(gradnew)):
# raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
current_grad = np.dot(gradnew, gradnew)
gradold = gradnew.copy()
d = -gradnew # Initial search direction.
@ -168,13 +168,13 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
if Delta < 0.25:
beta = min(4.0 * beta, betamax)
if Delta > 0.75:
beta = max(0.5 * beta, betamin)
beta = max(0.25 * beta, betamin)
# Update search direction using Polak-Ribiere formula, or re-start
# in direction of negative gradient after nparams steps.
if nsuccess == x.size:
d = -gradnew
# beta = 1. # TODO: betareset!!
beta = 1. # This is not in the original paper
nsuccess = 0
elif success:
Gamma = np.dot(gradold - gradnew, gradnew) / (mu)

View file

@ -37,19 +37,21 @@ class BayesianGPLVM(SparseGP):
self.init = init
if X_variance is None:
self.logger.info("initializing latent space variance ~ uniform(0,.1)")
X_variance = np.random.uniform(0,.1,X.shape)
if Z is None:
self.logger.info("initializing inducing inputs")
Z = np.random.permutation(X.copy())[:num_inducing]
assert Z.shape[1] == X.shape[1]
if kernel is None:
self.logger.info("initializing kernel RBF")
kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) # + kern.white(input_dim)
if likelihood is None:
likelihood = Gaussian()
self.variational_prior = NormalPrior()
X = NormalPosterior(X, X_variance)
@ -65,6 +67,7 @@ class BayesianGPLVM(SparseGP):
inference_method = VarDTC()
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
self.logger.info("Adding X as parameter")
self.add_parameter(self.X, index=0)
def set_X_gradients(self, X, X_grad):

View file

@ -84,6 +84,7 @@ def plot_latent(model, labels=None, which_indices=None,
cmap=pb.cm.binary, **imshow_kwargs)
# make sure labels are in order of input:
labels = np.asarray(labels)
ulabels = []
for lab in labels:
if not lab in ulabels:

View file

@ -8,7 +8,7 @@ from base_plots import gpplot, x_frame1D, x_frame2D
from ...util.misc import param_to_array
from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
from scipy import sparse
def plot_fit(model, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
@ -61,11 +61,14 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
X = model.X.mean
X_variance = param_to_array(model.X.variance)
X_variance = model.X.variance
else:
X = model.X
X, Y = param_to_array(X, model.Y)
if hasattr(model, 'Z'): Z = param_to_array(model.Z)
#X, Y = param_to_array(X, model.Y)
Y = model.Y
if sparse.issparse(Y): Y = Y.todense().view(np.ndarray)
if hasattr(model, 'Z'): Z = model.Z
#work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs])

View file

@ -8,6 +8,7 @@ import GPy
import numpy as np
from GPy.core.parameterization.parameter_core import HierarchyError
from GPy.core.parameterization.observable_array import ObsAr
from GPy.core.parameterization.transformations import NegativeLogexp
class ArrayCoreTest(unittest.TestCase):
def setUp(self):
@ -38,10 +39,25 @@ class ParameterizedTest(unittest.TestCase):
self.test1.kern = self.rbf+self.white
self.test1.add_parameter(self.test1.kern)
self.test1.add_parameter(self.param, 0)
# print self.test1:
#=============================================================================
# test_model. | Value | Constraint | Prior | Tied to
# param | (25L, 2L) | {0.0,1.0} | |
# add.rbf.variance | 1.0 | 0.0,1.0 +ve | |
# add.rbf.lengthscale | 1.0 | 0.0,1.0 +ve | |
# add.white.variance | 1.0 | 0.0,1.0 +ve | |
#=============================================================================
x = np.linspace(-2,6,4)[:,None]
y = np.sin(x)
self.testmodel = GPy.models.GPRegression(x,y)
# print self.testmodel:
#=============================================================================
# GP_regression. | Value | Constraint | Prior | Tied to
# rbf.variance | 1.0 | +ve | |
# rbf.lengthscale | 1.0 | +ve | |
# Gaussian_noise.variance | 1.0 | +ve | |
#=============================================================================
def test_add_parameter(self):
self.assertEquals(self.rbf._parent_index_, 0)
@ -142,7 +158,12 @@ class ParameterizedTest(unittest.TestCase):
self.testmodel.randomize()
self.assertEqual(val, self.testmodel.kern.lengthscale)
def test_add_parameter_in_hierarchy(self):
from GPy.core import Param
self.test1.kern.rbf.add_parameter(Param("NEW", np.random.rand(2), NegativeLogexp()), 1)
self.assertListEqual(self.test1.constraints[NegativeLogexp()].tolist(), range(self.param.size+1, self.param.size+1 + 2))
self.assertListEqual(self.test1.constraints[GPy.transformations.Logistic(0,1)].tolist(), range(self.param.size))
self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp(0,1)].tolist(), np.r_[50, 53:55].tolist())
def test_regular_expression_misc(self):
self.testmodel.kern.lengthscale.fix()

View file

@ -18,7 +18,6 @@ class Cacher(object):
self.operation = operation
self.order = collections.deque()
self.cached_inputs = {} # point from cache_ids to a list of [ind_ids], which where used in cache cache_id
self.logger = logging.getLogger("cache")
#=======================================================================
# point from each ind_id to [ref(obj), cache_ids]
@ -36,23 +35,18 @@ class Cacher(object):
def combine_inputs(self, args, kw):
"Combines the args and kw in a unique way, such that ordering of kwargs does not lead to recompute"
self.logger.debug("combining args and kw")
return args + tuple(c[1] for c in sorted(kw.items(), key=lambda x: x[0]))
def prepare_cache_id(self, combined_args_kw, ignore_args):
"get the cacheid (conc. string of argument self.ids in order) ignoring ignore_args"
cache_id = "".join(self.id(a) for i, a in enumerate(combined_args_kw) if i not in ignore_args)
self.logger.debug("cache_id={} was created".format(cache_id))
return cache_id
def ensure_cache_length(self, cache_id):
"Ensures the cache is within its limits and has one place free"
self.logger.debug("cache length gets ensured")
if len(self.order) == self.limit:
self.logger.debug("cache limit of l={} was reached".format(self.limit))
# we have reached the limit, so lets release one element
cache_id = self.order.popleft()
self.logger.debug("cach_id '{}' gets removed".format(cache_id))
combined_args_kw = self.cached_inputs[cache_id]
for ind in combined_args_kw:
if ind is not None:
@ -66,7 +60,6 @@ class Cacher(object):
else:
cache_ids.remove(cache_id)
self.cached_input_ids[ind_id] = [ref, cache_ids]
self.logger.debug("removing caches")
del self.cached_outputs[cache_id]
del self.inputs_changed[cache_id]
del self.cached_inputs[cache_id]
@ -81,10 +74,8 @@ class Cacher(object):
if a is not None:
ind_id = self.id(a)
v = self.cached_input_ids.get(ind_id, [weakref.ref(a), []])
self.logger.debug("cache_id '{}' gets stored".format(cache_id))
v[1].append(cache_id)
if len(v[1]) == 1:
self.logger.debug("adding observer to object {}".format(repr(a)))
a.add_observer(self, self.on_cache_changed)
self.cached_input_ids[ind_id] = v
@ -108,28 +99,21 @@ class Cacher(object):
cache_id = self.prepare_cache_id(inputs, self.ignore_args)
# 2: if anything is not cachable, we will just return the operation, without caching
if reduce(lambda a, b: a or (not (isinstance(b, Observable) or b is None)), inputs, False):
self.logger.info("some inputs are not observable: returning without caching")
self.logger.debug(str(map(lambda x: isinstance(x, Observable) or x is None, inputs)))
self.logger.debug(str(map(repr, inputs)))
return self.operation(*args, **kw)
# 3&4: check whether this cache_id has been cached, then has it changed?
try:
if(self.inputs_changed[cache_id]):
self.logger.debug("{} already seen, but inputs changed. refreshing cacher".format(cache_id))
# 4: This happens, when elements have changed for this cache self.id
self.inputs_changed[cache_id] = False
self.cached_outputs[cache_id] = self.operation(*args, **kw)
except KeyError:
self.logger.info("{} never seen, creating cache entry".format(cache_id))
# 3: This is when we never saw this cache_id:
self.ensure_cache_length(cache_id)
self.add_to_cache(cache_id, inputs, self.operation(*args, **kw))
except:
self.logger.error("an error occurred while trying to run caching for {}, resetting".format(cache_id))
self.reset()
raise
# 5: We have seen this cache_id and it is cached:
self.logger.info("returning cache {}".format(cache_id))
return self.cached_outputs[cache_id]
def on_cache_changed(self, direct, which=None):
@ -143,7 +127,6 @@ class Cacher(object):
ind_id = self.id(what)
_, cache_ids = self.cached_input_ids.get(ind_id, [None, []])
for cache_id in cache_ids:
self.logger.info("callback from {} changed inputs from {}".format(ind_id, self.inputs_changed[cache_id]))
self.inputs_changed[cache_id] = True
def reset(self):

View file

@ -988,7 +988,8 @@ def olivetti_faces(data_set='olivetti_faces'):
for subject in range(40):
for image in range(10):
image_path = os.path.join(path, 'orl_faces', 's'+str(subject+1), str(image+1) + '.pgm')
Y.append(GPy.util.netpbmfile.imread(image_path).flatten())
from GPy.util import netpbmfile
Y.append(netpbmfile.imread(image_path).flatten())
lbls.append(subject)
Y = np.asarray(Y)
lbls = np.asarray(lbls)[:, None]

View file

@ -18,6 +18,6 @@ def initialize_latent(init, input_dim, Y):
var = Xr.var(0)
Xr -= Xr.mean(0)
Xr /= Xr.var(0)
Xr /= Xr.std(0)
return Xr, var/var.max()

View file

@ -15,6 +15,7 @@ import scipy
import warnings
import os
from config import *
import logging
_scipyversion = np.float64((scipy.__version__).split('.')[:2])
_fix_dpotri_scipy_bug = True
@ -93,14 +94,20 @@ def jitchol(A, maxtries=5):
raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
jitter = diagA.mean() * 1e-6
while maxtries > 0 and np.isfinite(jitter):
print 'Warning: adding jitter of {:.10e}'.format(jitter)
try:
return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
except:
jitter *= 10
finally:
maxtries -= 1
raise linalg.LinAlgError, "not positive definite, even with jitter."
import traceback
try: raise
except:
logging.warning('\n'.join(['Added jitter of {:.10e}'.format(jitter),
' in '+traceback.format_list(traceback.extract_stack(limit=2)[-2:-1])[0][2:]]))
import ipdb;ipdb.set_trace()
return L
@ -122,10 +129,17 @@ def dtrtrs(A, B, lower=1, trans=0, unitdiag=0):
"""
Wrapper for lapack dtrtrs function
DTRTRS solves a triangular system of the form
A * X = B or A**T * X = B,
where A is a triangular matrix of order N, and B is an N-by-NRHS
matrix. A check is made to verify that A is nonsingular.
:param A: Matrix A(triangular)
:param B: Matrix B
:param lower: is matrix lower (true) or upper (false)
:returns:
:returns: Solution to A * X = B or A**T * X = B
"""
A = np.asfortranarray(A)

331
GPy/util/netpbmfile.py Normal file
View file

@ -0,0 +1,331 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# netpbmfile.py
# Copyright (c) 2011-2013, Christoph Gohlke
# Copyright (c) 2011-2013, The Regents of the University of California
# Produced at the Laboratory for Fluorescence Dynamics.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the copyright holders nor the names of any
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Read and write image data from respectively to Netpbm files.
This implementation follows the Netpbm format specifications at
http://netpbm.sourceforge.net/doc/. No gamma correction is performed.
The following image formats are supported: PBM (bi-level), PGM (grayscale),
PPM (color), PAM (arbitrary), XV thumbnail (RGB332, read-only).
:Author:
`Christoph Gohlke <http://www.lfd.uci.edu/~gohlke/>`_
:Organization:
Laboratory for Fluorescence Dynamics, University of California, Irvine
:Version: 2013.01.18
Requirements
------------
* `CPython 2.7, 3.2 or 3.3 <http://www.python.org>`_
* `Numpy 1.7 <http://www.numpy.org>`_
* `Matplotlib 1.2 <http://www.matplotlib.org>`_ (optional for plotting)
Examples
--------
>>> im1 = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16)
>>> imsave('_tmp.pgm', im1)
>>> im2 = imread('_tmp.pgm')
>>> assert numpy.all(im1 == im2)
"""
from __future__ import division, print_function
import sys
import re
import math
from copy import deepcopy
import numpy
__version__ = '2013.01.18'
__docformat__ = 'restructuredtext en'
__all__ = ['imread', 'imsave', 'NetpbmFile']
def imread(filename, *args, **kwargs):
    """Return image data from a Netpbm file as a numpy array.

    :param filename: path of the Netpbm file to read.
    :param args: positional arguments forwarded to NetpbmFile.asarray().
    :param kwargs: keyword arguments forwarded to NetpbmFile.asarray().
    :returns: numpy array with the image data.
    :raises ValueError: if the file is not a valid Netpbm file.

    Examples
    --------
    >>> image = imread('_tmp.pgm')
    """
    # Construct the reader *outside* the try block: if the constructor
    # raises, `netpbm` would be unbound and the original `finally`
    # clause raised NameError, masking the real error.
    netpbm = NetpbmFile(filename)
    try:
        image = netpbm.asarray(*args, **kwargs)
    finally:
        netpbm.close()
    return image
def imsave(filename, data, maxval=None, pam=False):
    """Write image data to a Netpbm file.

    :param filename: path of the file to write.
    :param data: array-like image data (integer type).
    :param maxval: maximum sample value; inferred from data if None.
    :param pam: if True, write in PAM (P7) format.

    Examples
    --------
    >>> image = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16)
    >>> imsave('_tmp.pgm', image)
    """
    # Construct outside the try block: if the constructor raises,
    # `netpbm` would be unbound and the original `finally` clause
    # raised NameError, hiding the real error.
    netpbm = NetpbmFile(data, maxval=maxval)
    try:
        netpbm.write(filename, pam=pam)
    finally:
        netpbm.close()
class NetpbmFile(object):
    """Read and write Netpbm PAM, PBM, PGM, PPM files.

    An instance can be initialized from a filename, an open binary file
    object, or a numpy array (see ``__init__``).  Image data is read
    lazily via ``asarray`` and written via ``write``.
    """

    # Maps a file's magic number to its Netpbm tuple type.  b'P7 332' is
    # the XV thumbnail variant (read-only, palette-based RGB).
    _types = {b'P1': b'BLACKANDWHITE', b'P2': b'GRAYSCALE', b'P3': b'RGB',
              b'P4': b'BLACKANDWHITE', b'P5': b'GRAYSCALE', b'P6': b'RGB',
              b'P7 332': b'RGB', b'P7': b'RGB_ALPHA'}

    def __init__(self, arg=None, **kwargs):
        """Initialize instance from filename, open file, or numpy array."""
        # Pre-set all attributes to None so partially-initialized
        # instances are still safe to close()/repr.
        for attr in ('header', 'magicnum', 'width', 'height', 'maxval',
                     'depth', 'tupltypes', '_filename', '_fh', '_data'):
            setattr(self, attr, None)
        if arg is None:
            # Empty image.
            self._fromdata([], **kwargs)
        elif isinstance(arg, basestring):
            # Path: open the file ourselves and remember we own it.
            self._fh = open(arg, 'rb')
            self._filename = arg
            self._fromfile(self._fh, **kwargs)
        elif hasattr(arg, 'seek'):
            # File-like object supplied by the caller.
            self._fromfile(arg, **kwargs)
            self._fh = arg
        else:
            # Anything else is treated as array-like image data.
            self._fromdata(arg, **kwargs)

    def asarray(self, copy=True, cache=False, **kwargs):
        """Return image data from file as numpy array.

        :param copy: if True, return a deep copy of cached data.
        :param cache: if True, keep the decoded data on the instance.
        """
        data = self._data
        if data is None:
            data = self._read_data(self._fh, **kwargs)
            if cache:
                self._data = data
            else:
                # Freshly-read data is not shared; no copy needed.
                return data
        return deepcopy(data) if copy else data

    def write(self, arg, **kwargs):
        """Write instance to file (path or open binary file object)."""
        if hasattr(arg, 'seek'):
            self._tofile(arg, **kwargs)
        else:
            with open(arg, 'wb') as fid:
                self._tofile(fid, **kwargs)

    def close(self):
        """Close open file. Future asarray calls might fail."""
        # Only close handles this instance opened itself (from a path).
        if self._filename and self._fh:
            self._fh.close()
            self._fh = None

    def __del__(self):
        self.close()

    def _fromfile(self, fh):
        """Initialize instance from open file."""
        fh.seek(0)
        # 4096 bytes is assumed sufficient to hold any header.
        data = fh.read(4096)
        # Second byte of the magic number must be '1'..'7'.
        if (len(data) < 7) or not (b'0' < data[1:2] < b'8'):
            raise ValueError("Not a Netpbm file:\n%s" % data[:32])
        # Try the PAM (P7) header first, fall back to classic PNM.
        try:
            self._read_pam_header(data)
        except Exception:
            try:
                self._read_pnm_header(data)
            except Exception:
                raise ValueError("Not a Netpbm file:\n%s" % data[:32])

    def _read_pam_header(self, data):
        """Read PAM header and initialize instance."""
        # Capture the whole header plus the HEIGHT/WIDTH/DEPTH/MAXVAL
        # key-value entries as separate groups.
        regroups = re.search(
            b"(^P7[\n\r]+(?:(?:[\n\r]+)|(?:#.*)|"
            b"(HEIGHT\s+\d+)|(WIDTH\s+\d+)|(DEPTH\s+\d+)|(MAXVAL\s+\d+)|"
            b"(?:TUPLTYPE\s+\w+))*ENDHDR\n)", data).groups()
        self.header = regroups[0]
        self.magicnum = b'P7'
        # Each remaining group is b"KEY value"; store as lowercase attr.
        for group in regroups[1:]:
            key, value = group.split()
            setattr(self, unicode(key).lower(), int(value))
        matches = re.findall(b"(TUPLTYPE\s+\w+)", self.header)
        self.tupltypes = [s.split(None, 1)[1] for s in matches]

    def _read_pnm_header(self, data):
        """Read PNM header and initialize instance."""
        # P1/P4 (bi-level) headers have no MAXVAL field.
        bpm = data[1:2] in b"14"
        # The `* (not bpm)` term drops the maxval sub-pattern for
        # bi-level files; `(1, ) * bpm` then supplies maxval=1.
        regroups = re.search(b"".join((
            b"(^(P[123456]|P7 332)\s+(?:#.*[\r\n])*",
            b"\s*(\d+)\s+(?:#.*[\r\n])*",
            b"\s*(\d+)\s+(?:#.*[\r\n])*" * (not bpm),
            b"\s*(\d+)\s(?:\s*#.*[\r\n]\s)*)")), data).groups() + (1, ) * bpm
        self.header = regroups[0]
        self.magicnum = regroups[1]
        self.width = int(regroups[2])
        self.height = int(regroups[3])
        self.maxval = int(regroups[4])
        self.depth = 3 if self.magicnum in b"P3P6P7 332" else 1
        self.tupltypes = [self._types[self.magicnum]]

    def _read_data(self, fh, byteorder='>'):
        """Return image data from open file as numpy array.

        :param byteorder: byte order of 16-bit samples ('>' per spec).
        """
        fh.seek(len(self.header))
        data = fh.read()
        # Samples are 1 byte up to maxval 255, else big-endian 2 bytes.
        dtype = 'u1' if self.maxval < 256 else byteorder + 'u2'
        depth = 1 if self.magicnum == b"P7 332" else self.depth
        # Leading -1 allows multiple images concatenated in one file.
        shape = [-1, self.height, self.width, depth]
        size = numpy.prod(shape[1:])
        if self.magicnum in b"P1P2P3":
            # ASCII formats: whitespace-separated sample values.
            data = numpy.array(data.split(None, size)[:size], dtype)
            data = data.reshape(shape)
        elif self.maxval == 1:
            # Packed bi-level data: 8 pixels per byte, rows padded.
            shape[2] = int(math.ceil(self.width / 8))
            data = numpy.frombuffer(data, dtype).reshape(shape)
            data = numpy.unpackbits(data, axis=-2)[:, :, :self.width, :]
        else:
            data = numpy.frombuffer(data, dtype)
            # Truncate any trailing partial image before reshaping.
            data = data[:size * (data.size // size)].reshape(shape)
        # Squeeze singleton image-count and depth axes.
        if data.shape[0] < 2:
            data = data.reshape(data.shape[1:])
        if data.shape[-1] < 2:
            data = data.reshape(data.shape[:-1])
        if self.magicnum == b"P7 332":
            # Expand RGB332 palette indices to full RGB.
            rgb332 = numpy.array(list(numpy.ndindex(8, 8, 4)), numpy.uint8)
            rgb332 *= [36, 36, 85]
            data = numpy.take(rgb332, data, axis=0)
        return data

    def _fromdata(self, data, maxval=None):
        """Initialize instance from numpy array.

        :raises ValueError: for non-integer dtypes or out-of-range data.
        """
        data = numpy.array(data, ndmin=2, copy=True)
        if data.dtype.kind not in "uib":
            raise ValueError("not an integer type: %s" % data.dtype)
        if data.dtype.kind == 'i' and numpy.min(data) < 0:
            raise ValueError("data out of range: %i" % numpy.min(data))
        if maxval is None:
            maxval = numpy.max(data)
            # Round up to the nearest representable sample width.
            maxval = 255 if maxval < 256 else 65535
        if maxval < 0 or maxval > 65535:
            raise ValueError("data out of range: %i" % maxval)
        data = data.astype('u1' if maxval < 256 else '>u2')
        self._data = data
        # A trailing axis of 3 (RGB) or 4 (RGBA) is a color image.
        if data.ndim > 2 and data.shape[-1] in (3, 4):
            self.depth = data.shape[-1]
            self.width = data.shape[-2]
            self.height = data.shape[-3]
            self.magicnum = b'P7' if self.depth == 4 else b'P6'
        else:
            self.depth = 1
            self.width = data.shape[-1]
            self.height = data.shape[-2]
            self.magicnum = b'P5' if maxval > 1 else b'P4'
        self.maxval = maxval
        self.tupltypes = [self._types[self.magicnum]]
        self.header = self._header()

    def _tofile(self, fh, pam=False):
        """Write Netpbm file to open binary file object."""
        fh.seek(0)
        fh.write(self._header(pam))
        data = self.asarray(copy=False)
        if self.maxval == 1:
            # Bi-level data is written packed, 8 pixels per byte.
            data = numpy.packbits(data, axis=-1)
        data.tofile(fh)

    def _header(self, pam=False):
        """Return file header as byte string."""
        if pam or self.magicnum == b'P7':
            header = "\n".join((
                "P7",
                "HEIGHT %i" % self.height,
                "WIDTH %i" % self.width,
                "DEPTH %i" % self.depth,
                "MAXVAL %i" % self.maxval,
                "\n".join("TUPLTYPE %s" % unicode(i) for i in self.tupltypes),
                "ENDHDR\n"))
        elif self.maxval == 1:
            header = "P4 %i %i\n" % (self.width, self.height)
        elif self.depth == 1:
            header = "P5 %i %i %i\n" % (self.width, self.height, self.maxval)
        else:
            header = "P6 %i %i %i\n" % (self.width, self.height, self.maxval)
        if sys.version_info[0] > 2:
            # File is opened in binary mode; header must be bytes on Py3.
            header = bytes(header, 'ascii')
        return header

    def __str__(self):
        """Return information about instance."""
        return unicode(self.header)
# Python 3 compatibility shims: the module body above uses the Python 2
# names `basestring` and `unicode`; alias them on Python 3.
if sys.version_info[0] > 2:
    basestring = str
    unicode = lambda x: str(x, 'ascii')
if __name__ == "__main__":
# Show images specified on command line or all images in current directory
from glob import glob
from matplotlib import pyplot
files = sys.argv[1:] if len(sys.argv) > 1 else glob('*.p*m')
for fname in files:
try:
pam = NetpbmFile(fname)
img = pam.asarray(copy=False)
if False:
pam.write('_tmp.pgm.out', pam=True)
img2 = imread('_tmp.pgm.out')
assert numpy.all(img == img2)
imsave('_tmp.pgm.out', img)
img2 = imread('_tmp.pgm.out')
assert numpy.all(img == img2)
pam.close()
except ValueError as e:
print(fname, e)
continue
_shape = img.shape
if img.ndim > 3 or (img.ndim > 2 and img.shape[-1] not in (3, 4)):
img = img[0]
cmap = 'gray' if pam.maxval > 1 else 'binary'
pyplot.imshow(img, cmap, interpolation='nearest')
pyplot.title("%s %s %s %s" % (fname, unicode(pam.magicnum),
_shape, img.dtype))
pyplot.show()

View file

@ -48,14 +48,10 @@ def common_subarrays(X, axis=0):
assert X.ndim == 2 and axis in (0,1), "Only implemented for 2D arrays"
subarrays = defaultdict(list)
cnt = count()
logger = logging.getLogger("common_subarrays")
def accumulate(x, s, c):
logger.debug("creating tuple")
t = tuple(x)
logger.debug("tuple done")
col = c.next()
iadd(s[t], [col])
logger.debug("added col {}".format(col))
return None
if axis == 0: [accumulate(x, subarrays, cnt) for x in X]
else: [accumulate(x, subarrays, cnt) for x in X.T]