Mirror of https://github.com/SheffieldML/GPy.git
Synced 2026-05-09 03:52:39 +02:00

Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in: commit b5e3788d0a

19 changed files with 686 additions and 497 deletions
@@ -39,6 +39,11 @@ def load(file_path):
     :param file_name: path/to/file.pickle
     """
     import cPickle as pickle
+    try:
+        with open(file_path, 'rb') as f:
+            m = pickle.load(f)
+    except:
+        import pickle as pickle
     with open(file_path, 'rb') as f:
         m = pickle.load(f)
     return m
 
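The hunk above falls back from cPickle to the pure-Python pickle module when the first load attempt fails, so pickles can be read on either Python 2 or 3. A minimal sketch of the same idea, using only the standard library (an illustration, not the committed code):

def load_pickle(file_path):
    # Prefer the C pickler where it exists (Python 2 only); fall back to
    # the pure-Python module, the only implementation on Python 3.
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    with open(file_path, 'rb') as f:
        return pickle.load(f)
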
@@ -3,16 +3,12 @@
 import numpy as np
 import sys
-import warnings
 from .. import kern
-from ..util.linalg import dtrtrs
 from model import Model
 from parameterization import ObsAr
 from .. import likelihoods
-from ..likelihoods.gaussian import Gaussian
-from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation, LatentFunctionInference
+from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
 from parameterization.variational import VariationalPosterior
-from scipy.sparse.base import issparse
 
 import logging
 from GPy.util.normalizer import MeanNorm
 
@@ -115,7 +111,6 @@ class GP(Model):
         if X is not None:
             if self.X in self.parameters:
                 # LVM models
-                from ..core.parameterization.variational import VariationalPosterior
                 if isinstance(self.X, VariationalPosterior):
                     assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
                     self.unlink_parameter(self.X)
 
@@ -458,7 +453,11 @@ class GP(Model):
         :param optimize: whether to optimize the location of new X (True by default)
         :type optimize: boolean
         :return: a tuple containing the posterior estimation of X and the model that optimize X
+<<<<<<< HEAD
+        :rtype: (GPy.core.parameterization.variational.VariationalPosterior or numpy.ndarray, GPy.core.Model)
+=======
         :rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` or numpy.ndarray, :class:`~GPy.core.model.Model`)
+>>>>>>> 22d30d9d39c70f806fe5bcb815cce9c8eb0f8dca
         """
         from ..inference.latent_function_inference.inferenceX import infer_newX
         return infer_newX(self, Y_new, optimize=optimize)
 
@@ -238,6 +238,10 @@ class Model(Parameterized):
         if self.size == 0:
             print 'nothing to optimize'
 
+        if not self.update_model():
+            print "setting updates on again"
+            self.update_model(True)
+
         if start == None:
             start = self.optimizer_array
 
GPy/core/parameterization/observable.py (new file, 69 lines)
@@ -0,0 +1,69 @@
+'''
+Created on 30 Oct 2014
+
+@author: maxz
+'''
+
+
+class Observable(object):
+    """
+    Observable pattern for parameterization.
+
+    This Object allows for observers to register with self and a (bound!) function
+    as an observer. Every time the observable changes, it sends a notification with
+    self as only argument to all its observers.
+    """
+    def __init__(self, *args, **kwargs):
+        super(Observable, self).__init__()
+        from lists_and_dicts import ObserverList
+        self.observers = ObserverList()
+
+    def add_observer(self, observer, callble, priority=0):
+        """
+        Add an observer `observer` with the callback `callble`
+        and priority `priority` to this observers list.
+        """
+        self.observers.add(priority, observer, callble)
+
+    def remove_observer(self, observer, callble=None):
+        """
+        Either (if callble is None) remove all callables,
+        which were added alongside observer,
+        or remove callable `callble` which was added alongside
+        the observer `observer`.
+        """
+        to_remove = []
+        for poc in self.observers:
+            _, obs, clble = poc
+            if callble is not None:
+                if (obs is observer) and (callble == clble):
+                    to_remove.append(poc)
+            else:
+                if obs is observer:
+                    to_remove.append(poc)
+        for r in to_remove:
+            self.observers.remove(*r)
+
+    def notify_observers(self, which=None, min_priority=None):
+        """
+        Notifies all observers. Which is the element, which kicked off this
+        notification loop. The first argument will be self, the second `which`.
+
+        NOTE: notifies only observers with priority p > min_priority!
+
+        :param min_priority: only notify observers with priority > min_priority
+                             if min_priority is None, notify all observers in order
+        """
+        if which is None:
+            which = self
+        if min_priority is None:
+            [callble(self, which=which) for _, _, callble in self.observers]
+        else:
+            for p, _, callble in self.observers:
+                if p <= min_priority:
+                    break
+                callble(self, which=which)
+
+    def change_priority(self, observer, callble, priority):
+        self.remove_observer(observer, callble)
+        self.add_observer(observer, callble, priority)
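The new Observable keeps a priority-ordered ObserverList and, on notification, calls every registered callback with the observable itself and the element that triggered the change. A hedged usage sketch — the Logger class and on_change name are invented for illustration, and Observable itself needs the surrounding GPy package for its ObserverList import:

class Logger(object):
    def on_change(self, me, which=None):
        # receives the notifying Observable and the element that kicked it off
        print('changed:', which)

obs = Observable()
logger = Logger()
obs.add_observer(logger, logger.on_change, priority=0)
obs.notify_observers()           # calls logger.on_change(obs, which=obs)
obs.remove_observer(logger)      # drops every callback registered for logger
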
@@ -1,10 +1,11 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-__updated__ = '2014-10-29'
+__updated__ = '2014-11-11'
 
 import numpy as np
-from parameter_core import Observable, Pickleable
+from parameter_core import Pickleable
+from observable import Observable
 
 class ObsAr(np.ndarray, Pickleable, Observable):
     """
 
@@ -17,6 +17,7 @@ from transformations import Transformation,Logexp, NegativeLogexp, Logistic, __fixed__
 import numpy as np
 import re
 import logging
+from updateable import Updateable
 
 class HierarchyError(Exception):
     """
@@ -40,115 +41,6 @@ def adjust_name_for_printing(name):
     return ''
 
 
-class Observable(object):
-    """
-    Observable pattern for parameterization.
-
-    This Object allows for observers to register with self and a (bound!) function
-    as an observer. Every time the observable changes, it sends a notification with
-    self as only argument to all its observers.
-    """
-    _updates = True
-    def __init__(self, *args, **kwargs):
-        super(Observable, self).__init__()
-        from lists_and_dicts import ObserverList
-        self.observers = ObserverList()
-
-    @property
-    def updates(self):
-        raise DeprecationWarning("updates is now a function, see update(True|False|None)")
-
-    @updates.setter
-    def updates(self, ups):
-        raise DeprecationWarning("updates is now a function, see update(True|False|None)")
-
-    def update_model(self, updates=None):
-        """
-        Get or set, whether automatic updates are performed. When updates are
-        off, the model might be in a non-working state. To make the model work
-        turn updates on again.
-
-        :param bool|None updates:
-
-            bool: whether to do updates
-            None: get the current update state
-        """
-        if updates is None:
-            p = getattr(self, '_highest_parent_', None)
-            if p is not None:
-                self._updates = p._updates
-            return self._updates
-        assert isinstance(updates, bool), "updates are either on (True) or off (False)"
-        p = getattr(self, '_highest_parent_', None)
-        if p is not None:
-            p._updates = updates
-        else:
-            self._updates = updates
-        self.trigger_update()
-
-    def toggle_update(self):
-        self.update_model(not self.update())
-
-    def trigger_update(self, trigger_parent=True):
-        """
-        Update the model from the current state.
-        Make sure that updates are on, otherwise this
-        method will do nothing
-
-        :param bool trigger_parent: Whether to trigger the parent, after self has updated
-        """
-        if not self.update_model() or (hasattr(self, "_in_init_") and self._in_init_):
-            #print "Warning: updates are off, updating the model will do nothing"
-            return
-        self._trigger_params_changed(trigger_parent)
-
-    def add_observer(self, observer, callble, priority=0):
-        """
-        Add an observer `observer` with the callback `callble`
-        and priority `priority` to this observers list.
-        """
-        self.observers.add(priority, observer, callble)
-
-    def remove_observer(self, observer, callble=None):
-        """
-        Either (if callble is None) remove all callables,
-        which were added alongside observer,
-        or remove callable `callble` which was added alongside
-        the observer `observer`.
-        """
-        to_remove = []
-        for poc in self.observers:
-            _, obs, clble = poc
-            if callble is not None:
-                if (obs is observer) and (callble == clble):
-                    to_remove.append(poc)
-            else:
-                if obs is observer:
-                    to_remove.append(poc)
-        for r in to_remove:
-            self.observers.remove(*r)
-
-    def notify_observers(self, which=None, min_priority=None):
-        """
-        Notifies all observers. Which is the element, which kicked off this
-        notification loop. The first argument will be self, the second `which`.
-
-        NOTE: notifies only observers with priority p > min_priority!
-        ^^^^^^^^^^^^^^^^
-        :param min_priority: only notify observers with priority > min_priority
-                             if min_priority is None, notify all observers in order
-        """
-        if not self.update_model():
-            return
-        if which is None:
-            which = self
-        if min_priority is None:
-            [callble(self, which=which) for _, _, callble in self.observers]
-        else:
-            for p, _, callble in self.observers:
-                if p <= min_priority:
-                    break
-                callble(self, which=which)
-
-
 class Parentable(object):
     """
 
@@ -363,7 +255,7 @@ class Nameable(Gradcheckable):
         return adjust(self.name)
 
 
-class Indexable(Nameable, Observable):
+class Indexable(Nameable, Updateable):
     """
     Make an object constrainable with Priors and Transformations.
     TODO: Mappings!!
 
@@ -757,7 +649,7 @@ class OptimizationHandlable(Indexable):
         #self._highest_parent_.tie.propagate_val()
 
         self._optimizer_copy_transformed = False
-        self._trigger_params_changed()
+        self.trigger_update()
 
     def _get_params_transformed(self):
         raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
 
@@ -1117,3 +1009,29 @@ class Parameterizable(OptimizationHandlable):
         updates get passed through. See :py:function:``GPy.core.param.Observable.add_observer``
         """
         pass
+
+    def save(self, filename, ftype='HDF5'):
+        """
+        Save all the model parameters into a file (HDF5 by default).
+        """
+        from . import Param
+        from ...util.misc import param_to_array
+        def gather_params(self, plist):
+            if isinstance(self,Param):
+                plist.append(self)
+        plist = []
+        self.traverse(gather_params, plist)
+        names = self.parameter_names(adjust_for_printing=True)
+        if ftype=='HDF5':
+            try:
+                import h5py
+                f = h5py.File(filename,'w')
+                for p,n in zip(plist,names):
+                    n = n.replace('.','_')
+                    p = param_to_array(p)
+                    d = f.create_dataset(n,p.shape,dtype=p.dtype)
+                    d[:] = p
+                f.close()
+            except:
+                raise 'Fails to write the parameters into a HDF5 file!'
 
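The new save method walks the parameter hierarchy with traverse, collects every Param, and writes each one as an HDF5 dataset named after the parameter (with '.' replaced by '_'). A hedged round-trip sketch — m stands for any GPy model, and the dataset names depend on its kernel, so treat the names below as placeholders:

m.save('params.h5')                  # requires h5py

import h5py
with h5py.File('params.h5', 'r') as f:
    for name in f:                   # e.g. 'rbf_lengthscale'; varies by model
        print(name, f[name][:])
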
@@ -131,6 +131,13 @@ class NormalTheta(Transformation):
     def __str__(self):
         return "theta"
 
+    def __getstate__(self):
+        return [self.mu_indices, self.var_indices]
+
+    def __setstate__(self, state):
+        self.mu_indices = state[0]
+        self.var_indices = state[1]
+
 class NormalNaturalAntti(NormalTheta):
     _instances = []
     _logexp = Logexp()
 
GPy/core/parameterization/updateable.py (new file, 63 lines)
@@ -0,0 +1,63 @@
+'''
+Created on 11 Nov 2014
+
+@author: maxz
+'''
+from observable import Observable
+
+
+class Updateable(Observable):
+    """
+    A model can be updated or not.
+    Make sure updates can be switched on and off.
+    """
+    _updates = True
+    def __init__(self, *args, **kwargs):
+        super(Updateable, self).__init__(*args, **kwargs)
+
+    @property
+    def updates(self):
+        raise DeprecationWarning("updates is now a function, see update(True|False|None)")
+
+    @updates.setter
+    def updates(self, ups):
+        raise DeprecationWarning("updates is now a function, see update(True|False|None)")
+
+    def update_model(self, updates=None):
+        """
+        Get or set, whether automatic updates are performed. When updates are
+        off, the model might be in a non-working state. To make the model work
+        turn updates on again.
+
+        :param bool|None updates:
+
+            bool: whether to do updates
+            None: get the current update state
+        """
+        if updates is None:
+            p = getattr(self, '_highest_parent_', None)
+            if p is not None:
+                self._updates = p._updates
+            return self._updates
+        assert isinstance(updates, bool), "updates are either on (True) or off (False)"
+        p = getattr(self, '_highest_parent_', None)
+        if p is not None:
+            p._updates = updates
+        self._updates = updates
+        self.trigger_update()
+
+    def toggle_update(self):
+        self.update_model(not self.update_model())
+
+    def trigger_update(self, trigger_parent=True):
+        """
+        Update the model from the current state.
+        Make sure that updates are on, otherwise this
+        method will do nothing
+
+        :param bool trigger_parent: Whether to trigger the parent, after self has updated
+        """
+        if not self.update_model() or (hasattr(self, "_in_init_") and self._in_init_):
+            #print "Warning: updates are off, updating the model will do nothing"
+            return
+        self._trigger_params_changed(trigger_parent)
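Updateable separates the on/off switch for model recomputation from the plain observer machinery, which lets callers batch several parameter edits into one update. A sketch, assuming m is a GPy model (the parameter paths are placeholders):

m.update_model(False)    # suspend recomputation while editing
m.rbf.lengthscale = 2.   # placeholder parameter path
m.rbf.variance = .5
m.update_model(True)     # recompute once; trigger_update() fires here
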
@@ -8,6 +8,8 @@ import numpy as np
 from parameterized import Parameterized
 from param import Param
 from transformations import Logexp, Logistic,__fixed__
+from GPy.util.misc import param_to_array
+from GPy.util.caching import Cache_this
 
 class VariationalPrior(Parameterized):
     def __init__(self, name='latent space', **kw):
 
@@ -48,7 +50,8 @@ class SpikeAndSlabPrior(VariationalPrior):
     def KL_divergence(self, variational_posterior):
         mu = variational_posterior.mean
         S = variational_posterior.variance
-        gamma = variational_posterior.binary_prob
+        gamma,gamma1 = variational_posterior.gamma_probabilities()
+        log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
         if len(self.pi.shape)==2:
             idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
             pi = self.pi[idx]
 
@@ -57,20 +60,21 @@ class SpikeAndSlabPrior(VariationalPrior):
 
         var_mean = np.square(mu)/self.variance
         var_S = (S/self.variance - np.log(S))
-        var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
+        var_gamma = (gamma*(log_gamma-np.log(pi))).sum()+(gamma1*(log_gamma1-np.log(1-pi))).sum()
         return var_gamma+ (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.
 
     def update_gradients_KL(self, variational_posterior):
         mu = variational_posterior.mean
         S = variational_posterior.variance
-        gamma = variational_posterior.binary_prob
+        gamma,gamma1 = variational_posterior.gamma_probabilities()
+        log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
         if len(self.pi.shape)==2:
             idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
             pi = self.pi[idx]
         else:
             pi = self.pi
 
-        gamma.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
+        variational_posterior.binary_prob.gradient -= (np.log((1-pi)/pi)+log_gamma-log_gamma1+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.)*gamma*gamma1
         mu.gradient -= gamma*mu/self.variance
         S.gradient -= (1./self.variance - 1./S) * gamma /2.
         if self.learnPi:
 
@@ -158,9 +162,25 @@ class SpikeAndSlabPosterior(VariationalPosterior):
         binary_prob : the probability of the distribution on the slab part.
         """
         super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
-        self.gamma = Param("binary_prob",binary_prob, Logistic(1e-10,1.-1e-10))
+        self.gamma = Param("binary_prob",binary_prob)
         self.link_parameter(self.gamma)
 
+    @Cache_this(limit=5)
+    def gamma_probabilities(self):
+        prob = np.zeros_like(param_to_array(self.gamma))
+        prob[self.gamma>-710] = 1./(1.+np.exp(-self.gamma[self.gamma>-710]))
+        prob1 = np.zeros_like(param_to_array(self.gamma))
+        prob1[self.gamma<710] = 1./(1.+np.exp(self.gamma[self.gamma<710]))
+        return prob, prob1
+
+    @Cache_this(limit=5)
+    def gamma_log_prob(self):
+        loggamma = param_to_array(self.gamma).copy()
+        loggamma[loggamma>-40] = -np.log1p(np.exp(-loggamma[loggamma>-40]))
+        loggamma1 = param_to_array(self.gamma).copy()
+        loggamma1[loggamma1<40] = -np.log1p(np.exp(loggamma1[loggamma1<40]))
+        return loggamma,loggamma1
+
     def set_gradients(self, grad):
         self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad
 
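With the Logistic constraint removed, binary_prob is now stored as an unconstrained logit and the probabilities are recovered on demand. The thresholds guard against overflow: np.exp overflows float64 for arguments above roughly 709 (hence the ±710 masks), and log1p keeps log(1+exp(x)) accurate when exp(x) is tiny. A self-contained illustration of the same guarded sigmoid/log-sigmoid (not the committed code):

import numpy as np

x = np.array([-800., -40., 0., 40.])      # logits, including extreme values

sig = np.zeros_like(x)                    # stays exactly 0 where exp(-x) would overflow
m = x > -710
sig[m] = 1. / (1. + np.exp(-x[m]))

logsig = x.copy()                         # log sigmoid(x) ~ x for very negative x
m = x > -40
logsig[m] = -np.log1p(np.exp(-x[m]))

print(sig)     # [0.0, ~4.2e-18, 0.5, ~1.0]
print(logsig)  # [-800.0, -40.0, -log(2), ~-4.2e-18]
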
@@ -219,7 +219,9 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
     import numpy as np
     np.random.seed(3000)
 
-    k = GPy.kern.Matern32(Q_signal, 10., lengthscale=1+(np.random.uniform(1,6,Q_signal)), ARD=1)
+    k = GPy.kern.Matern32(Q_signal, 1., lengthscale=(np.random.uniform(1, 6, Q_signal)), ARD=1)
+    for i in range(Q_signal):
+        k += GPy.kern.PeriodicExponential(1, variance=1., active_dims=[i], period=3., lower=-2, upper=6)
     t = np.c_[[np.linspace(-1, 5, N) for _ in range(Q_signal)]].T
     K = k.K(t)
     s2, s1, s3, sS = np.random.multivariate_normal(np.zeros(K.shape[0]), K, size=(4))[:, :, None]
 
@@ -365,7 +367,7 @@ def ssgplvm_simulation(optimize=True, verbose=1,
 
 def bgplvm_simulation_missing_data(optimize=True, verbose=1,
                                    plot=True, plot_sim=False,
-                                   max_iters=2e4,
+                                   max_iters=2e4, percent_missing=.1,
                                    ):
     from GPy import kern
     from GPy.models.bayesian_gplvm_minibatch import BayesianGPLVMMiniBatch
 
@@ -375,7 +377,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
     Y = Ylist[0]
     k = kern.Linear(Q, ARD=True) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
 
-    inan = _np.random.binomial(1, .2, size=Y.shape).astype(bool) # 80% missing data
+    inan = _np.random.binomial(1, percent_missing, size=Y.shape).astype(bool) # 80% missing data
     Ymissing = Y.copy()
     Ymissing[inan] = _np.nan
 
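percent_missing is the Bernoulli success probability, so it is the expected fraction of entries set to NaN; note the trailing "# 80% missing data" comment is stale for both the old value (.2) and the new default (.1). A small self-contained sketch of the masking step:

import numpy as np

Y = np.random.randn(100, 5)
percent_missing = .1
inan = np.random.binomial(1, percent_missing, size=Y.shape).astype(bool)
Ymissing = Y.copy()
Ymissing[inan] = np.nan
print(np.isnan(Ymissing).mean())   # ~0.1, the expected missing fraction
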
@@ -7,6 +7,201 @@ The package for the psi statistics computation
 import numpy as np
 
+try:
+    from scipy import weave
+
+    def _psicomputations(variance, lengthscale, Z, variational_posterior):
+        """
+        Z - MxQ
+        mu - NxQ
+        S - NxQ
+        gamma - NxQ
+        """
+        # here are the "statistics" for psi0, psi1 and psi2
+        # Produced intermediate results:
+        # _psi1 NxM
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
+
+        N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
+        l2 = np.square(lengthscale)
+        log_denom1 = np.log(S/l2+1)
+        log_denom2 = np.log(2*S/l2+1)
+        log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
+        variance = float(variance)
+        psi0 = np.empty(N)
+        psi0[:] = variance
+        psi1 = np.empty((N,M))
+        psi2n = np.empty((N,M,M))
+
+        from ....util.misc import param_to_array
+        S = param_to_array(S)
+        mu = param_to_array(mu)
+        Z = param_to_array(Z)
+
+        support_code = """
+        #include <math.h>
+        """
+        code = """
+        for(int n=0; n<N; n++) {
+            for(int m1=0;m1<M;m1++) {
+                double log_psi1=0;
+                for(int m2=0;m2<=m1;m2++) {
+                    double log_psi2_n=0;
+                    for(int q=0;q<Q;q++) {
+                        double Snq = S(n,q);
+                        double lq = l2(q);
+                        double Zm1q = Z(m1,q);
+                        double Zm2q = Z(m2,q);
+
+                        if(m2==0) {
+                            // Compute Psi_1
+                            double muZ = mu(n,q)-Z(m1,q);
+
+                            double psi1_exp1 = log_gamma(n,q) - (muZ*muZ/(Snq+lq) +log_denom1(n,q))/2.;
+                            double psi1_exp2 = log_gamma1(n,q) -Zm1q*Zm1q/(2.*lq);
+                            log_psi1 += (psi1_exp1>psi1_exp2)?psi1_exp1+log1p(exp(psi1_exp2-psi1_exp1)):psi1_exp2+log1p(exp(psi1_exp1-psi1_exp2));
+                        }
+                        // Compute Psi_2
+                        double muZhat = mu(n,q) - (Zm1q+Zm2q)/2.;
+                        double Z2 = Zm1q*Zm1q+ Zm2q*Zm2q;
+                        double dZ = Zm1q - Zm2q;
+
+                        double psi2_exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
+                        double psi2_exp2 = log_gamma1(n,q) - Z2/(2.*lq);
+                        log_psi2_n += (psi2_exp1>psi2_exp2)?psi2_exp1+log1p(exp(psi2_exp2-psi2_exp1)):psi2_exp2+log1p(exp(psi2_exp1-psi2_exp2));
+                    }
+                    double exp_psi2_n = exp(log_psi2_n);
+                    psi2n(n,m1,m2) = variance*variance*exp_psi2_n;
+                    if(m1!=m2) { psi2n(n,m2,m1) = variance*variance*exp_psi2_n;}
+                }
+                psi1(n,m1) = variance*exp(log_psi1);
+            }
+        }
+        """
+        weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)
+
+        psi2 = psi2n.sum(axis=0)
+        return psi0,psi1,psi2,psi2n
+
+    from GPy.util.caching import Cacher
+    psicomputations = Cacher(_psicomputations, limit=1)
+
+    def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
+        ARD = (len(lengthscale)!=1)
+
+        _,psi1,_,psi2n = psicomputations(variance, lengthscale, Z, variational_posterior)
+
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
+        N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
+        l2 = np.square(lengthscale)
+        log_denom1 = np.log(S/l2+1)
+        log_denom2 = np.log(2*S/l2+1)
+        log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
+        gamma, gamma1 = variational_posterior.gamma_probabilities()
+        variance = float(variance)
+
+        dvar = np.zeros(1)
+        dmu = np.zeros((N,Q))
+        dS = np.zeros((N,Q))
+        dgamma = np.zeros((N,Q))
+        dl = np.zeros(Q)
+        dZ = np.zeros((M,Q))
+        dvar += np.sum(dL_dpsi0)
+
+        from ....util.misc import param_to_array
+        S = param_to_array(S)
+        mu = param_to_array(mu)
+        Z = param_to_array(Z)
+
+        support_code = """
+        #include <math.h>
+        """
+        code = """
+        for(int n=0; n<N; n++) {
+            for(int m1=0;m1<M;m1++) {
+                double log_psi1=0;
+                for(int m2=0;m2<M;m2++) {
+                    double log_psi2_n=0;
+                    for(int q=0;q<Q;q++) {
+                        double Snq = S(n,q);
+                        double lq = l2(q);
+                        double Zm1q = Z(m1,q);
+                        double Zm2q = Z(m2,q);
+                        double gnq = gamma(n,q);
+                        double g1nq = gamma1(n,q);
+                        double mu_nq = mu(n,q);
+
+                        if(m2==0) {
+                            // Compute Psi_1
+                            double lpsi1 = psi1(n,m1)*dL_dpsi1(n,m1);
+                            if(q==0) {dvar(0) += lpsi1/variance;}
+
+                            double Zmu = Zm1q - mu_nq;
+                            double denom = Snq+lq;
+                            double Zmu2_denom = Zmu*Zmu/denom;
+
+                            double exp1 = log_gamma(n,q)-(Zmu*Zmu/(Snq+lq)+log_denom1(n,q))/(2.);
+                            double exp2 = log_gamma1(n,q)-Zm1q*Zm1q/(2.*lq);
+                            double d_exp1,d_exp2;
+                            if(exp1>exp2) {
+                                d_exp1 = 1.;
+                                d_exp2 = exp(exp2-exp1);
+                            } else {
+                                d_exp1 = exp(exp1-exp2);
+                                d_exp2 = 1.;
+                            }
+                            double exp_sum = d_exp1+d_exp2;
+
+                            dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum);
+                            dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.;
+                            dgamma(n,q) += lpsi1*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
+                            dl(q) += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zm1q*Zm1q/(lq*lq)*d_exp2)/(2.*exp_sum);
+                            dZ(m1,q) += lpsi1*(-Zmu/denom*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
+                        }
+                        // Compute Psi_2
+                        double lpsi2 = psi2n(n,m1,m2)*dL_dpsi2(m1,m2);
+                        if(q==0) {dvar(0) += lpsi2*2/variance;}
+
+                        double dZm1m2 = Zm1q - Zm2q;
+                        double Z2 = Zm1q*Zm1q+Zm2q*Zm2q;
+                        double muZhat = mu_nq - (Zm1q + Zm2q)/2.;
+                        double denom = 2.*Snq+lq;
+                        double muZhat2_denom = muZhat*muZhat/denom;
+
+                        double exp1 = dZm1m2*dZm1m2/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
+                        double exp2 = log_gamma1(n,q) - Z2/(2.*lq);
+                        double d_exp1,d_exp2;
+                        if(exp1>exp2) {
+                            d_exp1 = 1.;
+                            d_exp2 = exp(exp2-exp1);
+                        } else {
+                            d_exp1 = exp(exp1-exp2);
+                            d_exp2 = 1.;
+                        }
+                        double exp_sum = d_exp1+d_exp2;
+
+                        dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum;
+                        dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
+                        dgamma(n,q) += lpsi2*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
+                        dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
+                        dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
+                    }
+                }
+            }
+        }
+        """
+        weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','gamma1','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)
+
+        dl *= 2.*lengthscale
+        if not ARD:
+            dl = dl.sum()
+
+        return dvar, dl, dZ, dmu, dS, dgamma
+
+except:
+
 def psicomputations(variance, lengthscale, Z, variational_posterior):
     """
     Z - MxQ
 
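Both weave kernels accumulate log(exp(a) + exp(b)) by factoring out the larger exponent — the (exp1>exp2)? ... : ... ternaries — so neither term can overflow. The same two-term log-sum-exp written out in NumPy (a sketch of the identity, not the committed code):

import numpy as np

def log_add_exp(a, b):
    # log(exp(a) + exp(b)) = max(a,b) + log1p(exp(min(a,b) - max(a,b)))
    hi, lo = np.maximum(a, b), np.minimum(a, b)
    return hi + np.log1p(np.exp(lo - hi))

print(log_add_exp(-1000., -1001.))   # ~ -999.687, no underflow to -inf
print(np.logaddexp(-1000., -1001.))  # NumPy's builtin gives the same value
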
@@ -54,6 +54,8 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
         self.variational_prior = NormalPrior()
         X = NormalPosterior(X, X_variance)
 
+        self.kl_factr = 1.
+
         if inference_method is None:
             from ..inference.latent_function_inference.var_dtc import VarDTC
             self.logger.debug("creating inference_method var_dtc")
 
@@ -81,18 +83,19 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
     def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None):
         posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices)
 
-        kl_fctr = 1.
+        current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
+                                            variational_posterior=X,
+                                            Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'],
+                                            dL_dpsi1=grad_dict['dL_dpsi1'],
+                                            dL_dpsi2=grad_dict['dL_dpsi2'])
+
+        kl_fctr = self.kl_factr
         if self.missing_data:
             d = self.output_dim
             log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d
         else:
             log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)
 
-        current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
-                                            variational_posterior=X,
-                                            Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'],
-                                            dL_dpsi1=grad_dict['dL_dpsi1'],
-                                            dL_dpsi2=grad_dict['dL_dpsi2'])
-
         # Subsetting Variational Posterior objects, makes the gradients
         # empty. We need them to be 0 though:
 
@@ -100,12 +100,11 @@ class MRD(BayesianGPLVMMiniBatch):
         self.logger.info("building kernels")
         if kernel is None:
             from ..kern import RBF
-            kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))]
+            kernels = [RBF(input_dim, ARD=1, lengthscale=1./fracs[i]) for i in range(len(Ylist))]
         elif isinstance(kernel, Kern):
             kernels = []
             for i in range(len(Ylist)):
                 k = kernel.copy()
-                print k is kernel, k.observers, k.constraints
                 kernels.append(k)
         else:
             assert len(kernel) == len(Ylist), "need one kernel per output"
 
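In an ARD RBF kernel the relevance of an input dimension falls as its lengthscale grows (input sensitivity scales like variance/lengthscale^2), so initializing with lengthscale=1./fracs[i] makes dimensions with large fracs start out most relevant. A hedged sketch (assumes GPy is importable; the numbers are purely illustrative):

import numpy as np
import GPy

fracs = np.array([.1, 1., 10.])                   # per-dimension relevance guesses
k = GPy.kern.RBF(3, ARD=True, lengthscale=1. / fracs)
print(k.lengthscale)                              # 10., 1., 0.1
print(k.input_sensitivity())                      # increases with fracs
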
@@ -144,8 +143,16 @@ class MRD(BayesianGPLVMMiniBatch):
         for i, n, k, l, Y, im, bs in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist, self.inference_method, batchsize):
             assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"
             md = np.isnan(Y).any()
-            spgp = SparseGPMiniBatch(self.X, Y, Z, k, l, im, n, None, normalizer, md, stochastic, bs)
+            spgp = BayesianGPLVMMiniBatch(Y, input_dim, X, X_variance,
+                                          Z=Z, kernel=k, likelihood=l,
+                                          inference_method=im, name=n,
+                                          normalizer=normalizer,
+                                          missing_data=md,
+                                          stochastic=stochastic,
+                                          batchsize=bs)
+            spgp.kl_factr = 1./len(Ynames)
             spgp.unlink_parameter(spgp.Z)
+            spgp.unlink_parameter(spgp.X)
             del spgp.Z
             del spgp.X
             spgp.Z = self.Z
 
@@ -165,20 +172,10 @@ class MRD(BayesianGPLVMMiniBatch):
 
             self.logger.info('working on im <{}>'.format(hex(id(i))))
             self.Z.gradient[:] += b.full_values['Zgrad']
-            grad_dict = b.grad_dict
+            grad_dict = b.full_values
 
-            if isinstance(self.X, VariationalPosterior):
-                dL_dmean, dL_dS = b.kern.gradients_qX_expectations(
-                    grad_dict['dL_dpsi0'],
-                    grad_dict['dL_dpsi1'],
-                    grad_dict['dL_dpsi2'],
-                    self.Z, self.X)
-                self.X.mean.gradient += dL_dmean
-                self.X.variance.gradient += dL_dS
-
-            else:
-                #gradients wrt kernel
-                self.X.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'], self.X, self.Z)
+            self.X.mean.gradient += grad_dict['meangrad']
+            self.X.variance.gradient += grad_dict['vargrad']
 
         if isinstance(self.X, VariationalPosterior):
             # update for the KL divergence
 
@@ -238,7 +235,7 @@ class MRD(BayesianGPLVMMiniBatch):
                 pass
             if axes is None:
                 ax = fig.add_subplot(1, len(self.bgplvms), i + 1, sharex=sharex_ax, sharey=sharey_ax)
-            elif isinstance(axes, (tuple, list)):
+            elif isinstance(axes, (tuple, list, np.ndarray)):
                 ax = axes[i]
             else:
                 raise ValueError("Need one axes per latent dimension input_dim")
 
@@ -286,7 +283,7 @@ class MRD(BayesianGPLVMMiniBatch):
         titles = [r'${}$'.format(name) for name in self.names]
         ymax = reduce(max, [np.ceil(max(g.kern.input_sensitivity())) for g in self.bgplvms])
         def plotf(i, g, ax):
-            ax.set_ylim([0,ymax])
+            #ax.set_ylim([0,ymax])
             return g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs)
         fig = self._handle_plotting(fignum, ax, plotf, sharex=sharex, sharey=sharey)
         return fig
 
@@ -56,6 +56,7 @@ Created on 3 Nov 2014
             raise NotImplementedError, "what to do what to do?"
         print "defaulting to ", inference_method, "for latent function inference"
 
+        self.kl_factr = 1.
         self.Z = Param('inducing inputs', Z)
         self.num_inducing = Z.shape[0]
 
@@ -39,10 +39,7 @@ class SSGPLVM(SparseGP_MPI):
             X_variance = np.random.uniform(0,.1,X.shape)
 
         if Gamma is None:
-            gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
-            gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
-            gamma[gamma>1.-1e-9] = 1.-1e-9
-            gamma[gamma<1e-9] = 1e-9
+            gamma = np.random.randn(X.shape[0], input_dim)
         else:
             gamma = Gamma.copy()
 
@@ -162,7 +162,7 @@ def plot_latent(model, labels=None, which_indices=None,
         else:
             x = X[index, input_1]
             y = X[index, input_2]
-        ax.scatter(x, y, marker=m, s=s, c=Tango.nextMedium(), label=this_label, linewidth=.5, edgecolor='k', alpha=.9)
+        ax.scatter(x, y, marker=m, s=s, c=Tango.nextMedium(), label=this_label, linewidth=.2, edgecolor='k', alpha=.9)
 
         ax.set_xlabel('latent dimension %i' % input_1)
         ax.set_ylabel('latent dimension %i' % input_2)
 
@@ -1,5 +1,5 @@
-from ..core.parameterization.parameter_core import Observable
-import collections, weakref, logging
+from ..core.parameterization.observable import Observable
+import collections, weakref
 
 class Cacher(object):
     def __init__(self, operation, limit=5, ignore_args=(), force_kwargs=()):
 
@@ -328,105 +328,6 @@ def ppca(Y, Q, iterations=100):
         pass
     return np.asarray_chkfinite(exp_x), np.asarray_chkfinite(W)
 
-def ppca_missing_data_at_random(Y, Q, iters=100):
-    """
-    EM implementation of Probabilistic pca for when there is missing data.
-
-    Taken from <SheffieldML, https://github.com/SheffieldML>
-
-    .. math:
-        \\mathbf{Y} = \mathbf{XW} + \\epsilon \\text{, where}
-        \\epsilon = \\mathcal{N}(0, \\sigma^2 \mathbf{I})
-
-    :returns: X, W, sigma^2
-    """
-    from numpy.ma import dot as madot
-    import diag
-    from GPy.util.subarray_and_sorting import common_subarrays
-    import time
-    debug = 1
-    # Initialise W randomly
-    N, D = Y.shape
-    W = np.random.randn(Q, D) * 1e-3
-    Y = np.ma.masked_invalid(Y, copy=1)
-    nu = 1.
-    #num_obs_i = 1./Y.count()
-    Ycentered = Y - Y.mean(0)
-
-    X = np.zeros((N,Q))
-    cs = common_subarrays(Y.mask)
-    cr = common_subarrays(Y.mask, 1)
-    Sigma = np.zeros((N, Q, Q))
-    Sigma2 = np.zeros((N, Q, Q))
-    mu = np.zeros(D)
-    """
-    if debug:
-        import matplotlib.pyplot as pylab
-        fig = pylab.figure("FIT MISSING DATA");
-        ax = fig.gca()
-        ax.cla()
-        lines = pylab.plot(np.zeros((N,Q)).dot(W))
-    """
-    W2 = np.zeros((Q,D))
-
-    for i in range(iters):
-        # Sigma = np.linalg.solve(diag.add(madot(W,W.T), nu), diag.times(np.eye(Q),nu))
-        # exp_x = madot(madot(Ycentered, W.T),Sigma)/nu
-        # Ycentered = (Y - exp_x.dot(W).mean(0))
-        # #import ipdb;ipdb.set_trace()
-        # #Ycentered = mu
-        # W = np.linalg.solve(madot(exp_x.T,exp_x) + Sigma, madot(exp_x.T, Ycentered))
-        # nu = (((Ycentered - madot(exp_x, W))**2).sum(0) + madot(W.T,madot(Sigma,W)).sum(0)).sum()/N
-        for csi, (mask, index) in enumerate(cs.iteritems()):
-            mask = ~np.array(mask)
-            Sigma2[index, :, :] = nu * np.linalg.inv(diag.add(W2[:,mask].dot(W2[:,mask].T), nu))
-        #X[index,:] = madot((Sigma[csi]/nu),madot(W,Ycentered[index].T))[:,0]
-        X2 = ((Sigma2/nu) * (madot(Ycentered,W2.T).base)[:,:,None]).sum(-1)
-        mu2 = (Y - X.dot(W)).mean(0)
-        for n in range(N):
-            Sigma[n] = nu * np.linalg.inv(diag.add(W[:,~Y.mask[n]].dot(W[:,~Y.mask[n]].T), nu))
-            X[n, :] = (Sigma[n]/nu).dot(W[:,~Y.mask[n]].dot(Ycentered[n,~Y.mask[n]].T))
-        for d in range(D):
-            mu[d] = (Y[~Y.mask[:,d], d] - X[~Y.mask[:,d]].dot(W[:, d])).mean()
-        Ycentered = (Y - mu)
-        nu3 = 0.
-        for cri, (mask, index) in enumerate(cr.iteritems()):
-            mask = ~np.array(mask)
-            W2[:,index] = np.linalg.solve(X[mask].T.dot(X[mask]) + Sigma[mask].sum(0), madot(X[mask].T, Ycentered[mask,index]))[:,None]
-            W2[:,index] = np.linalg.solve(X.T.dot(X) + Sigma.sum(0), madot(X.T, Ycentered[:,index]))
-            #nu += (((Ycentered[mask,index] - X[mask].dot(W[:,index]))**2).sum(0) + W[:,index].T.dot(Sigma[mask].sum(0).dot(W[:,index])).sum(0)).sum()
-            nu3 += (((Ycentered[index] - X.dot(W[:,index]))**2).sum(0) + W[:,index].T.dot(Sigma.sum(0).dot(W[:,index])).sum(0)).sum()
-        nu3 /= N
-        nu = 0.
-        nu2 = 0.
-        W = np.zeros((Q,D))
-        for j in range(D):
-            W[:,j] = np.linalg.solve(X[~Y.mask[:,j]].T.dot(X[~Y.mask[:,j]]) + Sigma[~Y.mask[:,j]].sum(0), madot(X[~Y.mask[:,j]].T, Ycentered[~Y.mask[:,j],j]))
-            nu2f = np.tensordot(W[:,j].T, Sigma[~Y.mask[:,j],:,:], [0,1]).dot(W[:,j])
-            nu2s = W[:,j].T.dot(Sigma[~Y.mask[:,j],:,:].sum(0).dot(W[:,j]))
-            nu2 += (((Ycentered[~Y.mask[:,j],j] - X[~Y.mask[:,j],:].dot(W[:,j]))**2) + nu2f).sum()
-            for i in range(N):
-                if not Y.mask[i,j]:
-                    nu += ((Ycentered[i,j] - X[i,:].dot(W[:,j]))**2) + W[:,j].T.dot(Sigma[i,:,:].dot(W[:,j]))
-        nu /= N
-        nu2 /= N
-        nu4 = (((Ycentered - X.dot(W))**2).sum(0) + W.T.dot(Sigma.sum(0).dot(W)).sum(0)).sum()/N
-        import ipdb;ipdb.set_trace()
-        """
-        if debug:
-            #print Sigma[0]
-            print "nu:", nu, "sum(X):", X.sum()
-            pred_y = X.dot(W)
-            for x, l in zip(pred_y.T, lines):
-                l.set_ydata(x)
-            ax.autoscale_view()
-            ax.set_ylim(pred_y.min(), pred_y.max())
-            fig.canvas.draw()
-            time.sleep(.3)
-        """
-    return np.asarray_chkfinite(X), np.asarray_chkfinite(W), nu
-
 
 def tdot_numpy(mat, out=None):
     return np.dot(mat, mat.T, out)
 
setup.py (23 lines changed)
@@ -20,6 +20,7 @@ setup(name = 'GPy',
       url = "http://sheffieldml.github.com/GPy/",
       packages = ["GPy.models",
                   "GPy.inference.optimization",
+                  "GPy.inference.mcmc",
                   "GPy.inference",
                   "GPy.inference.latent_function_inference",
                   "GPy.likelihoods", "GPy.mappings",
 
@@ -31,14 +32,20 @@ setup(name = 'GPy',
                   "GPy.plotting.matplot_dep.latent_space_visualizations",
                   "GPy.plotting.matplot_dep", "GPy.plotting"],
       package_dir={'GPy': 'GPy'},
-      package_data = {'GPy': ['defaults.cfg', 'installation.cfg', 'util/data_resources.json', 'util/football_teams.json']},
+      package_data = {'GPy': ['defaults.cfg', 'installation.cfg',
+                              'util/data_resources.json',
+                              'util/football_teams.json']},
+      include_package_data = True,
       py_modules = ['GPy.__init__'],
+      test_suite = 'GPy.testing',
       long_description=read('README.md'),
-      install_requires=['numpy>=1.8', 'scipy>=0.14'],
-      extras_require = {
-        'docs':['matplotlib>=1.4','Sphinx','ipython'],
-        },
-      classifiers=[
-        "License :: OSI Approved :: BSD License"],
-      zip_safe = False
+      install_requires=['numpy>=1.7', 'scipy>=0.12'],
+      extras_require = {'docs':['matplotlib >=1.3','Sphinx','IPython']},
+      classifiers=['License :: OSI Approved :: BSD License',
+                   'Natural Language :: English',
+                   'Operating System :: MacOS :: MacOS X',
+                   'Operating System :: Microsoft :: Windows',
+                   'Operating System :: POSIX :: Linux',
+                   'Programming Language :: Python :: 2.7',
+                   'Topic :: Scientific/Engineering :: Artificial Intelligence']
       )