Merge branch 'devel' of github.com:SheffieldML/GPy into devel

James Hensman 2014-11-13 11:18:30 +00:00
commit b5e3788d0a
19 changed files with 686 additions and 497 deletions

View file

@ -39,6 +39,11 @@ def load(file_path):
:param file_path: path/to/file.pickle
"""
import cPickle as pickle
with open(file_path, 'rb') as f:
m = pickle.load(f)
try:
with open(file_path, 'rb') as f:
m = pickle.load(f)
except:
import pickle as pickle
with open(file_path, 'rb') as f:
m = pickle.load(f)
return m
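The new loader tries the fast cPickle module first and falls back to the pure-Python pickle module if anything goes wrong. A minimal usage sketch, assuming this helper is exposed at package level and that a model was pickled beforehand (the file name is illustrative):

    import GPy
    m = GPy.load('my_model.pickle')   # cPickle first, plain pickle as the fallback
    print m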

View file

@ -3,16 +3,12 @@
import numpy as np
import sys
import warnings
from .. import kern
from ..util.linalg import dtrtrs
from model import Model
from parameterization import ObsAr
from .. import likelihoods
from ..likelihoods.gaussian import Gaussian
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation, LatentFunctionInference
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
from parameterization.variational import VariationalPosterior
from scipy.sparse.base import issparse
import logging
from GPy.util.normalizer import MeanNorm
@ -115,7 +111,6 @@ class GP(Model):
if X is not None:
if self.X in self.parameters:
# LVM models
from ..core.parameterization.variational import VariationalPosterior
if isinstance(self.X, VariationalPosterior):
assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
self.unlink_parameter(self.X)
@ -458,7 +453,11 @@ class GP(Model):
:param optimize: whether to optimize the location of new X (True by default)
:type optimize: boolean
:return: a tuple containing the posterior estimate of X and the model that optimizes X
:rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` or numpy.ndarray, :class:`~GPy.core.model.Model`)
"""
from ..inference.latent_function_inference.inferenceX import infer_newX
return infer_newX(self, Y_new, optimize=optimize)
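A short usage sketch of the method documented above, run on an already fitted latent variable model (variable names are illustrative):

    # Y_new: new observations with the same output dimension as the training data
    X_new, infr_model = m.infer_newX(Y_new, optimize=True)
    # X_new is a VariationalPosterior for variational models, a plain ndarray otherwise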

View file

@ -238,6 +238,10 @@ class Model(Parameterized):
if self.size == 0:
print 'nothing to optimize'
if not self.update_model():
print "setting updates on again"
self.update_model(True)
if start == None:
start = self.optimizer_array
@ -279,10 +283,10 @@ class Model(Parameterized):
Note:-
The gradient is considered correct if the ratio of the analytical
and numerical gradients is within <tolerance> of unity.
The *dF_ratio* indicates the limit of accuracy of the numerical gradients.
If it is too small, e.g., smaller than 1e-12, the numerical gradients are usually
not accurate enough for the tests (shown in blue).
"""
x = self.optimizer_array.copy()
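For illustration, the gradient check described in this docstring is usually run directly on a model instance (a usage sketch, not part of this diff; X and Y stand for some training data):

    m = GPy.models.GPRegression(X, Y)
    m.checkgrad(verbose=True)   # prints the analytical/numerical gradient ratio per parameter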

View file

@ -0,0 +1,69 @@
'''
Created on 30 Oct 2014
@author: maxz
'''
class Observable(object):
"""
Observable pattern for parameterization.
This object allows observers to register themselves together with a (bound!)
callback function. Every time the observable changes, it sends a notification with
itself as the only argument to all of its observers.
"""
def __init__(self, *args, **kwargs):
super(Observable, self).__init__()
from lists_and_dicts import ObserverList
self.observers = ObserverList()
def add_observer(self, observer, callble, priority=0):
"""
Add the observer `observer` with the callback `callble`
and priority `priority` to this object's list of observers.
"""
self.observers.add(priority, observer, callble)
def remove_observer(self, observer, callble=None):
"""
Either remove all callables that were added alongside `observer`
(if `callble` is None), or remove the specific callable `callble`
that was added alongside the observer `observer`.
"""
to_remove = []
for poc in self.observers:
_, obs, clble = poc
if callble is not None:
if (obs is observer) and (callble == clble):
to_remove.append(poc)
else:
if obs is observer:
to_remove.append(poc)
for r in to_remove:
self.observers.remove(*r)
def notify_observers(self, which=None, min_priority=None):
"""
Notifies all observers. `which` is the element that kicked off this
notification loop. The first argument passed to each callback is self, the second is `which`.
NOTE: notifies only observers with priority p > min_priority!
^^^^^^^^^^^^^^^^
:param min_priority: only notify observers with priority > min_priority;
if min_priority is None, notify all observers in order
"""
if which is None:
which = self
if min_priority is None:
[callble(self, which=which) for _, _, callble in self.observers]
else:
for p, _, callble in self.observers:
if p <= min_priority:
break
callble(self, which=which)
def change_priority(self, observer, callble, priority):
self.remove_observer(observer, callble)
self.add_observer(observer, callble, priority)
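A minimal sketch of the observer pattern implemented above; the Listener class is made up for illustration:

    class Listener(object):
        def on_change(self, observable, which):
            print 'notified by', which

    obs = Observable()
    listener = Listener()
    obs.add_observer(listener, listener.on_change, priority=0)
    obs.notify_observers()           # each callback is called as callble(obs, which=obs)
    obs.remove_observer(listener)    # drops every callback registered for this listener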

View file

@ -1,10 +1,11 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
__updated__ = '2014-10-29'
__updated__ = '2014-11-11'
import numpy as np
from parameter_core import Observable, Pickleable
from parameter_core import Pickleable
from observable import Observable
class ObsAr(np.ndarray, Pickleable, Observable):
"""

View file

@ -17,6 +17,7 @@ from transformations import Transformation,Logexp, NegativeLogexp, Logistic, __f
import numpy as np
import re
import logging
from updateable import Updateable
class HierarchyError(Exception):
"""
@ -40,115 +41,6 @@ def adjust_name_for_printing(name):
return ''
class Observable(object):
"""
Observable pattern for parameterization.
This Object allows for observers to register with self and a (bound!) function
as an observer. Every time the observable changes, it sends a notification with
self as only argument to all its observers.
"""
_updates = True
def __init__(self, *args, **kwargs):
super(Observable, self).__init__()
from lists_and_dicts import ObserverList
self.observers = ObserverList()
@property
def updates(self):
raise DeprecationWarning("updates is now a function, see update(True|False|None)")
@updates.setter
def updates(self, ups):
raise DeprecationWarning("updates is now a function, see update(True|False|None)")
def update_model(self, updates=None):
"""
Get or set, whether automatic updates are performed. When updates are
off, the model might be in a non-working state. To make the model work
turn updates on again.
:param bool|None updates:
bool: whether to do updates
None: get the current update state
"""
if updates is None:
p = getattr(self, '_highest_parent_', None)
if p is not None:
self._updates = p._updates
return self._updates
assert isinstance(updates, bool), "updates are either on (True) or off (False)"
p = getattr(self, '_highest_parent_', None)
if p is not None:
p._updates = updates
else:
self._updates = updates
self.trigger_update()
def toggle_update(self):
self.update_model(not self.update())
def trigger_update(self, trigger_parent=True):
"""
Update the model from the current state.
Make sure that updates are on, otherwise this
method will do nothing
:param bool trigger_parent: Whether to trigger the parent, after self has updated
"""
if not self.update_model() or (hasattr(self, "_in_init_") and self._in_init_):
#print "Warning: updates are off, updating the model will do nothing"
return
self._trigger_params_changed(trigger_parent)
def add_observer(self, observer, callble, priority=0):
"""
Add an observer `observer` with the callback `callble`
and priority `priority` to this observers list.
"""
self.observers.add(priority, observer, callble)
def remove_observer(self, observer, callble=None):
"""
Either (if callble is None) remove all callables,
which were added alongside observer,
or remove callable `callble` which was added alongside
the observer `observer`.
"""
to_remove = []
for poc in self.observers:
_, obs, clble = poc
if callble is not None:
if (obs is observer) and (callble == clble):
to_remove.append(poc)
else:
if obs is observer:
to_remove.append(poc)
for r in to_remove:
self.observers.remove(*r)
def notify_observers(self, which=None, min_priority=None):
"""
Notifies all observers. Which is the element, which kicked off this
notification loop. The first argument will be self, the second `which`.
NOTE: notifies only observers with priority p > min_priority!
^^^^^^^^^^^^^^^^
:param min_priority: only notify observers with priority > min_priority
if min_priority is None, notify all observers in order
"""
if not self.update_model():
return
if which is None:
which = self
if min_priority is None:
[callble(self, which=which) for _, _, callble in self.observers]
else:
for p, _, callble in self.observers:
if p <= min_priority:
break
callble(self, which=which)
class Parentable(object):
"""
@ -307,11 +199,11 @@ class Gradcheckable(Pickleable, Parentable):
:param float step: the stepsize for the numerical three point gradient estimate.
:param float tolerance: the tolerance for the gradient ratio or difference.
:param float df_tolerance: the tolerance for df_tolerance
Note:-
The *dF_ratio* indicates the limit of accuracy of numerical gradients.
If it is too small, e.g., smaller than 1e-12, the numerical gradients
are usually not accurate enough for the tests (shown with blue).
"""
if self.has_parent():
return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance, df_tolerance=df_tolerance)
@ -363,7 +255,7 @@ class Nameable(Gradcheckable):
return adjust(self.name)
class Indexable(Nameable, Observable):
class Indexable(Nameable, Updateable):
"""
Make an object constrainable with Priors and Transformations.
TODO: Mappings!!
@ -726,7 +618,7 @@ class OptimizationHandlable(Indexable):
fixes[self.constraints[__fixed__]] = FIXED
return self._optimizer_copy_[np.logical_and(fixes, self._highest_parent_.tie.getTieFlag(self))]
elif self._has_fixes():
return self._optimizer_copy_[self._fixes_]
self._optimizer_copy_transformed = True
@ -757,7 +649,7 @@ class OptimizationHandlable(Indexable):
#self._highest_parent_.tie.propagate_val()
self._optimizer_copy_transformed = False
self._trigger_params_changed()
self.trigger_update()
def _get_params_transformed(self):
raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer_array instead!"
@ -1117,3 +1009,29 @@ class Parameterizable(OptimizationHandlable):
updates get passed through. See :py:func:`GPy.core.param.Observable.add_observer`
"""
pass
def save(self, filename, ftype='HDF5'):
"""
Save all the model parameters into a file (HDF5 by default).
"""
from . import Param
from ...util.misc import param_to_array
def gather_params(self, plist):
if isinstance(self,Param):
plist.append(self)
plist = []
self.traverse(gather_params, plist)
names = self.parameter_names(adjust_for_printing=True)
if ftype=='HDF5':
try:
import h5py
f = h5py.File(filename,'w')
for p,n in zip(plist,names):
n = n.replace('.','_')
p = param_to_array(p)
d = f.create_dataset(n,p.shape,dtype=p.dtype)
d[:] = p
f.close()
except:
raise IOError('Unable to write the parameters into an HDF5 file!')
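Assuming the save above succeeded, the stored parameters can be inspected again with h5py (the file name is illustrative):

    import h5py
    m.save('model_params.h5')                    # one dataset per (renamed) parameter
    with h5py.File('model_params.h5', 'r') as f:
        for name in f:
            print name, f[name][:].shape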

View file

@ -131,6 +131,13 @@ class NormalTheta(Transformation):
def __str__(self):
return "theta"
def __getstate__(self):
return [self.mu_indices, self.var_indices]
def __setstate__(self, state):
self.mu_indices = state[0]
self.var_indices = state[1]
class NormalNaturalAntti(NormalTheta):
_instances = []
_logexp = Logexp()

View file

@ -0,0 +1,63 @@
'''
Created on 11 Nov 2014
@author: maxz
'''
from observable import Observable
class Updateable(Observable):
"""
A model can be updated or not.
Make sure updates can be switched on and off.
"""
_updates = True
def __init__(self, *args, **kwargs):
super(Updateable, self).__init__(*args, **kwargs)
@property
def updates(self):
raise DeprecationWarning("updates is now a function, see update(True|False|None)")
@updates.setter
def updates(self, ups):
raise DeprecationWarning("updates is now a function, see update(True|False|None)")
def update_model(self, updates=None):
"""
Get or set whether automatic updates are performed. When updates are
off, the model might be in a non-working state. To make the model work,
turn updates on again.
:param bool|None updates:
bool: whether to do updates
None: get the current update state
"""
if updates is None:
p = getattr(self, '_highest_parent_', None)
if p is not None:
self._updates = p._updates
return self._updates
assert isinstance(updates, bool), "updates are either on (True) or off (False)"
p = getattr(self, '_highest_parent_', None)
if p is not None:
p._updates = updates
self._updates = updates
self.trigger_update()
def toggle_update(self):
self.update_model(not self.update_model())
def trigger_update(self, trigger_parent=True):
"""
Update the model from the current state.
Make sure that updates are on, otherwise this
method will do nothing.
:param bool trigger_parent: whether to trigger the parent after self has updated
"""
if not self.update_model() or (hasattr(self, "_in_init_") and self._in_init_):
#print "Warning: updates are off, updating the model will do nothing"
return
self._trigger_params_changed(trigger_parent)
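A typical use of this switch on a hypothetical model m: suspend updates while changing several parameters, then re-enable them so only one recomputation happens (the parameter names are illustrative):

    m.update_model(False)          # updates off: setting parameters does not recompute the model
    m.kern.lengthscale = 2.
    m.likelihood.variance = 0.1
    m.update_model(True)           # updates on again: triggers a single update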

View file

@ -8,6 +8,8 @@ import numpy as np
from parameterized import Parameterized
from param import Param
from transformations import Logexp, Logistic,__fixed__
from GPy.util.misc import param_to_array
from GPy.util.caching import Cache_this
class VariationalPrior(Parameterized):
def __init__(self, name='latent space', **kw):
@ -48,7 +50,8 @@ class SpikeAndSlabPrior(VariationalPrior):
def KL_divergence(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
gamma,gamma1 = variational_posterior.gamma_probabilities()
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
@ -57,20 +60,21 @@ class SpikeAndSlabPrior(VariationalPrior):
var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S))
var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
var_gamma = (gamma*(log_gamma-np.log(pi))).sum()+(gamma1*(log_gamma1-np.log(1-pi))).sum()
return var_gamma+ (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.
def update_gradients_KL(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
gamma,gamma1 = variational_posterior.gamma_probabilities()
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
gamma.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
variational_posterior.binary_prob.gradient -= (np.log((1-pi)/pi)+log_gamma-log_gamma1+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.)*gamma*gamma1
mu.gradient -= gamma*mu/self.variance
S.gradient -= (1./self.variance - 1./S) * gamma /2.
if self.learnPi:
@ -158,8 +162,24 @@ class SpikeAndSlabPosterior(VariationalPosterior):
binary_prob : the probability of the distribution on the slab part.
"""
super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
self.gamma = Param("binary_prob",binary_prob, Logistic(1e-10,1.-1e-10))
self.gamma = Param("binary_prob",binary_prob)
self.link_parameter(self.gamma)
@Cache_this(limit=5)
def gamma_probabilities(self):
prob = np.zeros_like(param_to_array(self.gamma))
prob[self.gamma>-710] = 1./(1.+np.exp(-self.gamma[self.gamma>-710]))
prob1 = np.zeros_like(param_to_array(self.gamma))
prob1[self.gamma<710] = 1./(1.+np.exp(self.gamma[self.gamma<710]))
return prob, prob1
@Cache_this(limit=5)
def gamma_log_prob(self):
loggamma = param_to_array(self.gamma).copy()
loggamma[loggamma>-40] = -np.log1p(np.exp(-loggamma[loggamma>-40]))
loggamma1 = param_to_array(self.gamma).copy()
loggamma1[loggamma1<40] = -np.log1p(np.exp(loggamma1[loggamma1<40]))
return loggamma,loggamma1
def set_gradients(self, grad):
self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad
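The two cached helpers above keep binary_prob on an unbounded (logit) scale and recover the sigmoid and log-sigmoid values without overflow, only exponentiating where it is safe. A small standalone sketch of the same idea, assuming numpy only:

    import numpy as np

    def stable_log_sigmoid(x):
        # log(1/(1+exp(-x))); for x <= -40 the result is approximately x itself
        out = np.array(x, dtype=float, copy=True)
        safe = out > -40
        out[safe] = -np.log1p(np.exp(-out[safe]))
        return out

    x = np.array([-800., -5., 0., 5., 800.])
    print np.exp(stable_log_sigmoid(x))   # ~ [0., 0.0067, 0.5, 0.9933, 1.]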

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as _np
#default_seed = _np.random.seed(123344)
# default_seed = _np.random.seed(123344)
def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan=False):
"""
@ -28,7 +28,7 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T
# k = GPy.kern.RBF_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
#k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.RBF(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.RBF(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
# k = GPy.kern.RBF(input_dim, .5, 2., ARD=0) + GPy.kern.RBF(input_dim, .3, .2, ARD=0)
@ -40,30 +40,30 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan
if nan:
m.inference_method = GPy.inference.latent_function_inference.var_dtc.VarDTCMissingData()
m.Y[_np.random.binomial(1,p,size=(Y.shape)).astype(bool)] = _np.nan
m.Y[_np.random.binomial(1, p, size=(Y.shape)).astype(bool)] = _np.nan
m.parameters_changed()
#===========================================================================
# randomly obstruct data with percentage p
#===========================================================================
#m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
#m.lengthscales = lengthscales
# m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
# m.lengthscales = lengthscales
if plot:
import matplotlib.pyplot as pb
m.plot()
pb.title('PCA initialisation')
#m2.plot()
#pb.title('PCA initialisation')
# m2.plot()
# pb.title('PCA initialisation')
if optimize:
m.optimize('scg', messages=verbose)
#m2.optimize('scg', messages=verbose)
# m2.optimize('scg', messages=verbose)
if plot:
m.plot()
pb.title('After optimisation')
#m2.plot()
#pb.title('After optimisation')
# m2.plot()
# pb.title('After optimisation')
return m
@ -169,7 +169,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
data = GPy.util.datasets.oil()
kernel = GPy.kern.RBF(Q, 1., 1./_np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2))
kernel = GPy.kern.RBF(Q, 1., 1. / _np.random.uniform(0, 1, (Q,)), ARD=True) # + GPy.kern.Bias(Q, _np.exp(-2))
Y = data['X'][:N]
m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k)
m.data_labels = data['Y'][:N].argmax(axis=1)
@ -180,8 +180,8 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
if plot:
fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
m.plot_latent(ax=latent_axes, labels=m.data_labels)
data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:]))
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1,:], # @UnusedVariable
data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0, :]))
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1, :], # @UnusedVariable
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels)
raw_input('Press enter to finish')
plt.close(fig)
@ -195,7 +195,7 @@ def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40
_np.random.seed(0)
data = pods.datasets.oil()
kernel = GPy.kern.RBF(Q, 1., 1./_np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2))
kernel = GPy.kern.RBF(Q, 1., 1. / _np.random.uniform(0, 1, (Q,)), ARD=True) # + GPy.kern.Bias(Q, _np.exp(-2))
Y = data['X'][:N]
m = GPy.models.SSGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k)
m.data_labels = data['Y'][:N].argmax(axis=1)
@ -206,8 +206,8 @@ def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40
if plot:
fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
m.plot_latent(ax=latent_axes, labels=m.data_labels)
data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:]))
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1,:], # @UnusedVariable
data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0, :]))
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1, :], # @UnusedVariable
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels)
raw_input('Press enter to finish')
plt.close(fig)
@ -219,10 +219,12 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
import numpy as np
np.random.seed(3000)
k = GPy.kern.Matern32(Q_signal, 10., lengthscale=1+(np.random.uniform(1,6,Q_signal)), ARD=1)
t = np.c_[[np.linspace(-1,5,N) for _ in range(Q_signal)]].T
k = GPy.kern.Matern32(Q_signal, 1., lengthscale=(np.random.uniform(1, 6, Q_signal)), ARD=1)
for i in range(Q_signal):
k += GPy.kern.PeriodicExponential(1, variance=1., active_dims=[i], period=3., lower=-2, upper=6)
t = np.c_[[np.linspace(-1, 5, N) for _ in range(Q_signal)]].T
K = k.K(t)
s2, s1, s3, sS = np.random.multivariate_normal(np.zeros(K.shape[0]), K, size=(4))[:,:,None]
s2, s1, s3, sS = np.random.multivariate_normal(np.zeros(K.shape[0]), K, size=(4))[:, :, None]
Y1, Y2, Y3, S1, S2, S3 = _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS)
@ -243,7 +245,7 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
ax.legend()
for i, Y in enumerate(Ylist):
ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i)
ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable
ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable
ax.set_title("Y{}".format(i + 1))
plt.draw()
plt.tight_layout()
@ -255,7 +257,7 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False):
x = _np.linspace(0, 4 * _np.pi, N)[:, None]
s1 = _np.vectorize(lambda x: _np.sin(x))
s2 = _np.vectorize(lambda x: _np.cos(x)**2)
s2 = _np.vectorize(lambda x: _np.cos(x) ** 2)
s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x)))
sS = _np.vectorize(lambda x: _np.cos(x))
@ -288,7 +290,7 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False):
ax.legend()
for i, Y in enumerate(Ylist):
ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i)
ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable
ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable
ax.set_title("Y{}".format(i + 1))
plt.draw()
plt.tight_layout()
@ -323,10 +325,10 @@ def bgplvm_simulation(optimize=True, verbose=1,
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 3, 9
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
#k = kern.RBF(Q, ARD=True, lengthscale=10.)
k = kern.Linear(Q, ARD=True) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
# k = kern.RBF(Q, ARD=True, lengthscale=10.)
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
m.X.variance[:] = _np.random.uniform(0, .01, m.X.shape)
m.likelihood.variance = .1
if optimize:
@ -348,10 +350,10 @@ def ssgplvm_simulation(optimize=True, verbose=1,
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 3, 9
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
#k = kern.RBF(Q, ARD=True, lengthscale=10.)
k = kern.Linear(Q, ARD=True) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
# k = kern.RBF(Q, ARD=True, lengthscale=10.)
m = SSGPLVM(Y, Q, init="pca", num_inducing=num_inducing, kernel=k)
m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
m.X.variance[:] = _np.random.uniform(0, .01, m.X.shape)
m.likelihood.variance = .1
if optimize:
@ -365,7 +367,7 @@ def ssgplvm_simulation(optimize=True, verbose=1,
def bgplvm_simulation_missing_data(optimize=True, verbose=1,
plot=True, plot_sim=False,
max_iters=2e4,
max_iters=2e4, percent_missing=.1,
):
from GPy import kern
from GPy.models.bayesian_gplvm_minibatch import BayesianGPLVMMiniBatch
@ -373,9 +375,9 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
k = kern.Linear(Q, ARD=True) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
inan = _np.random.binomial(1, .2, size=Y.shape).astype(bool) # 80% missing data
inan = _np.random.binomial(1, percent_missing, size=Y.shape).astype(bool) # mark a fraction percent_missing of the entries as missing
Ymissing = Y.copy()
Ymissing[inan] = _np.nan
@ -401,11 +403,11 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
#Ylist = [Ylist[0]]
# Ylist = [Ylist[0]]
k = kern.Linear(Q, ARD=True)
m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="PCA_concat", initz='permute', **kw)
m['.*noise'] = [Y.var()/40. for Y in Ylist]
m['.*noise'] = [Y.var() / 40. for Y in Ylist]
if optimize:
print "Optimizing Model:"
@ -422,7 +424,7 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
#Ylist = [Ylist[0]]
# Ylist = [Ylist[0]]
k = kern.Linear(Q, ARD=True)
inanlist = []
@ -553,7 +555,7 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True):
data = pods.datasets.osu_run1()
# optimize
back_kernel=GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.)
back_kernel = GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.)
mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel)
m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
if optimize: m.optimize(messages=verbose, max_f_eval=10000)
@ -563,7 +565,7 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True):
y = m.likelihood.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
#raw_input('Press enter to finish')
# raw_input('Press enter to finish')
return m
@ -627,7 +629,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose
Y = data['Y']
Y_mean = Y.mean(0)
Y_std = Y.std(0)
m = GPy.models.GPLVM((Y-Y_mean)/Y_std, 2)
m = GPy.models.GPLVM((Y - Y_mean) / Y_std, 2)
if optimize: m.optimize(messages=verbose, max_f_eval=10000)
if plot:
@ -651,17 +653,17 @@ def ssgplvm_simulation_linear():
x = np.empty(Q)
dies = np.random.rand(Q)
for q in xrange(Q):
if dies[q]<pi:
if dies[q] < pi:
x[q] = np.random.randn()
else:
x[q] = 0.
return x
Y = np.empty((N,D))
X = np.empty((N,Q))
Y = np.empty((N, D))
X = np.empty((N, Q))
# Generate data from random sampled weight matrices
for n in xrange(N):
X[n] = sample_X(Q,pi)
w = np.random.randn(D,Q)
Y[n] = np.dot(w,X[n])
X[n] = sample_X(Q, pi)
w = np.random.randn(D, Q)
Y[n] = np.dot(w, X[n])

View file

@ -7,193 +7,388 @@ The package for the psi statistics computation
import numpy as np
def psicomputations(variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
try:
from scipy import weave
def _psicomputations(variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
mu = variational_posterior.mean
S = variational_posterior.variance
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
l2 = np.square(lengthscale)
log_denom1 = np.log(S/l2+1)
log_denom2 = np.log(2*S/l2+1)
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
variance = float(variance)
psi0 = np.empty(N)
psi0[:] = variance
psi1 = np.empty((N,M))
psi2n = np.empty((N,M,M))
from ....util.misc import param_to_array
S = param_to_array(S)
mu = param_to_array(mu)
Z = param_to_array(Z)
support_code = """
#include <math.h>
"""
code = """
for(int n=0; n<N; n++) {
for(int m1=0;m1<M;m1++) {
double log_psi1=0;
for(int m2=0;m2<=m1;m2++) {
double log_psi2_n=0;
for(int q=0;q<Q;q++) {
double Snq = S(n,q);
double lq = l2(q);
double Zm1q = Z(m1,q);
double Zm2q = Z(m2,q);
if(m2==0) {
// Compute Psi_1
double muZ = mu(n,q)-Z(m1,q);
double psi1_exp1 = log_gamma(n,q) - (muZ*muZ/(Snq+lq) +log_denom1(n,q))/2.;
double psi1_exp2 = log_gamma1(n,q) -Zm1q*Zm1q/(2.*lq);
log_psi1 += (psi1_exp1>psi1_exp2)?psi1_exp1+log1p(exp(psi1_exp2-psi1_exp1)):psi1_exp2+log1p(exp(psi1_exp1-psi1_exp2));
}
// Compute Psi_2
double muZhat = mu(n,q) - (Zm1q+Zm2q)/2.;
double Z2 = Zm1q*Zm1q+ Zm2q*Zm2q;
double dZ = Zm1q - Zm2q;
double psi2_exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
double psi2_exp2 = log_gamma1(n,q) - Z2/(2.*lq);
log_psi2_n += (psi2_exp1>psi2_exp2)?psi2_exp1+log1p(exp(psi2_exp2-psi2_exp1)):psi2_exp2+log1p(exp(psi2_exp1-psi2_exp2));
}
double exp_psi2_n = exp(log_psi2_n);
psi2n(n,m1,m2) = variance*variance*exp_psi2_n;
if(m1!=m2) { psi2n(n,m2,m1) = variance*variance*exp_psi2_n;}
}
psi1(n,m1) = variance*exp(log_psi1);
}
}
"""
weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)
psi2 = psi2n.sum(axis=0)
return psi0,psi1,psi2,psi2n
from GPy.util.caching import Cacher
psicomputations = Cacher(_psicomputations, limit=1)
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
_,psi1,_,psi2n = psicomputations(variance, lengthscale, Z, variational_posterior)
mu = variational_posterior.mean
S = variational_posterior.variance
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
l2 = np.square(lengthscale)
log_denom1 = np.log(S/l2+1)
log_denom2 = np.log(2*S/l2+1)
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
gamma, gamma1 = variational_posterior.gamma_probabilities()
variance = float(variance)
dvar = np.zeros(1)
dmu = np.zeros((N,Q))
dS = np.zeros((N,Q))
dgamma = np.zeros((N,Q))
dl = np.zeros(Q)
dZ = np.zeros((M,Q))
dvar += np.sum(dL_dpsi0)
from ....util.misc import param_to_array
S = param_to_array(S)
mu = param_to_array(mu)
Z = param_to_array(Z)
support_code = """
#include <math.h>
"""
code = """
for(int n=0; n<N; n++) {
for(int m1=0;m1<M;m1++) {
double log_psi1=0;
for(int m2=0;m2<M;m2++) {
double log_psi2_n=0;
for(int q=0;q<Q;q++) {
double Snq = S(n,q);
double lq = l2(q);
double Zm1q = Z(m1,q);
double Zm2q = Z(m2,q);
double gnq = gamma(n,q);
double g1nq = gamma1(n,q);
double mu_nq = mu(n,q);
if(m2==0) {
// Compute Psi_1
double lpsi1 = psi1(n,m1)*dL_dpsi1(n,m1);
if(q==0) {dvar(0) += lpsi1/variance;}
double Zmu = Zm1q - mu_nq;
double denom = Snq+lq;
double Zmu2_denom = Zmu*Zmu/denom;
double exp1 = log_gamma(n,q)-(Zmu*Zmu/(Snq+lq)+log_denom1(n,q))/(2.);
double exp2 = log_gamma1(n,q)-Zm1q*Zm1q/(2.*lq);
double d_exp1,d_exp2;
if(exp1>exp2) {
d_exp1 = 1.;
d_exp2 = exp(exp2-exp1);
} else {
d_exp1 = exp(exp1-exp2);
d_exp2 = 1.;
}
double exp_sum = d_exp1+d_exp2;
dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum);
dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.;
dgamma(n,q) += lpsi1*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
dl(q) += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zm1q*Zm1q/(lq*lq)*d_exp2)/(2.*exp_sum);
dZ(m1,q) += lpsi1*(-Zmu/denom*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
}
// Compute Psi_2
double lpsi2 = psi2n(n,m1,m2)*dL_dpsi2(m1,m2);
if(q==0) {dvar(0) += lpsi2*2/variance;}
double dZm1m2 = Zm1q - Zm2q;
double Z2 = Zm1q*Zm1q+Zm2q*Zm2q;
double muZhat = mu_nq - (Zm1q + Zm2q)/2.;
double denom = 2.*Snq+lq;
double muZhat2_denom = muZhat*muZhat/denom;
double exp1 = dZm1m2*dZm1m2/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
double exp2 = log_gamma1(n,q) - Z2/(2.*lq);
double d_exp1,d_exp2;
if(exp1>exp2) {
d_exp1 = 1.;
d_exp2 = exp(exp2-exp1);
} else {
d_exp1 = exp(exp1-exp2);
d_exp2 = 1.;
}
double exp_sum = d_exp1+d_exp2;
dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum;
dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
dgamma(n,q) += lpsi2*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
}
}
}
}
"""
weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','gamma1','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)
dl *= 2.*lengthscale
if not ARD:
dl = dl.sum()
return dvar, dl, dZ, dmu, dS, dgamma
except:
def psicomputations(variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
psi0 = np.empty(mu.shape[0])
psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma)
return psi0, psi1, psi2
psi0 = np.empty(mu.shape[0])
psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma)
return psi0, psi1, psi2
def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results:
# _psi1 NxM
lengthscale2 = np.square(lengthscale)
# psi1
_psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
_psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
_psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. # NxMxQ
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
return _psi1
def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi2
# Produced intermediate results:
# _psi2 MxM
def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results:
# _psi1 NxM
lengthscale2 = np.square(lengthscale)
lengthscale2 = np.square(lengthscale)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi2
_psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom)
_psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
return _psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
# psi1
_psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
_psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
_psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. # NxMxQ
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
return _psi1
dL_dlengscale = dl_psi1 + dl_psi2
if not ARD:
dL_dlengscale = dL_dlengscale.sum()
dL_dgamma = dgamma_psi1 + dgamma_psi2
dL_dmu = dmu_psi1 + dmu_psi2
dL_dS = dS_psi1 + dS_psi2
dL_dZ = dZ_psi1 + dZ_psi2
def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi2
# Produced intermediate results:
# _psi2 MxM
lengthscale2 = np.square(lengthscale)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma):
"""
dL_dpsi1 - NxM
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results: dL_dparams w.r.t. psi1
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
# psi2
_psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom)
_psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
lengthscale2 = np.square(lengthscale)
# psi1
_psi1_denom = S / lengthscale2 + 1. # NxQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ
_psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
_psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
_dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1
_dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ
_dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ
_dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ
_dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z))
_dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z))
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
dL_dpsi2 - MxM
"""
# here are the "statistics" for psi2
# Produced the derivatives w.r.t. psi2:
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
return _psi2
lengthscale2 = np.square(lengthscale)
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
dL_dlengscale = dl_psi1 + dl_psi2
if not ARD:
dL_dlengscale = dL_dlengscale.sum()
dL_dgamma = dgamma_psi1 + dgamma_psi2
dL_dmu = dmu_psi1 + dmu_psi2
dL_dS = dS_psi1 + dS_psi2
dL_dZ = dZ_psi1 + dZ_psi2
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi2
_psi2_denom = 2.*S / lengthscale2 + 1. # NxQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:])
_psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
_psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
_dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance
_dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z))
_dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq)
_dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq)
_dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z))
_dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z))
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma):
"""
dL_dpsi1 - NxM
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results: dL_dparams w.r.t. psi1
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
# psi1
_psi1_denom = S / lengthscale2 + 1. # NxQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ
_psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
_psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
_dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1
_dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ
_dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ
_dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ
_dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z))
_dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z))
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
dL_dpsi2 - MxM
"""
# here are the "statistics" for psi2
# Produced the derivatives w.r.t. psi2:
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi2
_psi2_denom = 2.*S / lengthscale2 + 1. # NxQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:])
_psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
_psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
_dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance
_dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z))
_dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq)
_dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq)
_dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z))
_dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z))
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
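For reference, both the weave path and the numpy fallback above evaluate the same spike-and-slab psi1 statistic; reconstructed from the code (with \sigma^2 the kernel variance and \ell_q the lengthscales):

    \psi_1[n,m] = \sigma^2 \prod_{q=1}^{Q} \Big[ \gamma_{nq}\,(S_{nq}/\ell_q^2 + 1)^{-1/2} \exp\Big(-\frac{(\mu_{nq}-z_{mq})^2}{2(S_{nq}+\ell_q^2)}\Big) + (1-\gamma_{nq}) \exp\Big(-\frac{z_{mq}^2}{2\ell_q^2}\Big) \Big]

Each per-dimension factor is a two-component mixture, and both implementations sum its logarithm over q using the log-sum-exp (exponent_max) trick, so extreme gamma values or distant inducing points do not underflow.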

View file

@ -54,6 +54,8 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
self.variational_prior = NormalPrior()
X = NormalPosterior(X, X_variance)
self.kl_factr = 1.
if inference_method is None:
from ..inference.latent_function_inference.var_dtc import VarDTC
self.logger.debug("creating inference_method var_dtc")
@ -81,18 +83,19 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None):
posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices)
kl_fctr = 1.
current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
variational_posterior=X,
Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=grad_dict['dL_dpsi1'],
dL_dpsi2=grad_dict['dL_dpsi2'])
kl_fctr = self.kl_factr
if self.missing_data:
d = self.output_dim
log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d
else:
log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)
current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
variational_posterior=X,
Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=grad_dict['dL_dpsi1'],
dL_dpsi2=grad_dict['dL_dpsi2'])
# Subsetting Variational Posterior objects, makes the gradients
# empty. We need them to be 0 though:

View file

@ -100,12 +100,11 @@ class MRD(BayesianGPLVMMiniBatch):
self.logger.info("building kernels")
if kernel is None:
from ..kern import RBF
kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))]
kernels = [RBF(input_dim, ARD=1, lengthscale=1./fracs[i]) for i in range(len(Ylist))]
elif isinstance(kernel, Kern):
kernels = []
for i in range(len(Ylist)):
k = kernel.copy()
print k is kernel, k.observers, k.constraints
kernels.append(k)
else:
assert len(kernel) == len(Ylist), "need one kernel per output"
@ -144,8 +143,16 @@ class MRD(BayesianGPLVMMiniBatch):
for i, n, k, l, Y, im, bs in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist, self.inference_method, batchsize):
assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"
md = np.isnan(Y).any()
spgp = SparseGPMiniBatch(self.X, Y, Z, k, l, im, n, None, normalizer, md, stochastic, bs)
spgp = BayesianGPLVMMiniBatch(Y, input_dim, X, X_variance,
Z=Z, kernel=k, likelihood=l,
inference_method=im, name=n,
normalizer=normalizer,
missing_data=md,
stochastic=stochastic,
batchsize=bs)
spgp.kl_factr = 1./len(Ynames)
spgp.unlink_parameter(spgp.Z)
spgp.unlink_parameter(spgp.X)
del spgp.Z
del spgp.X
spgp.Z = self.Z
@ -165,20 +172,10 @@ class MRD(BayesianGPLVMMiniBatch):
self.logger.info('working on im <{}>'.format(hex(id(i))))
self.Z.gradient[:] += b.full_values['Zgrad']
grad_dict = b.grad_dict
grad_dict = b.full_values
if isinstance(self.X, VariationalPosterior):
dL_dmean, dL_dS = b.kern.gradients_qX_expectations(
grad_dict['dL_dpsi0'],
grad_dict['dL_dpsi1'],
grad_dict['dL_dpsi2'],
self.Z, self.X)
self.X.mean.gradient += dL_dmean
self.X.variance.gradient += dL_dS
else:
#gradients wrt kernel
self.X.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'], self.X, self.Z)
self.X.mean.gradient += grad_dict['meangrad']
self.X.variance.gradient += grad_dict['vargrad']
if isinstance(self.X, VariationalPosterior):
# update for the KL divergence
@ -238,7 +235,7 @@ class MRD(BayesianGPLVMMiniBatch):
pass
if axes is None:
ax = fig.add_subplot(1, len(self.bgplvms), i + 1, sharex=sharex_ax, sharey=sharey_ax)
elif isinstance(axes, (tuple, list)):
elif isinstance(axes, (tuple, list, np.ndarray)):
ax = axes[i]
else:
raise ValueError("Need one axes per latent dimension input_dim")
@ -286,7 +283,7 @@ class MRD(BayesianGPLVMMiniBatch):
titles = [r'${}$'.format(name) for name in self.names]
ymax = reduce(max, [np.ceil(max(g.kern.input_sensitivity())) for g in self.bgplvms])
def plotf(i, g, ax):
ax.set_ylim([0,ymax])
#ax.set_ylim([0,ymax])
return g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs)
fig = self._handle_plotting(fignum, ax, plotf, sharex=sharex, sharey=sharey)
return fig

View file

@ -56,6 +56,7 @@ Created on 3 Nov 2014
raise NotImplementedError, "what to do what to do?"
print "defaulting to ", inference_method, "for latent function inference"
self.kl_factr = 1.
self.Z = Param('inducing inputs', Z)
self.num_inducing = Z.shape[0]

View file

@ -39,10 +39,7 @@ class SSGPLVM(SparseGP_MPI):
X_variance = np.random.uniform(0,.1,X.shape)
if Gamma is None:
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
gamma[gamma>1.-1e-9] = 1.-1e-9
gamma[gamma<1e-9] = 1e-9
gamma = np.random.randn(X.shape[0], input_dim)
else:
gamma = Gamma.copy()

View file

@ -162,7 +162,7 @@ def plot_latent(model, labels=None, which_indices=None,
else:
x = X[index, input_1]
y = X[index, input_2]
ax.scatter(x, y, marker=m, s=s, c=Tango.nextMedium(), label=this_label, linewidth=.5, edgecolor='k', alpha=.9)
ax.scatter(x, y, marker=m, s=s, c=Tango.nextMedium(), label=this_label, linewidth=.2, edgecolor='k', alpha=.9)
ax.set_xlabel('latent dimension %i' % input_1)
ax.set_ylabel('latent dimension %i' % input_2)

View file

@ -1,5 +1,5 @@
from ..core.parameterization.parameter_core import Observable
import collections, weakref, logging
from ..core.parameterization.observable import Observable
import collections, weakref
class Cacher(object):
def __init__(self, operation, limit=5, ignore_args=(), force_kwargs=()):

View file

@ -328,105 +328,6 @@ def ppca(Y, Q, iterations=100):
pass
return np.asarray_chkfinite(exp_x), np.asarray_chkfinite(W)
def ppca_missing_data_at_random(Y, Q, iters=100):
"""
EM implementation of Probabilistic pca for when there is missing data.
Taken from <SheffieldML, https://github.com/SheffieldML>
.. math:
\\mathbf{Y} = \mathbf{XW} + \\epsilon \\text{, where}
\\epsilon = \\mathcal{N}(0, \\sigma^2 \mathbf{I})
:returns: X, W, sigma^2
"""
from numpy.ma import dot as madot
import diag
from GPy.util.subarray_and_sorting import common_subarrays
import time
debug = 1
# Initialise W randomly
N, D = Y.shape
W = np.random.randn(Q, D) * 1e-3
Y = np.ma.masked_invalid(Y, copy=1)
nu = 1.
#num_obs_i = 1./Y.count()
Ycentered = Y - Y.mean(0)
X = np.zeros((N,Q))
cs = common_subarrays(Y.mask)
cr = common_subarrays(Y.mask, 1)
Sigma = np.zeros((N, Q, Q))
Sigma2 = np.zeros((N, Q, Q))
mu = np.zeros(D)
"""
if debug:
import matplotlib.pyplot as pylab
fig = pylab.figure("FIT MISSING DATA");
ax = fig.gca()
ax.cla()
lines = pylab.plot(np.zeros((N,Q)).dot(W))
"""
W2 = np.zeros((Q,D))
for i in range(iters):
# Sigma = np.linalg.solve(diag.add(madot(W,W.T), nu), diag.times(np.eye(Q),nu))
# exp_x = madot(madot(Ycentered, W.T),Sigma)/nu
# Ycentered = (Y - exp_x.dot(W).mean(0))
# #import ipdb;ipdb.set_trace()
# #Ycentered = mu
# W = np.linalg.solve(madot(exp_x.T,exp_x) + Sigma, madot(exp_x.T, Ycentered))
# nu = (((Ycentered - madot(exp_x, W))**2).sum(0) + madot(W.T,madot(Sigma,W)).sum(0)).sum()/N
for csi, (mask, index) in enumerate(cs.iteritems()):
mask = ~np.array(mask)
Sigma2[index, :, :] = nu * np.linalg.inv(diag.add(W2[:,mask].dot(W2[:,mask].T), nu))
#X[index,:] = madot((Sigma[csi]/nu),madot(W,Ycentered[index].T))[:,0]
X2 = ((Sigma2/nu) * (madot(Ycentered,W2.T).base)[:,:,None]).sum(-1)
mu2 = (Y - X.dot(W)).mean(0)
for n in range(N):
Sigma[n] = nu * np.linalg.inv(diag.add(W[:,~Y.mask[n]].dot(W[:,~Y.mask[n]].T), nu))
X[n, :] = (Sigma[n]/nu).dot(W[:,~Y.mask[n]].dot(Ycentered[n,~Y.mask[n]].T))
for d in range(D):
mu[d] = (Y[~Y.mask[:,d], d] - X[~Y.mask[:,d]].dot(W[:, d])).mean()
Ycentered = (Y - mu)
nu3 = 0.
for cri, (mask, index) in enumerate(cr.iteritems()):
mask = ~np.array(mask)
W2[:,index] = np.linalg.solve(X[mask].T.dot(X[mask]) + Sigma[mask].sum(0), madot(X[mask].T, Ycentered[mask,index]))[:,None]
W2[:,index] = np.linalg.solve(X.T.dot(X) + Sigma.sum(0), madot(X.T, Ycentered[:,index]))
#nu += (((Ycentered[mask,index] - X[mask].dot(W[:,index]))**2).sum(0) + W[:,index].T.dot(Sigma[mask].sum(0).dot(W[:,index])).sum(0)).sum()
nu3 += (((Ycentered[index] - X.dot(W[:,index]))**2).sum(0) + W[:,index].T.dot(Sigma.sum(0).dot(W[:,index])).sum(0)).sum()
nu3 /= N
nu = 0.
nu2 = 0.
W = np.zeros((Q,D))
for j in range(D):
W[:,j] = np.linalg.solve(X[~Y.mask[:,j]].T.dot(X[~Y.mask[:,j]]) + Sigma[~Y.mask[:,j]].sum(0), madot(X[~Y.mask[:,j]].T, Ycentered[~Y.mask[:,j],j]))
nu2f = np.tensordot(W[:,j].T, Sigma[~Y.mask[:,j],:,:], [0,1]).dot(W[:,j])
nu2s = W[:,j].T.dot(Sigma[~Y.mask[:,j],:,:].sum(0).dot(W[:,j]))
nu2 += (((Ycentered[~Y.mask[:,j],j] - X[~Y.mask[:,j],:].dot(W[:,j]))**2) + nu2f).sum()
for i in range(N):
if not Y.mask[i,j]:
nu += ((Ycentered[i,j] - X[i,:].dot(W[:,j]))**2) + W[:,j].T.dot(Sigma[i,:,:].dot(W[:,j]))
nu /= N
nu2 /= N
nu4 = (((Ycentered - X.dot(W))**2).sum(0) + W.T.dot(Sigma.sum(0).dot(W)).sum(0)).sum()/N
import ipdb;ipdb.set_trace()
"""
if debug:
#print Sigma[0]
print "nu:", nu, "sum(X):", X.sum()
pred_y = X.dot(W)
for x, l in zip(pred_y.T, lines):
l.set_ydata(x)
ax.autoscale_view()
ax.set_ylim(pred_y.min(), pred_y.max())
fig.canvas.draw()
time.sleep(.3)
"""
return np.asarray_chkfinite(X), np.asarray_chkfinite(W), nu
def tdot_numpy(mat, out=None):
return np.dot(mat, mat.T, out)

View file

@ -20,6 +20,7 @@ setup(name = 'GPy',
url = "http://sheffieldml.github.com/GPy/",
packages = ["GPy.models",
"GPy.inference.optimization",
"GPy.inference.mcmc",
"GPy.inference",
"GPy.inference.latent_function_inference",
"GPy.likelihoods", "GPy.mappings",
@ -31,14 +32,20 @@ setup(name = 'GPy',
"GPy.plotting.matplot_dep.latent_space_visualizations",
"GPy.plotting.matplot_dep", "GPy.plotting"],
package_dir={'GPy': 'GPy'},
package_data = {'GPy': ['defaults.cfg', 'installation.cfg', 'util/data_resources.json', 'util/football_teams.json']},
package_data = {'GPy': ['defaults.cfg', 'installation.cfg',
'util/data_resources.json',
'util/football_teams.json']},
include_package_data = True,
py_modules = ['GPy.__init__'],
test_suite = 'GPy.testing',
long_description=read('README.md'),
install_requires=['numpy>=1.8', 'scipy>=0.14'],
extras_require = {
'docs':['matplotlib>=1.4','Sphinx','ipython'],
},
classifiers=[
"License :: OSI Approved :: BSD License"],
zip_safe = False
install_requires=['numpy>=1.7', 'scipy>=0.12'],
extras_require = {'docs':['matplotlib >=1.3','Sphinx','IPython']},
classifiers=['License :: OSI Approved :: BSD License',
'Natural Language :: English',
'Operating System :: MacOS :: MacOS X',
'Operating System :: Microsoft :: Windows',
'Operating System :: POSIX :: Linux',
'Programming Language :: Python :: 2.7',
'Topic :: Scientific/Engineering :: Artificial Intelligence']
)