Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
Ricardo 2014-07-11 10:43:02 +01:00
commit 369cc0ba2b
25 changed files with 737 additions and 277 deletions

View file

@ -12,6 +12,10 @@ from .. import likelihoods
from ..likelihoods.gaussian import Gaussian from ..likelihoods.gaussian import Gaussian
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation, LatentFunctionInference from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation, LatentFunctionInference
from parameterization.variational import VariationalPosterior from parameterization.variational import VariationalPosterior
from scipy.sparse.base import issparse
import logging
logger = logging.getLogger("GP")
class GP(Model): class GP(Model):
""" """
@ -34,12 +38,14 @@ class GP(Model):
assert X.ndim == 2 assert X.ndim == 2
if isinstance(X, (ObsAr, VariationalPosterior)): if isinstance(X, (ObsAr, VariationalPosterior)):
self.X = X.copy() self.X = X.copy()
else: self.X = ObsAr(X.copy()) else: self.X = ObsAr(X)
self.num_data, self.input_dim = self.X.shape self.num_data, self.input_dim = self.X.shape
assert Y.ndim == 2 assert Y.ndim == 2
self.Y = ObsAr(Y.copy()) logger.info("initializing Y")
if issparse(Y): self.Y = Y
else: self.Y = ObsAr(Y)
assert Y.shape[0] == self.num_data assert Y.shape[0] == self.num_data
_, self.output_dim = self.Y.shape _, self.output_dim = self.Y.shape
@ -54,6 +60,7 @@ class GP(Model):
self.likelihood = likelihood self.likelihood = likelihood
#find a sensible inference method #find a sensible inference method
logger.info("initializing inference method")
if inference_method is None: if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian) or isinstance(likelihood, likelihoods.MixedNoise): if isinstance(likelihood, likelihoods.Gaussian) or isinstance(likelihood, likelihoods.MixedNoise):
inference_method = exact_gaussian_inference.ExactGaussianInference() inference_method = exact_gaussian_inference.ExactGaussianInference()
@ -62,6 +69,7 @@ class GP(Model):
print "defaulting to ", inference_method, "for latent function inference" print "defaulting to ", inference_method, "for latent function inference"
self.inference_method = inference_method self.inference_method = inference_method
logger.info("adding kernel and likelihood as parameters")
self.add_parameter(self.kern) self.add_parameter(self.kern)
self.add_parameter(self.likelihood) self.add_parameter(self.likelihood)
@ -199,9 +207,9 @@ class GP(Model):
if fillcol is not None: if fillcol is not None:
kw['fillcol'] = fillcol kw['fillcol'] = fillcol
return models_plots.plot_fit(self, plot_limits, which_data_rows, return models_plots.plot_fit(self, plot_limits, which_data_rows,
which_data_ycols, fixed_inputs, which_data_ycols, fixed_inputs,
levels, samples, fignum, ax, resolution, levels, samples, fignum, ax, resolution,
plot_raw=plot_raw, Y_metadata=Y_metadata, plot_raw=plot_raw, Y_metadata=Y_metadata,
data_symbol=data_symbol, **kw) data_symbol=data_symbol, **kw)
def plot(self, plot_limits=None, which_data_rows='all', def plot(self, plot_limits=None, which_data_rows='all',
@ -250,9 +258,9 @@ class GP(Model):
if fillcol is not None: if fillcol is not None:
kw['fillcol'] = fillcol kw['fillcol'] = fillcol
return models_plots.plot_fit(self, plot_limits, which_data_rows, return models_plots.plot_fit(self, plot_limits, which_data_rows,
which_data_ycols, fixed_inputs, which_data_ycols, fixed_inputs,
levels, samples, fignum, ax, resolution, levels, samples, fignum, ax, resolution,
plot_raw=plot_raw, Y_metadata=Y_metadata, plot_raw=plot_raw, Y_metadata=Y_metadata,
data_symbol=data_symbol, **kw) data_symbol=data_symbol, **kw)
def input_sensitivity(self): def input_sensitivity(self):
@ -281,4 +289,4 @@ class GP(Model):
except KeyboardInterrupt: except KeyboardInterrupt:
print "KeyboardInterrupt caught, calling on_optimization_end() to round things up" print "KeyboardInterrupt caught, calling on_optimization_end() to round things up"
self.inference_method.on_optimization_end() self.inference_method.on_optimization_end()
raise raise

View file

@ -118,12 +118,12 @@ class Model(Parameterized):
""" """
The objective function for the given algorithm. The objective function for the given algorithm.
This function is the true objective, which wants to be minimized. This function is the true objective, which wants to be minimized.
Note that all parameters are already set and in place, so you just need Note that all parameters are already set and in place, so you just need
to return the objective function here. to return the objective function here.
For probabilistic models this is the negative log_likelihood For probabilistic models this is the negative log_likelihood
(including the MAP prior), so we return it here. If your model is not (including the MAP prior), so we return it here. If your model is not
probabilistic, just return your objective to minimize here! probabilistic, just return your objective to minimize here!
""" """
return -float(self.log_likelihood()) - self.log_prior() return -float(self.log_likelihood()) - self.log_prior()
@ -131,18 +131,18 @@ class Model(Parameterized):
def objective_function_gradients(self): def objective_function_gradients(self):
""" """
The gradients for the objective function for the given algorithm. The gradients for the objective function for the given algorithm.
The gradients are w.r.t. the *negative* objective function, as The gradients are w.r.t. the *negative* objective function, as
this framework works with *negative* log-likelihoods as a default. this framework works with *negative* log-likelihoods as a default.
You can find the gradient for the parameters in self.gradient at all times. You can find the gradient for the parameters in self.gradient at all times.
This is the place, where gradients get stored for parameters. This is the place, where gradients get stored for parameters.
This function is the true objective, which wants to be minimized. This function is the true objective, which wants to be minimized.
Note that all parameters are already set and in place, so you just need Note that all parameters are already set and in place, so you just need
to return the gradient here. to return the gradient here.
For probabilistic models this is the gradient of the negative log_likelihood For probabilistic models this is the gradient of the negative log_likelihood
(including the MAP prior), so we return it here. If your model is not (including the MAP prior), so we return it here. If your model is not
probabilistic, just return your *negative* gradient here! probabilistic, just return your *negative* gradient here!
""" """
return -(self._log_likelihood_gradients() + self._log_prior_gradients()) return -(self._log_likelihood_gradients() + self._log_prior_gradients())
@ -225,14 +225,18 @@ class Model(Parameterized):
if self.size == 0: if self.size == 0:
raise RuntimeError, "Model without parameters cannot be optimized" raise RuntimeError, "Model without parameters cannot be optimized"
if optimizer is None:
optimizer = self.preferred_optimizer
if start == None: if start == None:
start = self.optimizer_array start = self.optimizer_array
optimizer = optimization.get_optimizer(optimizer) if optimizer is None:
opt = optimizer(start, model=self, **kwargs) optimizer = self.preferred_optimizer
if isinstance(optimizer, optimization.Optimizer):
opt = optimizer
opt.model = self
else:
optimizer = optimization.get_optimizer(optimizer)
opt = optimizer(start, model=self, **kwargs)
opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads) opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads)
@ -249,7 +253,7 @@ class Model(Parameterized):
def _checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3): def _checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
""" """
Check the gradient of the ,odel by comparing to a numerical Check the gradient of the ,odel by comparing to a numerical
estimate. If the verbose flag is passed, invividual estimate. If the verbose flag is passed, individual
components are tested (and printed) components are tested (and printed)
:param verbose: If True, print a "full" checking of each parameter :param verbose: If True, print a "full" checking of each parameter

View file

@ -33,7 +33,7 @@ class ObsAr(np.ndarray, Pickleable, Observable):
def _setup_observers(self): def _setup_observers(self):
# do not setup anything, as observable arrays do not have default observers # do not setup anything, as observable arrays do not have default observers
pass pass
def copy(self): def copy(self):
from lists_and_dicts import ObserverList from lists_and_dicts import ObserverList
memo = {} memo = {}

View file

@ -751,8 +751,6 @@ class OptimizationHandlable(Indexable):
Transform the gradients by multiplying the gradient factor for each Transform the gradients by multiplying the gradient factor for each
constraint to it. constraint to it.
""" """
if self.has_parent():
return g
[np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__] [np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
if self._has_fixes(): return g[self._fixes_] if self._has_fixes(): return g[self._fixes_]
return g return g
@ -793,7 +791,7 @@ class OptimizationHandlable(Indexable):
#=========================================================================== #===========================================================================
# Randomizeable # Randomizeable
#=========================================================================== #===========================================================================
def randomize(self, rand_gen=np.random.normal, loc=0, scale=1, *args, **kwargs): def randomize(self, rand_gen=np.random.normal, *args, **kwargs):
""" """
Randomize the model. Randomize the model.
Make this draw from the prior if one exists, else draw from given random generator Make this draw from the prior if one exists, else draw from given random generator
@ -804,7 +802,7 @@ class OptimizationHandlable(Indexable):
:param args, kwargs: will be passed through to random number generator :param args, kwargs: will be passed through to random number generator
""" """
# first take care of all parameters (from N(0,1)) # first take care of all parameters (from N(0,1))
x = rand_gen(loc=loc, scale=scale, size=self._size_transformed(), *args, **kwargs) x = rand_gen(size=self._size_transformed(), *args, **kwargs)
# now draw from prior where possible # now draw from prior where possible
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None] [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...) self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...)
@ -835,6 +833,11 @@ class OptimizationHandlable(Indexable):
1.) connect param_array of children to self.param_array 1.) connect param_array of children to self.param_array
2.) tell all children to propagate further 2.) tell all children to propagate further
""" """
if self.param_array.size != self.size:
self._param_array_ = np.empty(self.size, dtype=np.float64)
if self.gradient.size != self.size:
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
pi_old_size = 0 pi_old_size = 0
for pi in self.parameters: for pi in self.parameters:
pislice = slice(pi_old_size, pi_old_size + pi.size) pislice = slice(pi_old_size, pi_old_size + pi.size)
@ -848,6 +851,9 @@ class OptimizationHandlable(Indexable):
pi._propagate_param_grad(parray[pislice], garray[pislice]) pi._propagate_param_grad(parray[pislice], garray[pislice])
pi_old_size += pi.size pi_old_size += pi.size
def _connect_parameters(self):
pass
class Parameterizable(OptimizationHandlable): class Parameterizable(OptimizationHandlable):
""" """
A parameterisable class. A parameterisable class.
@ -874,6 +880,9 @@ class Parameterizable(OptimizationHandlable):
""" """
Array representing the parameters of this class. Array representing the parameters of this class.
There is only one copy of all parameters in memory, two during optimization. There is only one copy of all parameters in memory, two during optimization.
!WARNING!: setting the parameter array MUST always be done in memory:
m.param_array[:] = m_copy.param_array
""" """
if self.__dict__.get('_param_array_', None) is None: if self.__dict__.get('_param_array_', None) is None:
self._param_array_ = np.empty(self.size, dtype=np.float64) self._param_array_ = np.empty(self.size, dtype=np.float64)
@ -986,6 +995,11 @@ class Parameterizable(OptimizationHandlable):
# notification system # notification system
#=========================================================================== #===========================================================================
def _parameters_changed_notification(self, me, which=None): def _parameters_changed_notification(self, me, which=None):
"""
In parameterizable we just need to make sure, that the next call to optimizer_array
will update the optimizer_array to the latest parameters
"""
self._optimizer_copy_transformed = False # tells the optimizer array to update on next request
self.parameters_changed() self.parameters_changed()
def _pass_through_notify_observers(self, me, which=None): def _pass_through_notify_observers(self, me, which=None):
self.notify_observers(which=which) self.notify_observers(which=which)
@ -1017,4 +1031,3 @@ class Parameterizable(OptimizationHandlable):
updates get passed through. See :py:function:``GPy.core.param.Observable.add_observer`` updates get passed through. See :py:function:``GPy.core.param.Observable.add_observer``
""" """
pass pass

View file

@ -8,11 +8,23 @@ from re import compile, _pattern_type
from param import ParamConcatenation from param import ParamConcatenation
from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
import logging
logger = logging.getLogger("parameters changed meta")
class ParametersChangedMeta(type): class ParametersChangedMeta(type):
def __call__(self, *args, **kw): def __call__(self, *args, **kw):
instance = super(ParametersChangedMeta, self).__call__(*args, **kw) self._in_init_ = True
instance.parameters_changed() #import ipdb;ipdb.set_trace()
return instance self = super(ParametersChangedMeta, self).__call__(*args, **kw)
logger.debug("finished init")
self._in_init_ = False
logger.debug("connecting parameters")
self._highest_parent_._connect_parameters()
self._highest_parent_._notify_parent_change()
self._highest_parent_._connect_fixes()
logger.debug("calling parameters changed")
self.parameters_changed()
return self
class Parameterized(Parameterizable): class Parameterized(Parameterizable):
""" """
@ -57,21 +69,19 @@ class Parameterized(Parameterizable):
and concatenate them. Printing m[''] will result in printing of all parameters in detail. and concatenate them. Printing m[''] will result in printing of all parameters in detail.
""" """
#=========================================================================== #===========================================================================
# Metaclass for parameters changed after init. # Metaclass for parameters changed after init.
# This makes sure, that parameters changed will always be called after __init__ # This makes sure, that parameters changed will always be called after __init__
# **Never** call parameters_changed() yourself # **Never** call parameters_changed() yourself
__metaclass__ = ParametersChangedMeta __metaclass__ = ParametersChangedMeta
#=========================================================================== #===========================================================================
def __init__(self, name=None, parameters=[], *a, **kw): def __init__(self, name=None, parameters=[], *a, **kw):
super(Parameterized, self).__init__(name=name, *a, **kw) super(Parameterized, self).__init__(name=name, *a, **kw)
self._in_init_ = True
self.size = sum(p.size for p in self.parameters) self.size = sum(p.size for p in self.parameters)
self.add_observer(self, self._parameters_changed_notification, -100) self.add_observer(self, self._parameters_changed_notification, -100)
if not self._has_fixes(): if not self._has_fixes():
self._fixes_ = None self._fixes_ = None
self._param_slices_ = [] self._param_slices_ = []
self._connect_parameters() #self._connect_parameters()
del self._in_init_
self.add_parameters(*parameters) self.add_parameters(*parameters)
def build_pydot(self, G=None): def build_pydot(self, G=None):
@ -125,6 +135,9 @@ class Parameterized(Parameterizable):
param._parent_.remove_parameter(param) param._parent_.remove_parameter(param)
# make sure the size is set # make sure the size is set
if index is None: if index is None:
start = sum(p.size for p in self.parameters)
self.constraints.shift_right(start, param.size)
self.priors.shift_right(start, param.size)
self.constraints.update(param.constraints, self.size) self.constraints.update(param.constraints, self.size)
self.priors.update(param.priors, self.size) self.priors.update(param.priors, self.size)
self.parameters.append(param) self.parameters.append(param)
@ -143,14 +156,16 @@ class Parameterized(Parameterizable):
parent.size += param.size parent.size += param.size
parent = parent._parent_ parent = parent._parent_
self._connect_parameters() if not self._in_init_:
self._connect_parameters()
self._notify_parent_change()
self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names) self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names)
self._highest_parent_._notify_parent_change() self._highest_parent_._notify_parent_change()
self._highest_parent_._connect_fixes() self._highest_parent_._connect_fixes()
else: else:
raise HierarchyError, """Parameter exists already and no copy made""" raise HierarchyError, """Parameter exists already, try making a copy"""
def add_parameters(self, *parameters): def add_parameters(self, *parameters):
@ -198,26 +213,28 @@ class Parameterized(Parameterizable):
# no parameters for this class # no parameters for this class
return return
if self.param_array.size != self.size: if self.param_array.size != self.size:
self.param_array = np.empty(self.size, dtype=np.float64) self._param_array_ = np.empty(self.size, dtype=np.float64)
if self.gradient.size != self.size: if self.gradient.size != self.size:
self._gradient_array_ = np.empty(self.size, dtype=np.float64) self._gradient_array_ = np.empty(self.size, dtype=np.float64)
old_size = 0 old_size = 0
self._param_slices_ = [] self._param_slices_ = []
for i, p in enumerate(self.parameters): for i, p in enumerate(self.parameters):
if not p.param_array.flags['C_CONTIGUOUS']:
raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"
p._parent_ = self p._parent_ = self
p._parent_index_ = i p._parent_index_ = i
pslice = slice(old_size, old_size + p.size) pslice = slice(old_size, old_size + p.size)
# first connect all children # first connect all children
p._propagate_param_grad(self.param_array[pslice], self.gradient_full[pslice]) p._propagate_param_grad(self.param_array[pslice], self.gradient_full[pslice])
# then connect children to self # then connect children to self
self.param_array[pslice] = p.param_array.flat # , requirements=['C', 'W']).ravel(order='C') self.param_array[pslice] = p.param_array.flat # , requirements=['C', 'W']).ravel(order='C')
self.gradient_full[pslice] = p.gradient_full.flat # , requirements=['C', 'W']).ravel(order='C') self.gradient_full[pslice] = p.gradient_full.flat # , requirements=['C', 'W']).ravel(order='C')
if not p.param_array.flags['C_CONTIGUOUS']:
raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"
p.param_array.data = self.param_array[pslice].data p.param_array.data = self.param_array[pslice].data
p.gradient_full.data = self.gradient_full[pslice].data p.gradient_full.data = self.gradient_full[pslice].data
@ -332,7 +349,7 @@ class Parameterized(Parameterizable):
def __str__(self, header=True): def __str__(self, header=True):
name = adjust_name_for_printing(self.name) + "." name = adjust_name_for_printing(self.name) + "."
constrs = self._constraints_str; constrs = self._constraints_str;
ts = self._ties_str ts = self._ties_str
prirs = self._priors_str prirs = self._priors_str
desc = self._description_str; names = self.parameter_names() desc = self._description_str; names = self.parameter_names()

View file

@ -76,11 +76,11 @@ class Uniform(Prior):
o = super(Prior, cls).__new__(cls, lower, upper) o = super(Prior, cls).__new__(cls, lower, upper)
cls._instances.append(weakref.ref(o)) cls._instances.append(weakref.ref(o))
return cls._instances[-1]() return cls._instances[-1]()
def __init__(self, lower, upper): def __init__(self, lower, upper):
self.lower = float(lower) self.lower = float(lower)
self.upper = float(upper) self.upper = float(upper)
def __str__(self): def __str__(self):
return "[" + str(np.round(self.lower)) + ', ' + str(np.round(self.upper)) + ']' return "[" + str(np.round(self.lower)) + ', ' + str(np.round(self.upper)) + ']'
@ -93,7 +93,7 @@ class Uniform(Prior):
def rvs(self, n): def rvs(self, n):
return np.random.uniform(self.lower, self.upper, size=n) return np.random.uniform(self.lower, self.upper, size=n)
class LogGaussian(Prior): class LogGaussian(Prior):
""" """
Implementation of the univariate *log*-Gaussian probability function, coupled with random variables. Implementation of the univariate *log*-Gaussian probability function, coupled with random variables.
@ -246,7 +246,7 @@ class Gamma(Prior):
""" """
Creates an instance of a Gamma Prior by specifying the Expected value(s) Creates an instance of a Gamma Prior by specifying the Expected value(s)
and Variance(s) of the distribution. and Variance(s) of the distribution.
:param E: expected value :param E: expected value
:param V: variance :param V: variance
""" """

View file

@ -8,6 +8,9 @@ from ..inference.latent_function_inference import var_dtc
from .. import likelihoods from .. import likelihoods
from parameterization.variational import VariationalPosterior from parameterization.variational import VariationalPosterior
import logging
logger = logging.getLogger("sparse gp")
class SparseGP(GP): class SparseGP(GP):
""" """
A general purpose Sparse GP model A general purpose Sparse GP model
@ -46,7 +49,7 @@ class SparseGP(GP):
self.num_inducing = Z.shape[0] self.num_inducing = Z.shape[0]
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata) GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata)
logger.info("Adding Z as parameter")
self.add_parameter(self.Z, index=0) self.add_parameter(self.Z, index=0)
def has_uncertain_inputs(self): def has_uncertain_inputs(self):
@ -66,10 +69,10 @@ class SparseGP(GP):
#gradients wrt Z #gradients wrt Z
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z) self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += self.kern.gradients_Z_expectations( self.Z.gradient += self.kern.gradients_Z_expectations(
self.grad_dict['dL_dpsi0'], self.grad_dict['dL_dpsi0'],
self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi1'],
self.grad_dict['dL_dpsi2'], self.grad_dict['dL_dpsi2'],
Z=self.Z, Z=self.Z,
variational_posterior=self.X) variational_posterior=self.X)
else: else:
#gradients wrt kernel #gradients wrt kernel

View file

@ -37,7 +37,7 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan
# k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True) # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
p = .3 p = .3
m = GPy.models.BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m = GPy.models.BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
if nan: if nan:
@ -144,7 +144,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=25, Q=4
m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel) m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel)
m.data_colors = c m.data_colors = c
m.data_t = t m.data_t = t
if optimize: if optimize:
m.optimize('bfgs', messages=verbose, max_iters=2e3) m.optimize('bfgs', messages=verbose, max_iters=2e3)
@ -169,7 +169,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
Y = data['X'][:N] Y = data['X'][:N]
m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k) m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k)
m.data_labels = data['Y'][:N].argmax(axis=1) m.data_labels = data['Y'][:N].argmax(axis=1)
if optimize: if optimize:
m.optimize('bfgs', messages=verbose, max_iters=max_iters, gtol=.05) m.optimize('bfgs', messages=verbose, max_iters=max_iters, gtol=.05)
@ -296,15 +296,16 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
from GPy.models import BayesianGPLVM from GPy.models import BayesianGPLVM
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 7, 9 D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0] Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool) inan = _np.random.binomial(1, .8, size=Y.shape).astype(bool) # 80% missing data
Y[inan] = _np.nan Ymissing = Y.copy()
Ymissing[inan] = _np.nan
m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
inference_method=VarDTCMissingData(inan=inan), kernel=k) inference_method=VarDTCMissingData(inan=inan), kernel=k)
m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape) m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
@ -364,7 +365,7 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim
for inan in inanlist: for inan in inanlist:
imlist.append(VarDTCMissingData(limit=1, inan=inan)) imlist.append(VarDTCMissingData(limit=1, inan=inan))
m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing,
kernel=k, inference_method=imlist, kernel=k, inference_method=imlist,
initx="random", initz='permute', **kw) initx="random", initz='permute', **kw)
@ -410,11 +411,11 @@ def olivetti_faces(optimize=True, verbose=True, plot=True):
Yn /= Yn.std() Yn /= Yn.std()
m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=20) m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=20)
if optimize: m.optimize('bfgs', messages=verbose, max_iters=1000) if optimize: m.optimize('bfgs', messages=verbose, max_iters=1000)
if plot: if plot:
ax = m.plot_latent(which_indices=(0, 1)) ax = m.plot_latent(which_indices=(0, 1))
y = m.likelihood.Y[0, :] y = m.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False) data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False)
lvm = GPy.plotting.matplot_dep.visualize.lvm(m.X.mean[0, :].copy(), m, data_show, ax) lvm = GPy.plotting.matplot_dep.visualize.lvm(m.X.mean[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
@ -514,7 +515,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
data = GPy.util.datasets.osu_run1() data = GPy.util.datasets.osu_run1()
Q = 6 Q = 6
kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True) kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True)
m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel) m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
m.data = data m.data = data
@ -566,7 +567,7 @@ def ssgplvm_simulation_linear():
import GPy import GPy
N, D, Q = 1000, 20, 5 N, D, Q = 1000, 20, 5
pi = 0.2 pi = 0.2
def sample_X(Q, pi): def sample_X(Q, pi):
x = np.empty(Q) x = np.empty(Q)
dies = np.random.rand(Q) dies = np.random.rand(Q)
@ -576,7 +577,7 @@ def ssgplvm_simulation_linear():
else: else:
x[q] = 0. x[q] = 0.
return x return x
Y = np.empty((N,D)) Y = np.empty((N,D))
X = np.empty((N,Q)) X = np.empty((N,Q))
# Generate data from random sampled weight matrices # Generate data from random sampled weight matrices
@ -584,4 +585,4 @@ def ssgplvm_simulation_linear():
X[n] = sample_X(Q,pi) X[n] = sample_X(Q,pi)
w = np.random.randn(D,Q) w = np.random.randn(D,Q)
Y[n] = np.dot(w,X[n]) Y[n] = np.dot(w,X[n])

View file

@ -9,6 +9,8 @@ import numpy as np
from ...util.misc import param_to_array from ...util.misc import param_to_array
from . import LatentFunctionInference from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
import logging, itertools
logger = logging.getLogger('vardtc')
class VarDTC(LatentFunctionInference): class VarDTC(LatentFunctionInference):
""" """
@ -36,11 +38,11 @@ class VarDTC(LatentFunctionInference):
return param_to_array(np.sum(np.square(Y))) return param_to_array(np.sum(np.square(Y)))
def __getstate__(self): def __getstate__(self):
# has to be overridden, as Cacher objects cannot be pickled. # has to be overridden, as Cacher objects cannot be pickled.
return self.limit return self.limit
def __setstate__(self, state): def __setstate__(self, state):
# has to be overridden, as Cacher objects cannot be pickled. # has to be overridden, as Cacher objects cannot be pickled.
self.limit = state self.limit = state
from ...util.caching import Cacher from ...util.caching import Cacher
self.get_trYYT = Cacher(self._get_trYYT, self.limit) self.get_trYYT = Cacher(self._get_trYYT, self.limit)
@ -196,18 +198,19 @@ class VarDTCMissingData(LatentFunctionInference):
def __init__(self, limit=1, inan=None): def __init__(self, limit=1, inan=None):
from ...util.caching import Cacher from ...util.caching import Cacher
self._Y = Cacher(self._subarray_computations, limit) self._Y = Cacher(self._subarray_computations, limit)
self._inan = inan if inan is not None: self._inan = ~inan
else: self._inan = None
pass pass
def set_limit(self, limit): def set_limit(self, limit):
self._Y.limit = limit self._Y.limit = limit
def __getstate__(self): def __getstate__(self):
# has to be overridden, as Cacher objects cannot be pickled. # has to be overridden, as Cacher objects cannot be pickled.
return self._Y.limit, self._inan return self._Y.limit, self._inan
def __setstate__(self, state): def __setstate__(self, state):
# has to be overridden, as Cacher objects cannot be pickled. # has to be overridden, as Cacher objects cannot be pickled.
from ...util.caching import Cacher from ...util.caching import Cacher
self.limit = state[0] self.limit = state[0]
self._inan = state[1] self._inan = state[1]
@ -217,21 +220,35 @@ class VarDTCMissingData(LatentFunctionInference):
if self._inan is None: if self._inan is None:
inan = np.isnan(Y) inan = np.isnan(Y)
has_none = inan.any() has_none = inan.any()
self._inan = ~inan
else: else:
inan = self._inan inan = self._inan
has_none = True has_none = True
if has_none: if has_none:
from ...util.subarray_and_sorting import common_subarrays #print "caching missing data slices, this can take several minutes depending on the number of unique dimensions of the data..."
self._subarray_indices = [] #csa = common_subarrays(inan, 1)
for v,ind in common_subarrays(inan, 1).iteritems(): size = Y.shape[1]
if not np.all(v): #logger.info('preparing subarrays {:3.3%}'.format((i+1.)/size))
v = ~np.array(v, dtype=bool) Ys = []
ind = np.array(ind, dtype=int) next_ten = [0.]
if ind.size == Y.shape[1]: count = itertools.count()
ind = slice(None) for v, y in itertools.izip(inan.T, Y.T[:,:,None]):
self._subarray_indices.append([v,ind]) i = count.next()
Ys = [Y[v, :][:, ind] for v, ind in self._subarray_indices] if ((i+1.)/size) >= next_ten[0]:
traces = [(y**2).sum() for y in Ys] logger.info('preparing subarrays {:>6.1%}'.format((i+1.)/size))
next_ten[0] += .1
Ys.append(y[v,:])
next_ten = [0.]
count = itertools.count()
def trace(y):
i = count.next()
if ((i+1.)/size) >= next_ten[0]:
logger.info('preparing traces {:>6.1%}'.format((i+1.)/size))
next_ten[0] += .1
y = y[inan[:,i],i:i+1]
return np.einsum('ij,ij->', y,y)
traces = [trace(Y) for _ in xrange(size)]
return Ys, traces return Ys, traces
else: else:
self._subarray_indices = [[slice(None),slice(None)]] self._subarray_indices = [[slice(None),slice(None)]]
@ -253,7 +270,6 @@ class VarDTCMissingData(LatentFunctionInference):
beta_all = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6) beta_all = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
het_noise = beta_all.size != 1 het_noise = beta_all.size != 1
import itertools
num_inducing = Z.shape[0] num_inducing = Z.shape[0]
dL_dpsi0_all = np.zeros(Y.shape[0]) dL_dpsi0_all = np.zeros(Y.shape[0])
@ -273,22 +289,24 @@ class VarDTCMissingData(LatentFunctionInference):
Lm = jitchol(Kmm) Lm = jitchol(Kmm)
if uncertain_inputs: LmInv = dtrtri(Lm) if uncertain_inputs: LmInv = dtrtri(Lm)
VVT_factor_all = np.empty(Y.shape) #VVT_factor_all = np.empty(Y.shape)
full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1] #full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1]
if not full_VVT_factor: #if not full_VVT_factor:
psi1V = np.dot(Y.T*beta_all, psi1_all).T # psi1V = np.dot(Y.T*beta_all, psi1_all).T
for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices): #logger.info('computing dimension-wise likelihood and derivatives')
if het_noise: beta = beta_all[ind] #size = len(Ys)
size = Y.shape[1]
next_ten = 0
for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)):
if ((i+1.)/size) >= next_ten:
logger.info('inference {:> 6.1%}'.format((i+1.)/size))
next_ten += .1
if het_noise: beta = beta_all[i]
else: beta = beta_all else: beta = beta_all
VVT_factor = (beta*y) VVT_factor = (y*beta)
try: output_dim = 1#len(ind)
VVT_factor_all[v, ind].flat = VVT_factor.flat
except ValueError:
mult = np.ravel_multi_index((v.nonzero()[0][:,None],ind[None,:]), VVT_factor_all.shape)
VVT_factor_all.flat[mult] = VVT_factor
output_dim = y.shape[1]
psi0 = psi0_all[v] psi0 = psi0_all[v]
psi1 = psi1_all[v, :] psi1 = psi1_all[v, :]
@ -347,19 +365,20 @@ class VarDTCMissingData(LatentFunctionInference):
psi0, psi1, beta, psi0, psi1, beta,
data_fit, num_data, output_dim, trYYT, Y) data_fit, num_data, output_dim, trYYT, Y)
if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf #if full_VVT_factor:
else: woodbury_vector[:, i:i+1] = Cpsi1Vf
print 'foobar' #else:
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) # print 'foobar'
tmp, _ = dpotrs(LB, tmp, lower=1) # tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0] # tmp, _ = dpotrs(LB, tmp, lower=1)
# woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0]
#import ipdb;ipdb.set_trace() #import ipdb;ipdb.set_trace()
Bi, _ = dpotri(LB, lower=1) Bi, _ = dpotri(LB, lower=1)
symmetrify(Bi) symmetrify(Bi)
Bi = -dpotri(LB, lower=1)[0] Bi = -dpotri(LB, lower=1)[0]
diag.add(Bi, 1) diag.add(Bi, 1)
woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None] woodbury_inv_all[:, :, i:i+1] = backsub_both_sides(Lm, Bi)[:,:,None]
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR) dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
@ -376,23 +395,6 @@ class VarDTCMissingData(LatentFunctionInference):
'dL_dKnm':dL_dpsi1_all, 'dL_dKnm':dL_dpsi1_all,
'dL_dthetaL':dL_dthetaL} 'dL_dthetaL':dL_dthetaL}
#get sufficient things for posterior prediction
#TODO: do we really want to do this in the loop?
#if not full_VVT_factor:
# print 'foobar'
# psi1V = np.dot(Y.T*beta_all, psi1_all).T
# tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
# tmp, _ = dpotrs(LB_all, tmp, lower=1)
# woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
#import ipdb;ipdb.set_trace()
#Bi, _ = dpotri(LB_all, lower=1)
#symmetrify(Bi)
#Bi = -dpotri(LB_all, lower=1)[0]
#from ...util import diag
#diag.add(Bi, 1)
#woodbury_inv = backsub_both_sides(Lm, Bi)
post = Posterior(woodbury_inv=woodbury_inv_all, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) post = Posterior(woodbury_inv=woodbury_inv_all, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
return post, log_marginal, grad_dict return post, log_marginal, grad_dict

View file

@ -22,21 +22,21 @@ class VarDTC_minibatch(LatentFunctionInference):
""" """
const_jitter = 1e-6 const_jitter = 1e-6
def __init__(self, batchsize, limit=1): def __init__(self, batchsize, limit=1):
self.batchsize = batchsize self.batchsize = batchsize
# Cache functions # Cache functions
from ...util.caching import Cacher from ...util.caching import Cacher
self.get_trYYT = Cacher(self._get_trYYT, limit) self.get_trYYT = Cacher(self._get_trYYT, limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, limit) self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
self.midRes = {} self.midRes = {}
self.batch_pos = 0 # the starting position of the current mini-batch self.batch_pos = 0 # the starting position of the current mini-batch
def set_limit(self, limit): def set_limit(self, limit):
self.get_trYYT.limit = limit self.get_trYYT.limit = limit
self.get_YYTfactor.limit = limit self.get_YYTfactor.limit = limit
def _get_trYYT(self, Y): def _get_trYYT(self, Y):
return param_to_array(np.sum(np.square(Y))) return param_to_array(np.sum(np.square(Y)))
@ -51,23 +51,23 @@ class VarDTC_minibatch(LatentFunctionInference):
return param_to_array(Y) return param_to_array(Y)
else: else:
return jitchol(tdot(Y)) return jitchol(tdot(Y))
def inference_likelihood(self, kern, X, Z, likelihood, Y): def inference_likelihood(self, kern, X, Z, likelihood, Y):
""" """
The first phase of inference: The first phase of inference:
Compute: log-likelihood, dL_dKmm Compute: log-likelihood, dL_dKmm
Cached intermediate results: Kmm, KmmInv, Cached intermediate results: Kmm, KmmInv,
""" """
num_inducing = Z.shape[0] num_inducing = Z.shape[0]
num_data, output_dim = Y.shape num_data, output_dim = Y.shape
if isinstance(X, VariationalPosterior): if isinstance(X, VariationalPosterior):
uncertain_inputs = True uncertain_inputs = True
else: else:
uncertain_inputs = False uncertain_inputs = False
#see whether we've got a different noise variance for each datum #see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6) beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1 het_noise = beta.size > 1
@ -77,19 +77,19 @@ class VarDTC_minibatch(LatentFunctionInference):
#self.YYTfactor = beta*self.get_YYTfactor(Y) #self.YYTfactor = beta*self.get_YYTfactor(Y)
YYT_factor = Y YYT_factor = Y
trYYT = self.get_trYYT(Y) trYYT = self.get_trYYT(Y)
psi2_full = np.zeros((num_inducing,num_inducing)) psi2_full = np.zeros((num_inducing,num_inducing))
psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM
psi0_full = 0 psi0_full = 0
YRY_full = 0 YRY_full = 0
for n_start in xrange(0,num_data,self.batchsize): for n_start in xrange(0,num_data,self.batchsize):
n_end = min(self.batchsize+n_start, num_data) n_end = min(self.batchsize+n_start, num_data)
Y_slice = YYT_factor[n_start:n_end] Y_slice = YYT_factor[n_start:n_end]
X_slice = X[n_start:n_end] X_slice = X[n_start:n_end]
if uncertain_inputs: if uncertain_inputs:
psi0 = kern.psi0(Z, X_slice) psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice) psi1 = kern.psi1(Z, X_slice)
@ -98,7 +98,7 @@ class VarDTC_minibatch(LatentFunctionInference):
psi0 = kern.Kdiag(X_slice) psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z) psi1 = kern.K(X_slice, Z)
psi2 = None psi2 = None
if het_noise: if het_noise:
beta_slice = beta[n_start:n_end] beta_slice = beta[n_start:n_end]
psi0_full += (beta_slice*psi0).sum() psi0_full += (beta_slice*psi0).sum()
@ -106,33 +106,33 @@ class VarDTC_minibatch(LatentFunctionInference):
YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum() YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum()
else: else:
psi0_full += psi0.sum() psi0_full += psi0.sum()
psi1Y_full += np.dot(Y_slice.T,psi1) # DxM psi1Y_full += np.dot(Y_slice.T,psi1) # DxM
if uncertain_inputs: if uncertain_inputs:
if het_noise: if het_noise:
psi2_full += beta_slice*psi2 psi2_full += beta_slice*psi2
else: else:
psi2_full += psi2 psi2_full += psi2.sum(0)
else: else:
if het_noise: if het_noise:
psi2_full += beta_slice*np.outer(psi1,psi1) psi2_full += beta_slice*np.outer(psi1,psi1)
else: else:
psi2_full += np.outer(psi1,psi1) psi2_full += np.einsum('nm,jk->mk',psi1,psi1)
if not het_noise: if not het_noise:
psi0_full *= beta psi0_full *= beta
psi1Y_full *= beta psi1Y_full *= beta
psi2_full *= beta psi2_full *= beta
YRY_full = trYYT*beta YRY_full = trYYT*beta
#====================================================================== #======================================================================
# Compute Common Components # Compute Common Components
#====================================================================== #======================================================================
self.psi1Y = psi1Y_full
Kmm = kern.K(Z).copy() Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter) diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm) Lm = jitchol(Kmm)
Lambda = Kmm+psi2_full Lambda = Kmm+psi2_full
LL = jitchol(Lambda) LL = jitchol(Lambda)
b,_ = dtrtrs(LL, psi1Y_full.T) b,_ = dtrtrs(LL, psi1Y_full.T)
@ -140,18 +140,18 @@ class VarDTC_minibatch(LatentFunctionInference):
v,_ = dtrtrs(LL.T,b,lower=False) v,_ = dtrtrs(LL.T,b,lower=False)
vvt = np.einsum('md,od->mo',v,v) vvt = np.einsum('md,od->mo',v,v)
LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right') LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
Psi2LLInvT = dtrtrs(LL,psi2_full)[0].T Psi2LLInvT = dtrtrs(LL,psi2_full)[0].T
LmInvPsi2LLInvT= dtrtrs(Lm,Psi2LLInvT)[0] LmInvPsi2LLInvT= dtrtrs(Lm,Psi2LLInvT)[0]
KmmInvPsi2LLInvT = dtrtrs(Lm,LmInvPsi2LLInvT,trans=True)[0] KmmInvPsi2LLInvT = dtrtrs(Lm,LmInvPsi2LLInvT,trans=True)[0]
KmmInvPsi2P = dtrtrs(LL,KmmInvPsi2LLInvT.T, trans=True)[0].T KmmInvPsi2P = dtrtrs(LL,KmmInvPsi2LLInvT.T, trans=True)[0].T
dL_dpsi2R = (output_dim*KmmInvPsi2P - vvt)/2. # dL_dpsi2 with R inside psi2 dL_dpsi2R = (output_dim*KmmInvPsi2P - vvt)/2. # dL_dpsi2 with R inside psi2
# Cache intermediate results # Cache intermediate results
self.midRes['dL_dpsi2R'] = dL_dpsi2R self.midRes['dL_dpsi2R'] = dL_dpsi2R
self.midRes['v'] = v self.midRes['v'] = v
#====================================================================== #======================================================================
# Compute log-likelihood # Compute log-likelihood
#====================================================================== #======================================================================
@ -159,30 +159,33 @@ class VarDTC_minibatch(LatentFunctionInference):
logL_R = -np.log(beta).sum() logL_R = -np.log(beta).sum()
else: else:
logL_R = -num_data*np.log(beta) logL_R = -num_data*np.log(beta)
logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum()) logL = (
-(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.
-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum())
)
#====================================================================== #======================================================================
# Compute dL_dKmm # Compute dL_dKmm
#====================================================================== #======================================================================
dL_dKmm = -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2. dL_dKmm = -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2.
#====================================================================== #======================================================================
# Compute the Posterior distribution of inducing points p(u|Y) # Compute the Posterior distribution of inducing points p(u|Y)
#====================================================================== #======================================================================
# phi_u_mean = np.dot(Kmm,v) # phi_u_mean = np.dot(Kmm,v)
# LLInvKmm,_ = dtrtrs(LL,Kmm) # LLInvKmm,_ = dtrtrs(LL,Kmm)
# # phi_u_var = np.einsum('ma,mb->ab',LLInvKmm,LLInvKmm) # # phi_u_var = np.einsum('ma,mb->ab',LLInvKmm,LLInvKmm)
# phi_u_var = Kmm - np.dot(LLInvKmm.T,LLInvKmm) # phi_u_var = Kmm - np.dot(LLInvKmm.T,LLInvKmm)
post = Posterior(woodbury_inv=KmmInvPsi2P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm) post = Posterior(woodbury_inv=KmmInvPsi2P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm)
return logL, dL_dKmm, post return logL, dL_dKmm, post
def inference_minibatch(self, kern, X, Z, likelihood, Y): def inference_minibatch(self, kern, X, Z, likelihood, Y):
""" """
The second phase of inference: Computing the derivatives over a minibatch of Y The second phase of inference: Computing the derivatives over a minibatch of Y
Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL
return a flag showing whether it reached the end of Y (isEnd) return a flag showing whether it reached the end of Y (isEnd)
""" """
@ -193,14 +196,14 @@ class VarDTC_minibatch(LatentFunctionInference):
uncertain_inputs = True uncertain_inputs = True
else: else:
uncertain_inputs = False uncertain_inputs = False
#see whether we've got a different noise variance for each datum #see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6) beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1 het_noise = beta.size > 1
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency! # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
#self.YYTfactor = beta*self.get_YYTfactor(Y) #self.YYTfactor = beta*self.get_YYTfactor(Y)
YYT_factor = Y YYT_factor = Y
n_start = self.batch_pos n_start = self.batch_pos
n_end = min(self.batchsize+n_start, num_data) n_end = min(self.batchsize+n_start, num_data)
if n_end==num_data: if n_end==num_data:
@ -209,11 +212,11 @@ class VarDTC_minibatch(LatentFunctionInference):
else: else:
isEnd = False isEnd = False
self.batch_pos = n_end self.batch_pos = n_end
num_slice = n_end-n_start num_slice = n_end-n_start
Y_slice = YYT_factor[n_start:n_end] Y_slice = YYT_factor[n_start:n_end]
X_slice = X[n_start:n_end] X_slice = X[n_start:n_end]
if uncertain_inputs: if uncertain_inputs:
psi0 = kern.psi0(Z, X_slice) psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice) psi1 = kern.psi1(Z, X_slice)
@ -222,51 +225,51 @@ class VarDTC_minibatch(LatentFunctionInference):
psi0 = kern.Kdiag(X_slice) psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z) psi1 = kern.K(X_slice, Z)
psi2 = None psi2 = None
if het_noise: if het_noise:
beta = beta[n_start] # assuming batchsize==1 beta = beta[n_start] # assuming batchsize==1
betaY = beta*Y_slice betaY = beta*Y_slice
betapsi1 = np.einsum('n,nm->nm',beta,psi1) betapsi1 = np.einsum('n,nm->nm',beta,psi1)
#====================================================================== #======================================================================
# Load Intermediate Results # Load Intermediate Results
#====================================================================== #======================================================================
dL_dpsi2R = self.midRes['dL_dpsi2R'] dL_dpsi2R = self.midRes['dL_dpsi2R']
v = self.midRes['v'] v = self.midRes['v']
#====================================================================== #======================================================================
# Compute dL_dpsi # Compute dL_dpsi
#====================================================================== #======================================================================
dL_dpsi0 = -0.5 * output_dim * (beta * np.ones((n_end-n_start,))) dL_dpsi0 = -0.5 * output_dim * (beta * np.ones((n_end-n_start,)))
dL_dpsi1 = np.dot(betaY,v.T) dL_dpsi1 = np.dot(betaY,v.T)
if uncertain_inputs: if uncertain_inputs:
dL_dpsi2 = beta* dL_dpsi2R dL_dpsi2 = beta* dL_dpsi2R
else: else:
dL_dpsi1 += np.dot(betapsi1,dL_dpsi2R)*2. dL_dpsi1 += np.dot(betapsi1,dL_dpsi2R)*2.
dL_dpsi2 = None dL_dpsi2 = None
#====================================================================== #======================================================================
# Compute dL_dthetaL # Compute dL_dthetaL
#====================================================================== #======================================================================
if het_noise: if het_noise:
if uncertain_inputs: if uncertain_inputs:
psiR = np.einsum('mo,nmo->n',dL_dpsi2R,psi2) psiR = np.einsum('mo,nmo->',dL_dpsi2R,psi2)
else: else:
psiR = np.einsum('nm,no,mo->n',psi1,psi1,dL_dpsi2R) psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R)
dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1) dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1)
else: else:
if uncertain_inputs: if uncertain_inputs:
psiR = np.einsum('mo,mo->',dL_dpsi2R,psi2) psiR = np.einsum('mo,nmo->',dL_dpsi2R,psi2)
else: else:
psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R) psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R)
dL_dthetaL = ((np.square(betaY)).sum() + beta*beta*output_dim*(psi0.sum())-num_slice*output_dim*beta)/2. - beta*beta*psiR- (betaY*np.dot(betapsi1,v)).sum() dL_dthetaL = ((np.square(betaY)).sum() + beta*beta*output_dim*(psi0.sum())-num_slice*output_dim*beta)/2. - beta*beta*psiR- (betaY*np.dot(betapsi1,v)).sum()
if uncertain_inputs: if uncertain_inputs:
@ -278,15 +281,15 @@ class VarDTC_minibatch(LatentFunctionInference):
grad_dict = {'dL_dKdiag':dL_dpsi0, grad_dict = {'dL_dKdiag':dL_dpsi0,
'dL_dKnm':dL_dpsi1, 'dL_dKnm':dL_dpsi1,
'dL_dthetaL':dL_dthetaL} 'dL_dthetaL':dL_dthetaL}
return isEnd, (n_start,n_end), grad_dict return isEnd, (n_start,n_end), grad_dict
def update_gradients(model): def update_gradients(model):
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y) model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y)
het_noise = model.likelihood.variance.size > 1 het_noise = model.likelihood.variance.size > 1
if het_noise: if het_noise:
dL_dthetaL = np.empty((model.Y.shape[0],)) dL_dthetaL = np.empty((model.Y.shape[0],))
else: else:
@ -295,40 +298,54 @@ def update_gradients(model):
#gradients w.r.t. kernel #gradients w.r.t. kernel
model.kern.update_gradients_full(dL_dKmm, model.Z, None) model.kern.update_gradients_full(dL_dKmm, model.Z, None)
kern_grad = model.kern.gradient.copy() kern_grad = model.kern.gradient.copy()
#gradients w.r.t. Z #gradients w.r.t. Z
model.Z.gradient = model.kern.gradients_X(dL_dKmm, model.Z) model.Z.gradient = model.kern.gradients_X(dL_dKmm, model.Z)
isEnd = False isEnd = False
while not isEnd: while not isEnd:
isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y) isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y)
if isinstance(model.X, VariationalPosterior): if isinstance(model.X, VariationalPosterior):
X_slice = model.X[n_range[0]:n_range[1]] X_slice = model.X[n_range[0]:n_range[1]]
dL_dpsi1 = grad_dict['dL_dpsi1']#[None, :]
dL_dpsi2 = grad_dict['dL_dpsi2'][None, :, :]
#gradients w.r.t. kernel #gradients w.r.t. kernel
model.kern.update_gradients_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) model.kern.update_gradients_expectations(variational_posterior=X_slice,Z=model.Z,dL_dpsi0=grad_dict['dL_dpsi0'],dL_dpsi1=dL_dpsi1,dL_dpsi2=dL_dpsi2)
kern_grad += model.kern.gradient kern_grad += model.kern.gradient
#gradients w.r.t. Z #gradients w.r.t. Z
model.Z.gradient += model.kern.gradients_Z_expectations( model.Z.gradient += model.kern.gradients_Z_expectations(
dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice) dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=dL_dpsi1,
dL_dpsi2=dL_dpsi2,
Z=model.Z, variational_posterior=X_slice)
#gradients w.r.t. posterior parameters of X #gradients w.r.t. posterior parameters of X
X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) X_grad = model.kern.gradients_qX_expectations(
model.set_X_gradients(X_slice, X_grad) variational_posterior=X_slice,
Z=model.Z,
dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=dL_dpsi1,
dL_dpsi2=dL_dpsi2)
model.X.mean[n_range[0]:n_range[1]].gradient = X_grad[0]
model.X.variance[n_range[0]:n_range[1]].gradient = X_grad[1]
if het_noise: if het_noise:
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL'] dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
else: else:
dL_dthetaL += grad_dict['dL_dthetaL'] dL_dthetaL += grad_dict['dL_dthetaL']
#import ipdb;ipdb.set_trace()
model.grad_dict = grad_dict
if isinstance(model.X, VariationalPosterior):
# Update Log-likelihood
model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X)
# update for the KL divergence
model.variational_prior.update_gradients_KL(model.X)
# Set the gradients w.r.t. kernel # Set the gradients w.r.t. kernel
model.kern.gradient = kern_grad model.kern.gradient = kern_grad
# Update Log-likelihood
model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X)
# update for the KL divergence
model.variational_prior.update_gradients_KL(model.X)
# dL_dthetaL # dL_dthetaL
model.likelihood.update_gradients(dL_dthetaL) model.likelihood.update_gradients(dL_dthetaL)

View file

@ -56,13 +56,13 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
if gtol is None: if gtol is None:
gtol = 1e-5 gtol = 1e-5
sigma0 = 1.0e-8 sigma0 = 1.0e-7
fold = f(x, *optargs) # Initial function value. fold = f(x, *optargs) # Initial function value.
function_eval = 1 function_eval = 1
fnow = fold fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient. gradnew = gradf(x, *optargs) # Initial gradient.
if any(np.isnan(gradnew)): #if any(np.isnan(gradnew)):
raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value" # raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
current_grad = np.dot(gradnew, gradnew) current_grad = np.dot(gradnew, gradnew)
gradold = gradnew.copy() gradold = gradnew.copy()
d = -gradnew # Initial search direction. d = -gradnew # Initial search direction.
@ -168,13 +168,13 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
if Delta < 0.25: if Delta < 0.25:
beta = min(4.0 * beta, betamax) beta = min(4.0 * beta, betamax)
if Delta > 0.75: if Delta > 0.75:
beta = max(0.5 * beta, betamin) beta = max(0.25 * beta, betamin)
# Update search direction using Polak-Ribiere formula, or re-start # Update search direction using Polak-Ribiere formula, or re-start
# in direction of negative gradient after nparams steps. # in direction of negative gradient after nparams steps.
if nsuccess == x.size: if nsuccess == x.size:
d = -gradnew d = -gradnew
# beta = 1. # TODO: betareset!! beta = 1. # This is not in the original paper
nsuccess = 0 nsuccess = 0
elif success: elif success:
Gamma = np.dot(gradold - gradnew, gradnew) / (mu) Gamma = np.dot(gradold - gradnew, gradnew) / (mu)

View file

@ -1,2 +1,2 @@
# This is the local configuration file for GPy # This is the local installation configuration file for GPy

View file

@ -20,6 +20,8 @@ def index_to_slices(index):
returns returns
>>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]] >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
""" """
if len(index)==0:
return[]
#contruct the return structure #contruct the return structure
ind = np.asarray(index,dtype=np.int) ind = np.asarray(index,dtype=np.int)

View file

@ -101,6 +101,7 @@ class PeriodicExponential(Periodic):
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None] Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
return(self.lengthscale/(2*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T)) return(self.lengthscale/(2*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T))
@silence_errors
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)""" """derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
if X2 is None: X2 = X if X2 is None: X2 = X
@ -213,7 +214,7 @@ class PeriodicMatern32(Periodic):
return(self.lengthscale**3/(12*np.sqrt(3)*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T)) return(self.lengthscale**3/(12*np.sqrt(3)*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))
#@silence_errors @silence_errors
def update_gradients_full(self,dL_dK,X,X2): def update_gradients_full(self,dL_dK,X,X2):
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)""" """derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
if X2 is None: X2 = X if X2 is None: X2 = X

View file

@ -20,6 +20,9 @@ class DiffGenomeKern(Kern):
assert X2==None assert X2==None
K = self.kern.K(X,X2) K = self.kern.K(X,X2)
if self.idx_p<=0 or self.idx_p>X.shape[0]/2:
return K
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
idx_start = slices[1][0].start idx_start = slices[1][0].start
idx_end = idx_start+self.idx_p idx_end = idx_start+self.idx_p
@ -33,6 +36,9 @@ class DiffGenomeKern(Kern):
def Kdiag(self,X): def Kdiag(self,X):
Kdiag = self.kern.Kdiag(X) Kdiag = self.kern.Kdiag(X)
if self.idx_p<=0 or self.idx_p>X.shape[0]/2:
return Kdiag
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
idx_start = slices[1][0].start idx_start = slices[1][0].start
idx_end = idx_start+self.idx_p idx_end = idx_start+self.idx_p
@ -42,6 +48,10 @@ class DiffGenomeKern(Kern):
def update_gradients_full(self,dL_dK,X,X2=None): def update_gradients_full(self,dL_dK,X,X2=None):
assert X2==None assert X2==None
if self.idx_p<=0 or self.idx_p>X.shape[0]/2:
self.kern.update_gradients_full(dL_dK, X)
return
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
idx_start = slices[1][0].start idx_start = slices[1][0].start
idx_end = idx_start+self.idx_p idx_end = idx_start+self.idx_p

View file

@ -37,19 +37,21 @@ class BayesianGPLVM(SparseGP):
self.init = init self.init = init
if X_variance is None: if X_variance is None:
self.logger.info("initializing latent space variance ~ uniform(0,.1)")
X_variance = np.random.uniform(0,.1,X.shape) X_variance = np.random.uniform(0,.1,X.shape)
if Z is None: if Z is None:
self.logger.info("initializing inducing inputs")
Z = np.random.permutation(X.copy())[:num_inducing] Z = np.random.permutation(X.copy())[:num_inducing]
assert Z.shape[1] == X.shape[1] assert Z.shape[1] == X.shape[1]
if kernel is None: if kernel is None:
self.logger.info("initializing kernel RBF")
kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) # + kern.white(input_dim) kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) # + kern.white(input_dim)
if likelihood is None: if likelihood is None:
likelihood = Gaussian() likelihood = Gaussian()
self.variational_prior = NormalPrior() self.variational_prior = NormalPrior()
X = NormalPosterior(X, X_variance) X = NormalPosterior(X, X_variance)
@ -65,6 +67,7 @@ class BayesianGPLVM(SparseGP):
inference_method = VarDTC() inference_method = VarDTC()
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
self.logger.info("Adding X as parameter")
self.add_parameter(self.X, index=0) self.add_parameter(self.X, index=0)
def set_X_gradients(self, X, X_grad): def set_X_gradients(self, X, X_grad):
@ -75,7 +78,7 @@ class BayesianGPLVM(SparseGP):
if isinstance(self.inference_method, VarDTC_GPU): if isinstance(self.inference_method, VarDTC_GPU):
update_gradients(self) update_gradients(self)
return return
super(BayesianGPLVM, self).parameters_changed() super(BayesianGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
@ -87,7 +90,7 @@ class BayesianGPLVM(SparseGP):
def plot_latent(self, labels=None, which_indices=None, def plot_latent(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40, resolution=50, ax=None, marker='o', s=40,
fignum=None, plot_inducing=True, legend=True, fignum=None, plot_inducing=True, legend=True,
plot_limits=None, plot_limits=None,
aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}): aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}):
import sys import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported." assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
@ -107,10 +110,10 @@ class BayesianGPLVM(SparseGP):
""" """
N_test = Y.shape[0] N_test = Y.shape[0]
input_dim = self.Z.shape[1] input_dim = self.Z.shape[1]
means = np.zeros((N_test, input_dim)) means = np.zeros((N_test, input_dim))
covars = np.zeros((N_test, input_dim)) covars = np.zeros((N_test, input_dim))
dpsi0 = -0.5 * self.input_dim / self.likelihood.variance dpsi0 = -0.5 * self.input_dim / self.likelihood.variance
dpsi2 = self.grad_dict['dL_dpsi2'][0][None, :, :] # TODO: this may change if we ignore het. likelihoods dpsi2 = self.grad_dict['dL_dpsi2'][0][None, :, :] # TODO: this may change if we ignore het. likelihoods
V = Y/self.likelihood.variance V = Y/self.likelihood.variance
@ -125,7 +128,7 @@ class BayesianGPLVM(SparseGP):
dpsi1 = np.dot(self.posterior.woodbury_vector, V.T) dpsi1 = np.dot(self.posterior.woodbury_vector, V.T)
#start = np.zeros(self.input_dim * 2) #start = np.zeros(self.input_dim * 2)
from scipy.optimize import minimize from scipy.optimize import minimize
@ -139,7 +142,7 @@ class BayesianGPLVM(SparseGP):
X = NormalPosterior(means, covars) X = NormalPosterior(means, covars)
return X return X
def dmu_dX(self, Xnew): def dmu_dX(self, Xnew):
""" """
@ -169,7 +172,7 @@ class BayesianGPLVM(SparseGP):
from ..plotting.matplot_dep import dim_reduction_plots from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs) return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2): def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
""" """
@ -187,10 +190,10 @@ def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2)
psi2 = kern.psi2(Z, X) psi2 = kern.psi2(Z, X)
lik = dL_dpsi0 * psi0.sum() + np.einsum('ij,kj->...', dL_dpsi1, psi1) + np.einsum('ijk,lkj->...', dL_dpsi2, psi2) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S) lik = dL_dpsi0 * psi0.sum() + np.einsum('ij,kj->...', dL_dpsi1, psi1) + np.einsum('ijk,lkj->...', dL_dpsi2, psi2) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
dLdmu, dLdS = kern.gradients_qX_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, X) dLdmu, dLdS = kern.gradients_qX_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, X)
dmu = dLdmu - mu dmu = dLdmu - mu
# dS = S0 + S1 + S2 -0.5 + .5/S # dS = S0 + S1 + S2 -0.5 + .5/S
dlnS = S * (dLdS - 0.5) + .5 dlnS = S * (dLdS - 0.5) + .5
return -lik, -np.hstack((dmu.flatten(), dlnS.flatten())) return -lik, -np.hstack((dmu.flatten(), dlnS.flatten()))

View file

@ -8,7 +8,7 @@ from base_plots import gpplot, x_frame1D, x_frame2D
from ...util.misc import param_to_array from ...util.misc import param_to_array
from ...models.gp_coregionalized_regression import GPCoregionalizedRegression from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
from scipy import sparse
def plot_fit(model, plot_limits=None, which_data_rows='all', def plot_fit(model, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[], which_data_ycols='all', fixed_inputs=[],
@ -61,11 +61,14 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs(): if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
X = model.X.mean X = model.X.mean
X_variance = param_to_array(model.X.variance) X_variance = model.X.variance
else: else:
X = model.X X = model.X
X, Y = param_to_array(X, model.Y) #X, Y = param_to_array(X, model.Y)
if hasattr(model, 'Z'): Z = param_to_array(model.Z) Y = model.Y
if sparse.issparse(Y): Y = Y.todense().view(np.ndarray)
if hasattr(model, 'Z'): Z = model.Z
#work out what the inputs are for plotting (1D or 2D) #work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs]) fixed_dims = np.array([i for i,v in fixed_inputs])

View file

@ -8,6 +8,7 @@ import GPy
import numpy as np import numpy as np
from GPy.core.parameterization.parameter_core import HierarchyError from GPy.core.parameterization.parameter_core import HierarchyError
from GPy.core.parameterization.observable_array import ObsAr from GPy.core.parameterization.observable_array import ObsAr
from GPy.core.parameterization.transformations import NegativeLogexp
class ArrayCoreTest(unittest.TestCase): class ArrayCoreTest(unittest.TestCase):
def setUp(self): def setUp(self):
@ -38,10 +39,25 @@ class ParameterizedTest(unittest.TestCase):
self.test1.kern = self.rbf+self.white self.test1.kern = self.rbf+self.white
self.test1.add_parameter(self.test1.kern) self.test1.add_parameter(self.test1.kern)
self.test1.add_parameter(self.param, 0) self.test1.add_parameter(self.param, 0)
# print self.test1:
#=============================================================================
# test_model. | Value | Constraint | Prior | Tied to
# param | (25L, 2L) | {0.0,1.0} | |
# add.rbf.variance | 1.0 | 0.0,1.0 +ve | |
# add.rbf.lengthscale | 1.0 | 0.0,1.0 +ve | |
# add.white.variance | 1.0 | 0.0,1.0 +ve | |
#=============================================================================
x = np.linspace(-2,6,4)[:,None] x = np.linspace(-2,6,4)[:,None]
y = np.sin(x) y = np.sin(x)
self.testmodel = GPy.models.GPRegression(x,y) self.testmodel = GPy.models.GPRegression(x,y)
# print self.testmodel:
#=============================================================================
# GP_regression. | Value | Constraint | Prior | Tied to
# rbf.variance | 1.0 | +ve | |
# rbf.lengthscale | 1.0 | +ve | |
# Gaussian_noise.variance | 1.0 | +ve | |
#=============================================================================
def test_add_parameter(self): def test_add_parameter(self):
self.assertEquals(self.rbf._parent_index_, 0) self.assertEquals(self.rbf._parent_index_, 0)
@ -142,8 +158,13 @@ class ParameterizedTest(unittest.TestCase):
self.testmodel.randomize() self.testmodel.randomize()
self.assertEqual(val, self.testmodel.kern.lengthscale) self.assertEqual(val, self.testmodel.kern.lengthscale)
def test_add_parameter_in_hierarchy(self):
from GPy.core import Param
self.test1.kern.rbf.add_parameter(Param("NEW", np.random.rand(2), NegativeLogexp()), 1)
self.assertListEqual(self.test1.constraints[NegativeLogexp()].tolist(), range(self.param.size+1, self.param.size+1 + 2))
self.assertListEqual(self.test1.constraints[GPy.transformations.Logistic(0,1)].tolist(), range(self.param.size))
self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp(0,1)].tolist(), np.r_[50, 53:55].tolist())
def test_regular_expression_misc(self): def test_regular_expression_misc(self):
self.testmodel.kern.lengthscale.fix() self.testmodel.kern.lengthscale.fix()
val = float(self.testmodel.kern.lengthscale) val = float(self.testmodel.kern.lengthscale)
@ -174,4 +195,4 @@ class ParameterizedTest(unittest.TestCase):
if __name__ == "__main__": if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.test_add_parameter'] #import sys;sys.argv = ['', 'Test.test_add_parameter']
unittest.main() unittest.main()

View file

@ -18,13 +18,12 @@ class Cacher(object):
self.operation = operation self.operation = operation
self.order = collections.deque() self.order = collections.deque()
self.cached_inputs = {} # point from cache_ids to a list of [ind_ids], which where used in cache cache_id self.cached_inputs = {} # point from cache_ids to a list of [ind_ids], which where used in cache cache_id
self.logger = logging.getLogger("cache")
#======================================================================= #=======================================================================
# point from each ind_id to [ref(obj), cache_ids] # point from each ind_id to [ref(obj), cache_ids]
# 0: a weak reference to the object itself # 0: a weak reference to the object itself
# 1: the cache_ids in which this ind_id is used (len will be how many times we have seen this ind_id) # 1: the cache_ids in which this ind_id is used (len will be how many times we have seen this ind_id)
self.cached_input_ids = {} self.cached_input_ids = {}
#======================================================================= #=======================================================================
self.cached_outputs = {} # point from cache_ids to outputs self.cached_outputs = {} # point from cache_ids to outputs
@ -36,23 +35,18 @@ class Cacher(object):
def combine_inputs(self, args, kw): def combine_inputs(self, args, kw):
"Combines the args and kw in a unique way, such that ordering of kwargs does not lead to recompute" "Combines the args and kw in a unique way, such that ordering of kwargs does not lead to recompute"
self.logger.debug("combining args and kw")
return args + tuple(c[1] for c in sorted(kw.items(), key=lambda x: x[0])) return args + tuple(c[1] for c in sorted(kw.items(), key=lambda x: x[0]))
def prepare_cache_id(self, combined_args_kw, ignore_args): def prepare_cache_id(self, combined_args_kw, ignore_args):
"get the cacheid (conc. string of argument self.ids in order) ignoring ignore_args" "get the cacheid (conc. string of argument self.ids in order) ignoring ignore_args"
cache_id = "".join(self.id(a) for i, a in enumerate(combined_args_kw) if i not in ignore_args) cache_id = "".join(self.id(a) for i, a in enumerate(combined_args_kw) if i not in ignore_args)
self.logger.debug("cache_id={} was created".format(cache_id))
return cache_id return cache_id
def ensure_cache_length(self, cache_id): def ensure_cache_length(self, cache_id):
"Ensures the cache is within its limits and has one place free" "Ensures the cache is within its limits and has one place free"
self.logger.debug("cache length gets ensured")
if len(self.order) == self.limit: if len(self.order) == self.limit:
self.logger.debug("cache limit of l={} was reached".format(self.limit))
# we have reached the limit, so lets release one element # we have reached the limit, so lets release one element
cache_id = self.order.popleft() cache_id = self.order.popleft()
self.logger.debug("cach_id '{}' gets removed".format(cache_id))
combined_args_kw = self.cached_inputs[cache_id] combined_args_kw = self.cached_inputs[cache_id]
for ind in combined_args_kw: for ind in combined_args_kw:
if ind is not None: if ind is not None:
@ -66,7 +60,6 @@ class Cacher(object):
else: else:
cache_ids.remove(cache_id) cache_ids.remove(cache_id)
self.cached_input_ids[ind_id] = [ref, cache_ids] self.cached_input_ids[ind_id] = [ref, cache_ids]
self.logger.debug("removing caches")
del self.cached_outputs[cache_id] del self.cached_outputs[cache_id]
del self.inputs_changed[cache_id] del self.inputs_changed[cache_id]
del self.cached_inputs[cache_id] del self.cached_inputs[cache_id]
@ -81,10 +74,8 @@ class Cacher(object):
if a is not None: if a is not None:
ind_id = self.id(a) ind_id = self.id(a)
v = self.cached_input_ids.get(ind_id, [weakref.ref(a), []]) v = self.cached_input_ids.get(ind_id, [weakref.ref(a), []])
self.logger.debug("cache_id '{}' gets stored".format(cache_id))
v[1].append(cache_id) v[1].append(cache_id)
if len(v[1]) == 1: if len(v[1]) == 1:
self.logger.debug("adding observer to object {}".format(repr(a)))
a.add_observer(self, self.on_cache_changed) a.add_observer(self, self.on_cache_changed)
self.cached_input_ids[ind_id] = v self.cached_input_ids[ind_id] = v
@ -108,28 +99,21 @@ class Cacher(object):
cache_id = self.prepare_cache_id(inputs, self.ignore_args) cache_id = self.prepare_cache_id(inputs, self.ignore_args)
# 2: if anything is not cachable, we will just return the operation, without caching # 2: if anything is not cachable, we will just return the operation, without caching
if reduce(lambda a, b: a or (not (isinstance(b, Observable) or b is None)), inputs, False): if reduce(lambda a, b: a or (not (isinstance(b, Observable) or b is None)), inputs, False):
self.logger.info("some inputs are not observable: returning without caching")
self.logger.debug(str(map(lambda x: isinstance(x, Observable) or x is None, inputs)))
self.logger.debug(str(map(repr, inputs)))
return self.operation(*args, **kw) return self.operation(*args, **kw)
# 3&4: check whether this cache_id has been cached, then has it changed? # 3&4: check whether this cache_id has been cached, then has it changed?
try: try:
if(self.inputs_changed[cache_id]): if(self.inputs_changed[cache_id]):
self.logger.debug("{} already seen, but inputs changed. refreshing cacher".format(cache_id))
# 4: This happens, when elements have changed for this cache self.id # 4: This happens, when elements have changed for this cache self.id
self.inputs_changed[cache_id] = False self.inputs_changed[cache_id] = False
self.cached_outputs[cache_id] = self.operation(*args, **kw) self.cached_outputs[cache_id] = self.operation(*args, **kw)
except KeyError: except KeyError:
self.logger.info("{} never seen, creating cache entry".format(cache_id))
# 3: This is when we never saw this chache_id: # 3: This is when we never saw this chache_id:
self.ensure_cache_length(cache_id) self.ensure_cache_length(cache_id)
self.add_to_cache(cache_id, inputs, self.operation(*args, **kw)) self.add_to_cache(cache_id, inputs, self.operation(*args, **kw))
except: except:
self.logger.error("an error occurred while trying to run caching for {}, resetting".format(cache_id))
self.reset() self.reset()
raise raise
# 5: We have seen this cache_id and it is cached: # 5: We have seen this cache_id and it is cached:
self.logger.info("returning cache {}".format(cache_id))
return self.cached_outputs[cache_id] return self.cached_outputs[cache_id]
def on_cache_changed(self, direct, which=None): def on_cache_changed(self, direct, which=None):
@ -143,7 +127,6 @@ class Cacher(object):
ind_id = self.id(what) ind_id = self.id(what)
_, cache_ids = self.cached_input_ids.get(ind_id, [None, []]) _, cache_ids = self.cached_input_ids.get(ind_id, [None, []])
for cache_id in cache_ids: for cache_id in cache_ids:
self.logger.info("callback from {} changed inputs from {}".format(ind_id, self.inputs_changed[cache_id]))
self.inputs_changed[cache_id] = True self.inputs_changed[cache_id] = True
def reset(self): def reset(self):

View file

@ -51,7 +51,7 @@ if not (on_rtd):
json_data=open(path).read() json_data=open(path).read()
football_dict = json.loads(json_data) football_dict = json.loads(json_data)
def prompt_user(prompt): def prompt_user(prompt):
"""Ask user for agreeing to data set licenses.""" """Ask user for agreeing to data set licenses."""
@ -128,14 +128,14 @@ def download_url(url, store_directory, save_name = None, messages = True, suffix
f.write(buff) f.write(buff)
sys.stdout.write(" "*(len(status)) + "\r") sys.stdout.write(" "*(len(status)) + "\r")
if file_size: if file_size:
status = r"[{perc: <{ll}}] {dl:7.3f}/{full:.3f}MB".format(dl=file_size_dl/(1048576.), status = r"[{perc: <{ll}}] {dl:7.3f}/{full:.3f}MB".format(dl=file_size_dl/(1048576.),
full=file_size/(1048576.), ll=line_length, full=file_size/(1048576.), ll=line_length,
perc="="*int(line_length*float(file_size_dl)/file_size)) perc="="*int(line_length*float(file_size_dl)/file_size))
else: else:
status = r"[{perc: <{ll}}] {dl:7.3f}MB".format(dl=file_size_dl/(1048576.), status = r"[{perc: <{ll}}] {dl:7.3f}MB".format(dl=file_size_dl/(1048576.),
ll=line_length, ll=line_length,
perc="."*int(line_length*float(file_size_dl/(10*1048576.)))) perc="."*int(line_length*float(file_size_dl/(10*1048576.))))
sys.stdout.write(status) sys.stdout.write(status)
sys.stdout.flush() sys.stdout.flush()
sys.stdout.write(" "*(len(status)) + "\r") sys.stdout.write(" "*(len(status)) + "\r")
@ -320,7 +320,7 @@ def della_gatta_TRP63_gene_expression(data_set='della_gatta', gene_number=None):
Y = Y[:, None] Y = Y[:, None]
return data_details_return({'X': X, 'Y': Y, 'gene_number' : gene_number}, data_set) return data_details_return({'X': X, 'Y': Y, 'gene_number' : gene_number}, data_set)
def football_data(season='1314', data_set='football_data'): def football_data(season='1314', data_set='football_data'):
"""Football data from English games since 1993. This downloads data from football-data.co.uk for the given season. """ """Football data from English games since 1993. This downloads data from football-data.co.uk for the given season. """
@ -385,7 +385,7 @@ def spellman_yeast(data_set='spellman_yeast'):
Y = read_csv(filename, header=0, index_col=0, sep='\t') Y = read_csv(filename, header=0, index_col=0, sep='\t')
return data_details_return({'Y': Y}, data_set) return data_details_return({'Y': Y}, data_set)
def spellman_yeast_cdc(data_set='spellman_yeast'): def spellman_yeast_cdc15(data_set='spellman_yeast'):
if not data_available(data_set): if not data_available(data_set):
download_data(data_set) download_data(data_set)
from pandas import read_csv from pandas import read_csv
@ -405,12 +405,13 @@ def lee_yeast_ChIP(data_set='lee_yeast_ChIP'):
import zipfile import zipfile
dir_path = os.path.join(data_path, data_set) dir_path = os.path.join(data_path, data_set)
filename = os.path.join(dir_path, 'binding_by_gene.tsv') filename = os.path.join(dir_path, 'binding_by_gene.tsv')
X = read_csv(filename, header=1, index_col=0, sep='\t') S = read_csv(filename, header=1, index_col=0, sep='\t')
transcription_factors = [col for col in X.columns if col[:7] != 'Unnamed'] transcription_factors = [col for col in S.columns if col[:7] != 'Unnamed']
annotations = X[['Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3']] annotations = S[['Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3']]
X = X[transcription_factors] S = S[transcription_factors]
return data_details_return({'annotations' : annotations, 'X' : X, 'transcription_factors': transcription_factors}, data_set) return data_details_return({'annotations' : annotations, 'Y' : S, 'transcription_factors': transcription_factors}, data_set)
def fruitfly_tomancak(data_set='fruitfly_tomancak', gene_number=None): def fruitfly_tomancak(data_set='fruitfly_tomancak', gene_number=None):
if not data_available(data_set): if not data_available(data_set):
@ -424,7 +425,7 @@ def fruitfly_tomancak(data_set='fruitfly_tomancak', gene_number=None):
xt = np.linspace(0, num_time-1, num_time) xt = np.linspace(0, num_time-1, num_time)
xr = np.linspace(0, num_repeats-1, num_repeats) xr = np.linspace(0, num_repeats-1, num_repeats)
xtime, xrepeat = np.meshgrid(xt, xr) xtime, xrepeat = np.meshgrid(xt, xr)
X = np.vstack((xtime.flatten(), xrepeat.flatten())).T X = np.vstack((xtime.flatten(), xrepeat.flatten())).T
return data_details_return({'X': X, 'Y': Y, 'gene_number' : gene_number}, data_set) return data_details_return({'X': X, 'Y': Y, 'gene_number' : gene_number}, data_set)
def drosophila_protein(data_set='drosophila_protein'): def drosophila_protein(data_set='drosophila_protein'):
@ -466,7 +467,7 @@ def google_trends(query_terms=['big data', 'machine learning', 'data science'],
"""Data downloaded from Google trends for given query terms. Warning, if you use this function multiple times in a row you get blocked due to terms of service violations. The function will cache the result of your query, if you wish to refresh an old query set refresh_data to True. The function is inspired by this notebook: http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%20meet%20Notebook.ipynb""" """Data downloaded from Google trends for given query terms. Warning, if you use this function multiple times in a row you get blocked due to terms of service violations. The function will cache the result of your query, if you wish to refresh an old query set refresh_data to True. The function is inspired by this notebook: http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%20meet%20Notebook.ipynb"""
query_terms.sort() query_terms.sort()
import pandas import pandas
# Create directory name for data # Create directory name for data
dir_path = os.path.join(data_path,'google_trends') dir_path = os.path.join(data_path,'google_trends')
if not os.path.isdir(dir_path): if not os.path.isdir(dir_path):
@ -513,9 +514,9 @@ def google_trends(query_terms=['big data', 'machine learning', 'data science'],
X = np.asarray([(row, i) for i in range(terms) for row in df.index]) X = np.asarray([(row, i) for i in range(terms) for row in df.index])
Y = np.asarray([[df.ix[row][query_terms[i]]] for i in range(terms) for row in df.index ]) Y = np.asarray([[df.ix[row][query_terms[i]]] for i in range(terms) for row in df.index ])
output_info = columns[1:] output_info = columns[1:]
return data_details_return({'data frame' : df, 'X': X, 'Y': Y, 'query_terms': output_info, 'info': "Data downloaded from google trends with query terms: " + ', '.join(output_info) + '.'}, data_set) return data_details_return({'data frame' : df, 'X': X, 'Y': Y, 'query_terms': output_info, 'info': "Data downloaded from google trends with query terms: " + ', '.join(output_info) + '.'}, data_set)
# The data sets # The data sets
def oil(data_set='three_phase_oil_flow'): def oil(data_set='three_phase_oil_flow'):
"""The three phase oil data from Bishop and James (1993).""" """The three phase oil data from Bishop and James (1993)."""
@ -646,7 +647,7 @@ def decampos_digits(data_set='decampos_characters', which_digits=[0,1,2,3,4,5,6,
lbls = np.array([[l]*num_samples for l in which_digits]).reshape(Y.shape[0], 1) lbls = np.array([[l]*num_samples for l in which_digits]).reshape(Y.shape[0], 1)
str_lbls = np.array([[str(l)]*num_samples for l in which_digits]) str_lbls = np.array([[str(l)]*num_samples for l in which_digits])
return data_details_return({'Y': Y, 'lbls': lbls, 'str_lbls' : str_lbls, 'info': 'Digits data set from the de Campos characters data'}, data_set) return data_details_return({'Y': Y, 'lbls': lbls, 'str_lbls' : str_lbls, 'info': 'Digits data set from the de Campos characters data'}, data_set)
def ripley_synth(data_set='ripley_prnn_data'): def ripley_synth(data_set='ripley_prnn_data'):
if not data_available(data_set): if not data_available(data_set):
download_data(data_set) download_data(data_set)
@ -673,7 +674,7 @@ def mauna_loa(data_set='mauna_loa', num_train=545, refresh_data=False):
Y = allY[:num_train, 0:1] Y = allY[:num_train, 0:1]
Ytest = allY[num_train:, 0:1] Ytest = allY[num_train:, 0:1]
return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Mauna Loa data with " + str(num_train) + " values used as training points."}, data_set) return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Mauna Loa data with " + str(num_train) + " values used as training points."}, data_set)
def boxjenkins_airline(data_set='boxjenkins_airline', num_train=96): def boxjenkins_airline(data_set='boxjenkins_airline', num_train=96):
path = os.path.join(data_path, data_set) path = os.path.join(data_path, data_set)
@ -685,7 +686,7 @@ def boxjenkins_airline(data_set='boxjenkins_airline', num_train=96):
Xtest = data[num_train:, 0:1] Xtest = data[num_train:, 0:1]
Ytest = data[num_train:, 1:2] Ytest = data[num_train:, 1:2]
return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Montly airline passenger data from Box & Jenkins 1976."}, data_set) return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Montly airline passenger data from Box & Jenkins 1976."}, data_set)
def osu_run1(data_set='osu_run1', sample_every=4): def osu_run1(data_set='osu_run1', sample_every=4):
path = os.path.join(data_path, data_set) path = os.path.join(data_path, data_set)
@ -724,7 +725,7 @@ def hapmap3(data_set='hapmap3'):
\ -1, iff SNPij==(B2,B2) \ -1, iff SNPij==(B2,B2)
The SNP data and the meta information (such as iid, sex and phenotype) are The SNP data and the meta information (such as iid, sex and phenotype) are
stored in the dataframe datadf, index is the Individual ID, stored in the dataframe datadf, index is the Individual ID,
with following columns for metainfo: with following columns for metainfo:
* family_id -> Family ID * family_id -> Family ID
@ -797,7 +798,7 @@ def hapmap3(data_set='hapmap3'):
status=write_status('unpacking...', curr, status) status=write_status('unpacking...', curr, status)
os.remove(filepath) os.remove(filepath)
status=write_status('reading .ped...', 25, status) status=write_status('reading .ped...', 25, status)
# Preprocess data: # Preprocess data:
snpstrnp = np.loadtxt(unpacked_files[0], dtype=str) snpstrnp = np.loadtxt(unpacked_files[0], dtype=str)
status=write_status('reading .map...', 33, status) status=write_status('reading .map...', 33, status)
mapnp = np.loadtxt(unpacked_files[1], dtype=str) mapnp = np.loadtxt(unpacked_files[1], dtype=str)
@ -958,7 +959,7 @@ def olivetti_glasses(data_set='olivetti_glasses', num_training=200, seed=default
Y = y[index[:num_training],:] Y = y[index[:num_training],:]
Ytest = y[index[num_training:]] Ytest = y[index[num_training:]]
return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'seed' : seed, 'info': "ORL Faces with labels identifiying who is wearing glasses and who isn't. Data is randomly partitioned according to given seed. Presence or absence of glasses was labelled by James Hensman."}, 'olivetti_faces') return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'seed' : seed, 'info': "ORL Faces with labels identifiying who is wearing glasses and who isn't. Data is randomly partitioned according to given seed. Presence or absence of glasses was labelled by James Hensman."}, 'olivetti_faces')
def olivetti_faces(data_set='olivetti_faces'): def olivetti_faces(data_set='olivetti_faces'):
path = os.path.join(data_path, data_set) path = os.path.join(data_path, data_set)
if not data_available(data_set): if not data_available(data_set):
@ -971,7 +972,8 @@ def olivetti_faces(data_set='olivetti_faces'):
for subject in range(40): for subject in range(40):
for image in range(10): for image in range(10):
image_path = os.path.join(path, 'orl_faces', 's'+str(subject+1), str(image+1) + '.pgm') image_path = os.path.join(path, 'orl_faces', 's'+str(subject+1), str(image+1) + '.pgm')
Y.append(GPy.util.netpbmfile.imread(image_path).flatten()) from GPy.util import netpbmfile
Y.append(netpbmfile.imread(image_path).flatten())
lbls.append(subject) lbls.append(subject)
Y = np.asarray(Y) Y = np.asarray(Y)
lbls = np.asarray(lbls)[:, None] lbls = np.asarray(lbls)[:, None]
@ -1194,7 +1196,7 @@ def cifar10_patches(data_set='cifar-10'):
for x in range(0,32-5,5): for x in range(0,32-5,5):
for y in range(0,32-5,5): for y in range(0,32-5,5):
patches = np.concatenate((patches, images[:,x:x+5,y:y+5,:]), axis=0) patches = np.concatenate((patches, images[:,x:x+5,y:y+5,:]), axis=0)
patches = patches.reshape((patches.shape[0],-1)) patches = patches.reshape((patches.shape[0],-1))
return data_details_return({'Y': patches, "info" : "32x32 pixel patches extracted from the CIFAR-10 data by Boris Babenko to demonstrate k-means features."}, data_set) return data_details_return({'Y': patches, "info" : "32x32 pixel patches extracted from the CIFAR-10 data by Boris Babenko to demonstrate k-means features."}, data_set)
def cmu_mocap_49_balance(data_set='cmu_mocap'): def cmu_mocap_49_balance(data_set='cmu_mocap'):

View file

@ -16,8 +16,8 @@ def initialize_latent(init, input_dim, Y):
var = p.fracs[:input_dim] var = p.fracs[:input_dim]
else: else:
var = Xr.var(0) var = Xr.var(0)
Xr -= Xr.mean(0) Xr -= Xr.mean(0)
Xr /= Xr.var(0) Xr /= Xr.std(0)
return Xr, var/var.max() return Xr, var/var.max()

View file

@ -16,13 +16,17 @@ import warnings
import os import os
from config import * from config import *
if np.all(np.float64((scipy.__version__).split('.')[:2]) >= np.array([0, 12])): _scipyversion = np.float64((scipy.__version__).split('.')[:2])
_fix_dpotri_scipy_bug = True
if np.all(_scipyversion >= np.array([0, 14])):
from scipy.linalg import lapack
_fix_dpotri_scipy_bug = False
elif np.all(_scipyversion >= np.array([0, 12])):
#import scipy.linalg.lapack.clapack as lapack #import scipy.linalg.lapack.clapack as lapack
from scipy.linalg import lapack from scipy.linalg import lapack
else: else:
from scipy.linalg.lapack import flapack as lapack from scipy.linalg.lapack import flapack as lapack
if config.getboolean('anaconda', 'installed') and config.getboolean('anaconda', 'MKL'): if config.getboolean('anaconda', 'installed') and config.getboolean('anaconda', 'MKL'):
try: try:
anaconda_path = str(config.get('anaconda', 'location')) anaconda_path = str(config.get('anaconda', 'location'))
@ -30,6 +34,7 @@ if config.getboolean('anaconda', 'installed') and config.getboolean('anaconda',
dsyrk = mkl_rt.dsyrk dsyrk = mkl_rt.dsyrk
dsyr = mkl_rt.dsyr dsyr = mkl_rt.dsyr
_blas_available = True _blas_available = True
print 'anaconda installed and mkl is loaded'
except: except:
_blas_available = False _blas_available = False
else: else:
@ -141,16 +146,23 @@ def dpotrs(A, B, lower=1):
def dpotri(A, lower=1): def dpotri(A, lower=1):
""" """
Wrapper for lapack dpotri function Wrapper for lapack dpotri function
DPOTRI - compute the inverse of a real symmetric positive
definite matrix A using the Cholesky factorization A =
U**T*U or A = L*L**T computed by DPOTRF
:param A: Matrix A :param A: Matrix A
:param lower: is matrix lower (true) or upper (false) :param lower: is matrix lower (true) or upper (false)
:returns: A inverse :returns: A inverse
""" """
assert lower==1, "scipy linalg behaviour is very weird. please use lower, fortran ordered arrays" if _fix_dpotri_scipy_bug:
assert lower==1, "scipy linalg behaviour is very weird. please use lower, fortran ordered arrays"
lower = 0
A = force_F_ordered(A) A = force_F_ordered(A)
R, info = lapack.dpotri(A, lower=0) R, info = lapack.dpotri(A, lower=lower) #needs to be zero here, seems to be a scipy bug
symmetrify(R) symmetrify(R)
return R, info return R, info
@ -217,7 +229,7 @@ def pdinv(A, *args):
L = jitchol(A, *args) L = jitchol(A, *args)
logdet = 2.*np.sum(np.log(np.diag(L))) logdet = 2.*np.sum(np.log(np.diag(L)))
Li = dtrtri(L) Li = dtrtri(L)
Ai, _ = lapack.dpotri(L) Ai, _ = dpotri(L, lower=1)
# Ai = np.tril(Ai) + np.tril(Ai,-1).T # Ai = np.tril(Ai) + np.tril(Ai,-1).T
symmetrify(Ai) symmetrify(Ai)

331
GPy/util/netpbmfile.py Normal file
View file

@ -0,0 +1,331 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# netpbmfile.py
# Copyright (c) 2011-2013, Christoph Gohlke
# Copyright (c) 2011-2013, The Regents of the University of California
# Produced at the Laboratory for Fluorescence Dynamics.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the copyright holders nor the names of any
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Read and write image data from respectively to Netpbm files.
This implementation follows the Netpbm format specifications at
http://netpbm.sourceforge.net/doc/. No gamma correction is performed.
The following image formats are supported: PBM (bi-level), PGM (grayscale),
PPM (color), PAM (arbitrary), XV thumbnail (RGB332, read-only).
:Author:
`Christoph Gohlke <http://www.lfd.uci.edu/~gohlke/>`_
:Organization:
Laboratory for Fluorescence Dynamics, University of California, Irvine
:Version: 2013.01.18
Requirements
------------
* `CPython 2.7, 3.2 or 3.3 <http://www.python.org>`_
* `Numpy 1.7 <http://www.numpy.org>`_
* `Matplotlib 1.2 <http://www.matplotlib.org>`_ (optional for plotting)
Examples
--------
>>> im1 = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16)
>>> imsave('_tmp.pgm', im1)
>>> im2 = imread('_tmp.pgm')
>>> assert numpy.all(im1 == im2)
"""
from __future__ import division, print_function
import sys
import re
import math
from copy import deepcopy
import numpy
__version__ = '2013.01.18'
__docformat__ = 'restructuredtext en'
__all__ = ['imread', 'imsave', 'NetpbmFile']
def imread(filename, *args, **kwargs):
    """Return image data from Netpbm file as numpy array.

    `args` and `kwargs` are arguments to NetpbmFile.asarray().

    :param filename: name of the Netpbm file to read.
    :raises ValueError: if the file is not a recognized Netpbm file.

    Examples
    --------
    >>> image = imread('_tmp.pgm')

    """
    # Construct outside try/finally: if NetpbmFile() itself raises, the
    # name `netpbm` would be unbound and the finally clause would mask
    # the original error with a NameError.
    netpbm = NetpbmFile(filename)
    try:
        # Forward args/kwargs to asarray() as documented (e.g. copy, cache).
        image = netpbm.asarray(*args, **kwargs)
    finally:
        netpbm.close()
    return image
def imsave(filename, data, maxval=None, pam=False):
    """Write image data to Netpbm file.

    :param filename: name of the output file.
    :param data: array-like image data (integer type).
    :param maxval: maximum pixel value (default: inferred from data).
    :param pam: if True, write a PAM (P7) header instead of PNM.

    Examples
    --------
    >>> image = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16)
    >>> imsave('_tmp.pgm', image)

    """
    # Construct outside try/finally: if NetpbmFile() raises (e.g. for a
    # non-integer dtype), `netpbm` would be unbound and the finally
    # clause would raise NameError, hiding the original error.
    netpbm = NetpbmFile(data, maxval=maxval)
    try:
        netpbm.write(filename, pam=pam)
    finally:
        netpbm.close()
class NetpbmFile(object):
    """Read and write Netpbm PAM, PBM, PGM, PPM, files."""
    # Map Netpbm magic number to the PAM TUPLTYPE describing the pixels.
    # b'P7 332' is the XV thumbnail variant (RGB332, read-only); a plain
    # b'P7' is a true PAM file with an alpha channel.
    _types = {b'P1': b'BLACKANDWHITE', b'P2': b'GRAYSCALE', b'P3': b'RGB',
              b'P4': b'BLACKANDWHITE', b'P5': b'GRAYSCALE', b'P6': b'RGB',
              b'P7 332': b'RGB', b'P7': b'RGB_ALPHA'}
    def __init__(self, arg=None, **kwargs):
        """Initialize instance from filename, open file, or numpy array.

        `arg` may be None (empty image), a file name, a file-like object
        (anything with a ``seek`` method), or array-like image data.
        `kwargs` are forwarded to _fromdata() (e.g. ``maxval``) or
        _fromfile().
        """
        # Reset all header attributes and private state to None so that
        # partially-parsed instances have a predictable shape.
        for attr in ('header', 'magicnum', 'width', 'height', 'maxval',
                     'depth', 'tupltypes', '_filename', '_fh', '_data'):
            setattr(self, attr, None)
        if arg is None:
            self._fromdata([], **kwargs)
        elif isinstance(arg, basestring):
            # File name: open it ourselves; close() will close it because
            # _filename is set.
            self._fh = open(arg, 'rb')
            self._filename = arg
            # NOTE(review): _fromfile() accepts no keyword arguments, so
            # any kwargs on the file paths would raise TypeError — confirm.
            self._fromfile(self._fh, **kwargs)
        elif hasattr(arg, 'seek'):
            # Already-open file-like object; caller retains ownership
            # (_filename stays None so close() is a no-op).
            self._fromfile(arg, **kwargs)
            self._fh = arg
        else:
            self._fromdata(arg, **kwargs)
    def asarray(self, copy=True, cache=False, **kwargs):
        """Return image data from file as numpy array.

        :param copy: return a deep copy of cached data; ignored when the
            data is freshly read and not cached.
        :param cache: keep the decoded array on the instance for reuse.
        `kwargs` are forwarded to _read_data() (e.g. ``byteorder``).
        """
        data = self._data
        if data is None:
            data = self._read_data(self._fh, **kwargs)
            if cache:
                self._data = data
            else:
                # Freshly decoded, not retained: safe to hand out as-is.
                return data
        return deepcopy(data) if copy else data
    def write(self, arg, **kwargs):
        """Write instance to file.

        `arg` is a file name or an open file-like object; `kwargs` are
        forwarded to _tofile() (e.g. ``pam``).
        """
        if hasattr(arg, 'seek'):
            self._tofile(arg, **kwargs)
        else:
            with open(arg, 'wb') as fid:
                self._tofile(fid, **kwargs)
    def close(self):
        """Close open file. Future asarray calls might fail."""
        # Only close handles this instance opened itself (_filename set);
        # caller-supplied file objects are left open.
        if self._filename and self._fh:
            self._fh.close()
            self._fh = None
    def __del__(self):
        # Best-effort cleanup; close() is idempotent.
        self.close()
    def _fromfile(self, fh):
        """Initialize instance from open file."""
        fh.seek(0)
        # 4096 bytes is assumed to be enough to contain any header.
        data = fh.read(4096)
        # Magic number must be P1..P7 (second byte between b'0' and b'8').
        if (len(data) < 7) or not (b'0' < data[1:2] < b'8'):
            raise ValueError("Not a Netpbm file:\n%s" % data[:32])
        # Try the PAM (P7) header first, fall back to the PNM formats.
        try:
            self._read_pam_header(data)
        except Exception:
            try:
                self._read_pnm_header(data)
            except Exception:
                raise ValueError("Not a Netpbm file:\n%s" % data[:32])
    def _read_pam_header(self, data):
        """Read PAM header and initialize instance."""
        # Capture the whole header plus the HEIGHT/WIDTH/DEPTH/MAXVAL
        # lines; comments and TUPLTYPE lines are matched but not captured
        # here (TUPLTYPE is extracted separately below).
        regroups = re.search(
            b"(^P7[\n\r]+(?:(?:[\n\r]+)|(?:#.*)|"
            b"(HEIGHT\s+\d+)|(WIDTH\s+\d+)|(DEPTH\s+\d+)|(MAXVAL\s+\d+)|"
            b"(?:TUPLTYPE\s+\w+))*ENDHDR\n)", data).groups()
        self.header = regroups[0]
        self.magicnum = b'P7'
        # Set self.height/width/depth/maxval from the captured key/value
        # pairs.  NOTE(review): a group is None when its keyword is
        # missing from the header, which would make split() raise —
        # presumably all four keywords are mandatory in valid PAM files.
        for group in regroups[1:]:
            key, value = group.split()
            setattr(self, unicode(key).lower(), int(value))
        matches = re.findall(b"(TUPLTYPE\s+\w+)", self.header)
        self.tupltypes = [s.split(None, 1)[1] for s in matches]
    def _read_pnm_header(self, data):
        """Read PNM header and initialize instance."""
        # Bi-level formats (P1/P4) carry no MAXVAL field in the header.
        bpm = data[1:2] in b"14"
        # Header layout: magic, width, height, [maxval], with optional
        # '#' comments between tokens.  The third numeric group is
        # included only for non-bilevel formats (bytes * bool trick);
        # for bilevel files a maxval of 1 is appended to the groups.
        regroups = re.search(b"".join((
            b"(^(P[123456]|P7 332)\s+(?:#.*[\r\n])*",
            b"\s*(\d+)\s+(?:#.*[\r\n])*",
            b"\s*(\d+)\s+(?:#.*[\r\n])*" * (not bpm),
            b"\s*(\d+)\s(?:\s*#.*[\r\n]\s)*)")), data).groups() + (1, ) * bpm
        self.header = regroups[0]
        self.magicnum = regroups[1]
        self.width = int(regroups[2])
        self.height = int(regroups[3])
        self.maxval = int(regroups[4])
        # P3/P6 and the XV thumbnail are 3-channel; all others 1-channel.
        self.depth = 3 if self.magicnum in b"P3P6P7 332" else 1
        self.tupltypes = [self._types[self.magicnum]]
    def _read_data(self, fh, byteorder='>'):
        """Return image data from open file as numpy array.

        :param byteorder: numpy byte order for 16-bit samples (Netpbm
            stores multi-byte samples big-endian, hence the default '>').
        """
        # Pixel data starts immediately after the header.
        fh.seek(len(self.header))
        data = fh.read()
        dtype = 'u1' if self.maxval < 256 else byteorder + 'u2'
        # XV thumbnails store one RGB332 byte per pixel; expansion to
        # three channels happens below.
        depth = 1 if self.magicnum == b"P7 332" else self.depth
        # Leading -1 allows multi-image files (frames stacked along axis 0).
        shape = [-1, self.height, self.width, depth]
        size = numpy.prod(shape[1:])
        if self.magicnum in b"P1P2P3":
            # ASCII formats: whitespace-separated sample values.
            data = numpy.array(data.split(None, size)[:size], dtype)
            data = data.reshape(shape)
        elif self.maxval == 1:
            # Raw bi-level (P4): 8 pixels packed per byte, rows padded
            # to a whole byte; unpack then trim to the true width.
            shape[2] = int(math.ceil(self.width / 8))
            data = numpy.frombuffer(data, dtype).reshape(shape)
            data = numpy.unpackbits(data, axis=-2)[:, :, :self.width, :]
        else:
            # Raw grayscale/RGB: truncate any trailing partial frame.
            data = numpy.frombuffer(data, dtype)
            data = data[:size * (data.size // size)].reshape(shape)
        # Squeeze away singleton frame and channel axes.
        if data.shape[0] < 2:
            data = data.reshape(data.shape[1:])
        if data.shape[-1] < 2:
            data = data.reshape(data.shape[:-1])
        if self.magicnum == b"P7 332":
            # Expand RGB332 palette indices to 8-bit RGB triples.
            rgb332 = numpy.array(list(numpy.ndindex(8, 8, 4)), numpy.uint8)
            rgb332 *= [36, 36, 85]
            data = numpy.take(rgb332, data, axis=0)
        return data
    def _fromdata(self, data, maxval=None):
        """Initialize instance from numpy array.

        :param maxval: maximum sample value; inferred from the data and
            rounded up to 255 or 65535 when not given.
        :raises ValueError: for non-integer dtypes, negative values, or
            maxval outside the 16-bit range.
        """
        data = numpy.array(data, ndmin=2, copy=True)
        if data.dtype.kind not in "uib":
            raise ValueError("not an integer type: %s" % data.dtype)
        if data.dtype.kind == 'i' and numpy.min(data) < 0:
            raise ValueError("data out of range: %i" % numpy.min(data))
        if maxval is None:
            maxval = numpy.max(data)
            # Netpbm only distinguishes 8-bit vs 16-bit storage.
            maxval = 255 if maxval < 256 else 65535
        if maxval < 0 or maxval > 65535:
            raise ValueError("data out of range: %i" % maxval)
        # 16-bit samples are stored big-endian per the Netpbm spec.
        data = data.astype('u1' if maxval < 256 else '>u2')
        self._data = data
        if data.ndim > 2 and data.shape[-1] in (3, 4):
            # Color: last axis is the channel axis (RGB or RGBA -> PAM).
            self.depth = data.shape[-1]
            self.width = data.shape[-2]
            self.height = data.shape[-3]
            self.magicnum = b'P7' if self.depth == 4 else b'P6'
        else:
            # Grayscale or bi-level single-channel image.
            self.depth = 1
            self.width = data.shape[-1]
            self.height = data.shape[-2]
            self.magicnum = b'P5' if maxval > 1 else b'P4'
        self.maxval = maxval
        self.tupltypes = [self._types[self.magicnum]]
        self.header = self._header()
    def _tofile(self, fh, pam=False):
        """Write Netbm file."""
        fh.seek(0)
        fh.write(self._header(pam))
        data = self.asarray(copy=False)
        if self.maxval == 1:
            # Bi-level: pack 8 pixels per byte as required by raw PBM.
            data = numpy.packbits(data, axis=-1)
        data.tofile(fh)
    def _header(self, pam=False):
        """Return file header as byte string.

        :param pam: force a PAM (P7) header even for PNM-compatible data.
        """
        if pam or self.magicnum == b'P7':
            header = "\n".join((
                "P7",
                "HEIGHT %i" % self.height,
                "WIDTH %i" % self.width,
                "DEPTH %i" % self.depth,
                "MAXVAL %i" % self.maxval,
                "\n".join("TUPLTYPE %s" % unicode(i) for i in self.tupltypes),
                "ENDHDR\n"))
        elif self.maxval == 1:
            # Raw PBM header has no maxval field.
            header = "P4 %i %i\n" % (self.width, self.height)
        elif self.depth == 1:
            header = "P5 %i %i %i\n" % (self.width, self.height, self.maxval)
        else:
            header = "P6 %i %i %i\n" % (self.width, self.height, self.maxval)
        if sys.version_info[0] > 2:
            # File is written in binary mode; encode the str header.
            header = bytes(header, 'ascii')
        return header
    def __str__(self):
        """Return information about instance."""
        return unicode(self.header)
if sys.version_info[0] > 2:
    # Python 3 compatibility shim: the module body refers to the
    # Python 2 builtins `basestring` and `unicode`.  On Python 3,
    # `basestring` becomes `str` and `unicode` decodes ASCII bytes.
    basestring = str

    def unicode(value):
        """Decode ASCII bytes to str (stand-in for the Py2 builtin)."""
        return str(value, 'ascii')
if __name__ == "__main__":
# Show images specified on command line or all images in current directory
from glob import glob
from matplotlib import pyplot
files = sys.argv[1:] if len(sys.argv) > 1 else glob('*.p*m')
for fname in files:
try:
pam = NetpbmFile(fname)
img = pam.asarray(copy=False)
if False:
pam.write('_tmp.pgm.out', pam=True)
img2 = imread('_tmp.pgm.out')
assert numpy.all(img == img2)
imsave('_tmp.pgm.out', img)
img2 = imread('_tmp.pgm.out')
assert numpy.all(img == img2)
pam.close()
except ValueError as e:
print(fname, e)
continue
_shape = img.shape
if img.ndim > 3 or (img.ndim > 2 and img.shape[-1] not in (3, 4)):
img = img[0]
cmap = 'gray' if pam.maxval > 1 else 'binary'
pyplot.imshow(img, cmap, interpolation='nearest')
pyplot.title("%s %s %s %s" % (fname, unicode(pam.magicnum),
_shape, img.dtype))
pyplot.show()

View file

@ -16,13 +16,13 @@ def common_subarrays(X, axis=0):
for the subarray in X, where index is the index to the remaining axis. for the subarray in X, where index is the index to the remaining axis.
:param :class:`np.ndarray` X: 2d array to check for common subarrays in :param :class:`np.ndarray` X: 2d array to check for common subarrays in
:param int axis: axis to apply subarray detection over. :param int axis: axis to apply subarray detection over.
When the index is 0, compare rows -- columns, otherwise. When the index is 0, compare rows -- columns, otherwise.
Examples: Examples:
========= =========
In a 2d array: In a 2d array:
>>> import numpy as np >>> import numpy as np
>>> X = np.zeros((3,6), dtype=bool) >>> X = np.zeros((3,6), dtype=bool)
>>> X[[1,1,1],[0,4,5]] = 1; X[1:,[2,3]] = 1 >>> X[[1,1,1],[0,4,5]] = 1; X[1:,[2,3]] = 1
@ -48,14 +48,10 @@ def common_subarrays(X, axis=0):
assert X.ndim == 2 and axis in (0,1), "Only implemented for 2D arrays" assert X.ndim == 2 and axis in (0,1), "Only implemented for 2D arrays"
subarrays = defaultdict(list) subarrays = defaultdict(list)
cnt = count() cnt = count()
logger = logging.getLogger("common_subarrays")
def accumulate(x, s, c): def accumulate(x, s, c):
logger.debug("creating tuple")
t = tuple(x) t = tuple(x)
logger.debug("tuple done")
col = c.next() col = c.next()
iadd(s[t], [col]) iadd(s[t], [col])
logger.debug("added col {}".format(col))
return None return None
if axis == 0: [accumulate(x, subarrays, cnt) for x in X] if axis == 0: [accumulate(x, subarrays, cnt) for x in X]
else: [accumulate(x, subarrays, cnt) for x in X.T] else: [accumulate(x, subarrays, cnt) for x in X.T]
@ -63,4 +59,4 @@ def common_subarrays(X, axis=0):
if __name__ == '__main__': if __name__ == '__main__':
import doctest import doctest
doctest.testmod() doctest.testmod()

View file

@ -40,6 +40,37 @@ def std_norm_cdf(x):
weave.inline(code, arg_names=['x', 'cdf_x', 'N'], support_code=support_code) weave.inline(code, arg_names=['x', 'cdf_x', 'N'], support_code=support_code)
return cdf_x return cdf_x
def std_norm_cdf_np(x):
"""
Cumulative standard Gaussian distribution
Based on Abramowitz, M. and Stegun, I. (1970)
Around 3 times slower when x is a scalar otherwise quite a lot slower
"""
x_shape = np.asarray(x).shape
if len(x_shape) == 0 or x_shape[0] == 1:
sign = np.sign(x)
x *= sign
x /= np.sqrt(2.)
t = 1.0/(1.0 + 0.3275911*x)
erf = 1. - np.exp(-x**2)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429)))))
cdf_x = 0.5*(1.0 + sign*erf)
return cdf_x
else:
x = np.atleast_1d(x).copy()
cdf_x = np.zeros_like(x)
sign = np.ones_like(x)
neg_x_ind = x<0
sign[neg_x_ind] = -1.0
x[neg_x_ind] = -x[neg_x_ind]
x /= np.sqrt(2.)
t = 1.0/(1.0 + 0.3275911*x)
erf = 1. - np.exp(-x**2)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429)))))
cdf_x = 0.5*(1.0 + sign*erf)
cdf_x = cdf_x.reshape(x_shape)
return cdf_x
def inv_std_norm_cdf(x): def inv_std_norm_cdf(x):
""" """
Inverse cumulative standard Gaussian distribution Inverse cumulative standard Gaussian distribution