Merge branch 'devel' of https://github.com/SheffieldML/GPy into devel

This commit is contained in:
Neil Lawrence 2014-07-24 20:55:23 +01:00
commit 1c9eb270bf
21 changed files with 693 additions and 279 deletions

View file

@@ -12,6 +12,10 @@ from .. import likelihoods
 from ..likelihoods.gaussian import Gaussian
 from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation, LatentFunctionInference
 from parameterization.variational import VariationalPosterior
+from scipy.sparse.base import issparse
+import logging
+logger = logging.getLogger("GP")
 class GP(Model):
     """
@@ -34,12 +38,14 @@ class GP(Model):
         assert X.ndim == 2
         if isinstance(X, (ObsAr, VariationalPosterior)):
             self.X = X.copy()
-        else: self.X = ObsAr(X.copy())
+        else: self.X = ObsAr(X)
         self.num_data, self.input_dim = self.X.shape
         assert Y.ndim == 2
-        self.Y = ObsAr(Y.copy())
+        logger.info("initializing Y")
+        if issparse(Y): self.Y = Y
+        else: self.Y = ObsAr(Y)
         assert Y.shape[0] == self.num_data
         _, self.output_dim = self.Y.shape
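The hunk above lets a scipy sparse Y pass through untouched instead of being copied into an ObsAr. A minimal standalone sketch of that dispatch (the wrapper function name is ours; only issparse comes from scipy, and np.asarray stands in for GPy's ObsAr):

    import numpy as np
    from scipy.sparse import csr_matrix, issparse

    def wrap_observations(Y):
        # sparse matrices pass through untouched; dense data gets wrapped
        if issparse(Y):
            return Y
        return np.asarray(Y)

    wrap_observations(csr_matrix(np.eye(3)))   # returned as-is, no densifying copy
    wrap_observations([[1.0], [2.0]])          # wrapped into a dense array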
@@ -54,6 +60,7 @@ class GP(Model):
         self.likelihood = likelihood
         #find a sensible inference method
+        logger.info("initializing inference method")
         if inference_method is None:
             if isinstance(likelihood, likelihoods.Gaussian) or isinstance(likelihood, likelihoods.MixedNoise):
                 inference_method = exact_gaussian_inference.ExactGaussianInference()
@@ -62,6 +69,7 @@ class GP(Model):
                 print "defaulting to ", inference_method, "for latent function inference"
         self.inference_method = inference_method
+        logger.info("adding kernel and likelihood as parameters")
         self.add_parameter(self.kern)
         self.add_parameter(self.likelihood)
@@ -199,9 +207,9 @@ class GP(Model):
         if fillcol is not None:
             kw['fillcol'] = fillcol
         return models_plots.plot_fit(self, plot_limits, which_data_rows,
                                      which_data_ycols, fixed_inputs,
                                      levels, samples, fignum, ax, resolution,
                                      plot_raw=plot_raw, Y_metadata=Y_metadata,
                                      data_symbol=data_symbol, **kw)
     def plot(self, plot_limits=None, which_data_rows='all',
@@ -250,9 +258,9 @@ class GP(Model):
         if fillcol is not None:
             kw['fillcol'] = fillcol
         return models_plots.plot_fit(self, plot_limits, which_data_rows,
                                      which_data_ycols, fixed_inputs,
                                      levels, samples, fignum, ax, resolution,
                                      plot_raw=plot_raw, Y_metadata=Y_metadata,
                                      data_symbol=data_symbol, **kw)
     def input_sensitivity(self):
@@ -281,4 +289,4 @@ class GP(Model):
         except KeyboardInterrupt:
             print "KeyboardInterrupt caught, calling on_optimization_end() to round things up"
             self.inference_method.on_optimization_end()
             raise

View file

@@ -61,7 +61,7 @@ class Model(Parameterized):
         on the current machine.
         """
-        initial_parameters = self.optimizer_array
+        initial_parameters = self.optimizer_array.copy()
         if parallel:
             try:
@@ -97,9 +97,9 @@ class Model(Parameterized):
         if len(self.optimization_runs):
             i = np.argmin([o.f_opt for o in self.optimization_runs])
-            self._set_params_transformed(self.optimization_runs[i].x_opt)
+            self.optimizer_array = self.optimization_runs[i].x_opt
         else:
-            self._set_params_transformed(initial_parameters)
+            self.optimizer_array = initial_parameters
     def ensure_default_constraints(self, warning=True):
         """
@@ -118,12 +118,12 @@ class Model(Parameterized):
         """
         The objective function for the given algorithm.
         This function is the true objective, which wants to be minimized.
         Note that all parameters are already set and in place, so you just need
         to return the objective function here.
         For probabilistic models this is the negative log_likelihood
         (including the MAP prior), so we return it here. If your model is not
         probabilistic, just return your objective to minimize here!
         """
         return -float(self.log_likelihood()) - self.log_prior()
@@ -131,18 +131,18 @@ class Model(Parameterized):
     def objective_function_gradients(self):
         """
         The gradients for the objective function for the given algorithm.
         The gradients are w.r.t. the *negative* objective function, as
         this framework works with *negative* log-likelihoods as a default.
         You can find the gradient for the parameters in self.gradient at all times.
         This is the place, where gradients get stored for parameters.
         This function is the true objective, which wants to be minimized.
         Note that all parameters are already set and in place, so you just need
         to return the gradient here.
         For probabilistic models this is the gradient of the negative log_likelihood
         (including the MAP prior), so we return it here. If your model is not
         probabilistic, just return your *negative* gradient here!
         """
         return -(self._log_likelihood_gradients() + self._log_prior_gradients())
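The two docstrings above pin down the sign convention: the optimizer always minimizes the negative log-likelihood plus the negative log-prior, and the gradients follow the same sign. A toy standalone illustration of that convention (the class and its attributes are ours, not GPy's):

    import numpy as np

    class ToyQuadratic(object):
        """Gaussian log-likelihood centred at zero, flat prior."""
        def __init__(self):
            self.theta = np.array([3.0, -2.0])
        def log_likelihood(self):
            return -0.5 * np.sum(self.theta ** 2)
        def log_prior(self):
            return 0.0
        def _log_likelihood_gradients(self):
            return -self.theta
        def _log_prior_gradients(self):
            return 0.0
        def objective_function(self):
            # the minimized objective: negative (log-likelihood + log-prior)
            return -float(self.log_likelihood()) - self.log_prior()
        def objective_function_gradients(self):
            return -(self._log_likelihood_gradients() + self._log_prior_gradients())

    m = ToyQuadratic()
    assert np.allclose(m.objective_function_gradients(), m.theta)  # points uphill in the objective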
@@ -225,14 +225,18 @@ class Model(Parameterized):
         if self.size == 0:
             raise RuntimeError, "Model without parameters cannot be optimized"
-        if optimizer is None:
-            optimizer = self.preferred_optimizer
         if start == None:
             start = self.optimizer_array
-        optimizer = optimization.get_optimizer(optimizer)
-        opt = optimizer(start, model=self, **kwargs)
+        if optimizer is None:
+            optimizer = self.preferred_optimizer
+        if isinstance(optimizer, optimization.Optimizer):
+            opt = optimizer
+            opt.model = self
+        else:
+            optimizer = optimization.get_optimizer(optimizer)
+            opt = optimizer(start, model=self, **kwargs)
         opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads)
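After this change, optimize() accepts either an optimizer name or a ready-built optimization.Optimizer instance. A hedged usage sketch mirroring the dispatch above (the 'scg' name and constructor call are assumptions based on the diff, not a documented API):

    m.optimize('scg', max_iters=500)          # string: resolved via optimization.get_optimizer
    opt = optimization.get_optimizer('scg')(m.optimizer_array, model=m)
    m.optimize(opt)                           # instance: used directly, opt.model is set to m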
@@ -249,7 +253,7 @@ class Model(Parameterized):
     def _checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
         """
         Check the gradient of the model by comparing to a numerical
-        estimate. If the verbose flag is passed, invividual
+        estimate. If the verbose flag is passed, individual
         components are tested (and printed)
         :param verbose: If True, print a "full" checking of each parameter
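For reference, a hedged usage sketch of the gradient check this method backs (the public checkgrad wrapper around _checkgrad is an assumption about GPy's API):

    m.checkgrad(verbose=True)   # prints a per-parameter table comparing analytic
                                # gradients against numerical estimates
    assert m.checkgrad()        # quiet variant: True when all ratios are within tolerance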

View file

@@ -33,7 +33,7 @@ class ObsAr(np.ndarray, Pickleable, Observable):
     def _setup_observers(self):
         # do not setup anything, as observable arrays do not have default observers
         pass
     def copy(self):
         from lists_and_dicts import ObserverList
         memo = {}

View file

@@ -751,8 +751,6 @@ class OptimizationHandlable(Indexable):
         Transform the gradients by multiplying the gradient factor for each
         constraint to it.
         """
-        if self.has_parent():
-            return g
         [np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
         if self._has_fixes(): return g[self._fixes_]
         return g
@@ -793,7 +791,7 @@ class OptimizationHandlable(Indexable):
     #===========================================================================
     # Randomizeable
     #===========================================================================
-    def randomize(self, rand_gen=np.random.normal, loc=0, scale=1, *args, **kwargs):
+    def randomize(self, rand_gen=np.random.normal, *args, **kwargs):
         """
         Randomize the model.
         Make this draw from the prior if one exists, else draw from given random generator
@@ -804,7 +802,7 @@ class OptimizationHandlable(Indexable):
         :param args, kwargs: will be passed through to random number generator
         """
         # first take care of all parameters (from N(0,1))
-        x = rand_gen(loc=loc, scale=scale, size=self._size_transformed(), *args, **kwargs)
+        x = rand_gen(size=self._size_transformed(), *args, **kwargs)
         # now draw from prior where possible
         [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
         self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...)
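Dropping the hard-coded loc/scale keywords means any generator exposing a size keyword now works unchanged. A hedged usage sketch of the new signature:

    m.randomize()                                   # default: N(0, 1) where no prior is set
    m.randomize(np.random.uniform, low=-1, high=1)  # extra kwargs flow through to the generator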
@@ -835,6 +833,11 @@ class OptimizationHandlable(Indexable):
         1.) connect param_array of children to self.param_array
         2.) tell all children to propagate further
         """
+        if self.param_array.size != self.size:
+            self._param_array_ = np.empty(self.size, dtype=np.float64)
+        if self.gradient.size != self.size:
+            self._gradient_array_ = np.empty(self.size, dtype=np.float64)
         pi_old_size = 0
         for pi in self.parameters:
             pislice = slice(pi_old_size, pi_old_size + pi.size)
@@ -848,6 +851,9 @@ class OptimizationHandlable(Indexable):
             pi._propagate_param_grad(parray[pislice], garray[pislice])
             pi_old_size += pi.size
+    def _connect_parameters(self):
+        pass
 class Parameterizable(OptimizationHandlable):
     """
     A parameterisable class.
@@ -874,6 +880,9 @@ class Parameterizable(OptimizationHandlable):
         """
         Array representing the parameters of this class.
         There is only one copy of all parameters in memory, two during optimization.
+        !WARNING!: setting the parameter array MUST always be done in memory:
+        m.param_array[:] = m_copy.param_array
         """
         if self.__dict__.get('_param_array_', None) is None:
             self._param_array_ = np.empty(self.size, dtype=np.float64)
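The warning added above exists because param_array is a view into memory shared with every child parameter. A short sketch of the failure mode it guards against (the model variables are placeholders):

    m.param_array[:] = m_copy.param_array   # correct: writes through the shared view
    # m.param_array = m_copy.param_array    # wrong: rebinding the attribute silently
    #                                       # disconnects children from the model's memory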
@@ -986,6 +995,11 @@ class Parameterizable(OptimizationHandlable):
     # notification system
     #===========================================================================
     def _parameters_changed_notification(self, me, which=None):
+        """
+        In parameterizable we just need to make sure, that the next call to optimizer_array
+        will update the optimizer_array to the latest parameters
+        """
+        self._optimizer_copy_transformed = False # tells the optimizer array to update on next request
         self.parameters_changed()
     def _pass_through_notify_observers(self, me, which=None):
         self.notify_observers(which=which)
@@ -1017,4 +1031,3 @@ class Parameterizable(OptimizationHandlable):
         updates get passed through. See :py:function:``GPy.core.param.Observable.add_observer``
         """
         pass

View file

@@ -8,11 +8,23 @@ from re import compile, _pattern_type
 from param import ParamConcatenation
 from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
+import logging
+logger = logging.getLogger("parameters changed meta")
 class ParametersChangedMeta(type):
     def __call__(self, *args, **kw):
-        instance = super(ParametersChangedMeta, self).__call__(*args, **kw)
-        instance.parameters_changed()
-        return instance
+        self._in_init_ = True
+        #import ipdb;ipdb.set_trace()
+        self = super(ParametersChangedMeta, self).__call__(*args, **kw)
+        logger.debug("finished init")
+        self._in_init_ = False
+        logger.debug("connecting parameters")
+        self._highest_parent_._connect_parameters()
+        self._highest_parent_._notify_parent_change()
+        self._highest_parent_._connect_fixes()
+        logger.debug("calling parameters changed")
+        self.parameters_changed()
+        return self
 class Parameterized(Parameterizable):
     """
@@ -57,21 +69,19 @@ class Parameterized(Parameterizable):
     and concatenate them. Printing m[''] will result in printing of all parameters in detail.
     """
     #===========================================================================
     # Metaclass for parameters changed after init.
     # This makes sure, that parameters changed will always be called after __init__
     # **Never** call parameters_changed() yourself
     __metaclass__ = ParametersChangedMeta
     #===========================================================================
     def __init__(self, name=None, parameters=[], *a, **kw):
         super(Parameterized, self).__init__(name=name, *a, **kw)
-        self._in_init_ = True
         self.size = sum(p.size for p in self.parameters)
         self.add_observer(self, self._parameters_changed_notification, -100)
         if not self._has_fixes():
             self._fixes_ = None
         self._param_slices_ = []
-        self._connect_parameters()
-        del self._in_init_
+        #self._connect_parameters()
         self.add_parameters(*parameters)
     def build_pydot(self, G=None):
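Taken together with the metaclass hunk above, construction now proceeds in a fixed order: __init__ runs with _in_init_ set, the hierarchy is connected once at the end, and parameters_changed() fires exactly once. A hedged sketch of what that means for subclass authors (the class name is ours):

    class MyModel(Parameterized):
        def __init__(self, name='my_model'):
            super(MyModel, self).__init__(name=name)
            # add parameters here; nothing is connected yet (_in_init_ is True)
        def parameters_changed(self):
            # invoked automatically by ParametersChangedMeta once __init__
            # has completed -- never call this yourself
            pass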
@@ -125,6 +135,9 @@ class Parameterized(Parameterizable):
             param._parent_.remove_parameter(param)
             # make sure the size is set
             if index is None:
+                start = sum(p.size for p in self.parameters)
+                self.constraints.shift_right(start, param.size)
+                self.priors.shift_right(start, param.size)
                 self.constraints.update(param.constraints, self.size)
                 self.priors.update(param.priors, self.size)
                 self.parameters.append(param)
@@ -143,14 +156,16 @@ class Parameterized(Parameterizable):
                 parent.size += param.size
                 parent = parent._parent_
-            self._connect_parameters()
+            if not self._in_init_:
+                self._connect_parameters()
+                self._notify_parent_change()
                 self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names)
                 self._highest_parent_._notify_parent_change()
                 self._highest_parent_._connect_fixes()
         else:
-            raise HierarchyError, """Parameter exists already and no copy made"""
+            raise HierarchyError, """Parameter exists already, try making a copy"""
     def add_parameters(self, *parameters):
@@ -198,26 +213,28 @@ class Parameterized(Parameterizable):
             # no parameters for this class
             return
         if self.param_array.size != self.size:
-            self.param_array = np.empty(self.size, dtype=np.float64)
+            self._param_array_ = np.empty(self.size, dtype=np.float64)
         if self.gradient.size != self.size:
             self._gradient_array_ = np.empty(self.size, dtype=np.float64)
         old_size = 0
         self._param_slices_ = []
         for i, p in enumerate(self.parameters):
+            if not p.param_array.flags['C_CONTIGUOUS']:
+                raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"
             p._parent_ = self
             p._parent_index_ = i
             pslice = slice(old_size, old_size + p.size)
             # first connect all children
             p._propagate_param_grad(self.param_array[pslice], self.gradient_full[pslice])
             # then connect children to self
             self.param_array[pslice] = p.param_array.flat # , requirements=['C', 'W']).ravel(order='C')
             self.gradient_full[pslice] = p.gradient_full.flat # , requirements=['C', 'W']).ravel(order='C')
-            if not p.param_array.flags['C_CONTIGUOUS']:
-                raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"
             p.param_array.data = self.param_array[pslice].data
             p.gradient_full.data = self.gradient_full[pslice].data
@@ -332,7 +349,7 @@ class Parameterized(Parameterizable):
     def __str__(self, header=True):
         name = adjust_name_for_printing(self.name) + "."
         constrs = self._constraints_str;
-        ts = self._ties_str
         prirs = self._priors_str
         desc = self._description_str; names = self.parameter_names()

View file

@@ -76,11 +76,11 @@ class Uniform(Prior):
         o = super(Prior, cls).__new__(cls, lower, upper)
         cls._instances.append(weakref.ref(o))
         return cls._instances[-1]()
     def __init__(self, lower, upper):
         self.lower = float(lower)
         self.upper = float(upper)
     def __str__(self):
         return "[" + str(np.round(self.lower)) + ', ' + str(np.round(self.upper)) + ']'
@@ -93,7 +93,7 @@ class Uniform(Prior):
     def rvs(self, n):
         return np.random.uniform(self.lower, self.upper, size=n)
 class LogGaussian(Prior):
     """
     Implementation of the univariate *log*-Gaussian probability function, coupled with random variables.
@@ -246,7 +246,7 @@ class Gamma(Prior):
         """
         Creates an instance of a Gamma Prior by specifying the Expected value(s)
         and Variance(s) of the distribution.
         :param E: expected value
         :param V: variance
         """

View file

@@ -8,6 +8,9 @@ from ..inference.latent_function_inference import var_dtc
 from .. import likelihoods
 from parameterization.variational import VariationalPosterior
+import logging
+logger = logging.getLogger("sparse gp")
 class SparseGP(GP):
     """
     A general purpose Sparse GP model
@@ -46,7 +49,7 @@ class SparseGP(GP):
         self.num_inducing = Z.shape[0]
         GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata)
+        logger.info("Adding Z as parameter")
         self.add_parameter(self.Z, index=0)
     def has_uncertain_inputs(self):
@@ -57,19 +60,23 @@ class SparseGP(GP):
         self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
         if isinstance(self.X, VariationalPosterior):
             #gradients wrt kernel
-            dL_dKmm = self.grad_dict.pop('dL_dKmm')
+            dL_dKmm = self.grad_dict['dL_dKmm']
             self.kern.update_gradients_full(dL_dKmm, self.Z, None)
             target = self.kern.gradient.copy()
-            self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
+            self.kern.update_gradients_expectations(variational_posterior=self.X,
+                                                    Z=self.Z,
+                                                    dL_dpsi0=self.grad_dict['dL_dpsi0'],
+                                                    dL_dpsi1=self.grad_dict['dL_dpsi1'],
+                                                    dL_dpsi2=self.grad_dict['dL_dpsi2'])
             self.kern.gradient += target
             #gradients wrt Z
             self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
             self.Z.gradient += self.kern.gradients_Z_expectations(
                                self.grad_dict['dL_dpsi0'],
                                self.grad_dict['dL_dpsi1'],
                                self.grad_dict['dL_dpsi2'],
                                Z=self.Z,
                                variational_posterior=self.X)
         else:
             #gradients wrt kernel

View file

@@ -37,7 +37,7 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan
     # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
     p = .3
     m = GPy.models.BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
     if nan:
@@ -144,7 +144,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=25, Q=4
     m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel)
     m.data_colors = c
     m.data_t = t
     if optimize:
         m.optimize('bfgs', messages=verbose, max_iters=2e3)
@@ -169,7 +169,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
     Y = data['X'][:N]
     m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k)
     m.data_labels = data['Y'][:N].argmax(axis=1)
     if optimize:
         m.optimize('bfgs', messages=verbose, max_iters=max_iters, gtol=.05)
@@ -296,15 +296,16 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
     from GPy.models import BayesianGPLVM
     from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
-    D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 7, 9
+    D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
     _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
     Y = Ylist[0]
     k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
-    inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool)
-    Y[inan] = _np.nan
-    m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing,
+    inan = _np.random.binomial(1, .8, size=Y.shape).astype(bool) # 80% missing data
+    Ymissing = Y.copy()
+    Ymissing[inan] = _np.nan
+    m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
                       inference_method=VarDTCMissingData(inan=inan), kernel=k)
     m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
@@ -364,7 +365,7 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim
     for inan in inanlist:
         imlist.append(VarDTCMissingData(limit=1, inan=inan))
     m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing,
             kernel=k, inference_method=imlist,
             initx="random", initz='permute', **kw)
@@ -410,11 +411,11 @@ def olivetti_faces(optimize=True, verbose=True, plot=True):
     Yn /= Yn.std()
     m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=20)
     if optimize: m.optimize('bfgs', messages=verbose, max_iters=1000)
     if plot:
         ax = m.plot_latent(which_indices=(0, 1))
-        y = m.likelihood.Y[0, :]
+        y = m.Y[0, :]
         data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False)
         lvm = GPy.plotting.matplot_dep.visualize.lvm(m.X.mean[0, :].copy(), m, data_show, ax)
         raw_input('Press enter to finish')
@@ -514,7 +515,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
     data = GPy.util.datasets.osu_run1()
     Q = 6
     kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True)
     m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
     m.data = data
@@ -566,7 +567,7 @@ def ssgplvm_simulation_linear():
     import GPy
     N, D, Q = 1000, 20, 5
     pi = 0.2
     def sample_X(Q, pi):
         x = np.empty(Q)
         dies = np.random.rand(Q)
@@ -576,7 +577,7 @@ def ssgplvm_simulation_linear():
         else:
             x[q] = 0.
         return x
     Y = np.empty((N,D))
     X = np.empty((N,Q))
     # Generate data from random sampled weight matrices
@@ -584,4 +585,4 @@ def ssgplvm_simulation_linear():
         X[n] = sample_X(Q,pi)
         w = np.random.randn(D,Q)
         Y[n] = np.dot(w,X[n])

View file

@@ -9,6 +9,8 @@ import numpy as np
 from ...util.misc import param_to_array
 from . import LatentFunctionInference
 log_2_pi = np.log(2*np.pi)
+import logging, itertools
+logger = logging.getLogger('vardtc')
 class VarDTC(LatentFunctionInference):
     """
@@ -36,11 +38,11 @@ class VarDTC(LatentFunctionInference):
         return param_to_array(np.sum(np.square(Y)))
     def __getstate__(self):
         # has to be overridden, as Cacher objects cannot be pickled.
         return self.limit
     def __setstate__(self, state):
         # has to be overridden, as Cacher objects cannot be pickled.
         self.limit = state
         from ...util.caching import Cacher
         self.get_trYYT = Cacher(self._get_trYYT, self.limit)
@@ -192,22 +194,23 @@ class VarDTC(LatentFunctionInference):
         return post, log_marginal, grad_dict
 class VarDTCMissingData(LatentFunctionInference):
-    const_jitter = 1e-6
+    const_jitter = 1e-10
     def __init__(self, limit=1, inan=None):
         from ...util.caching import Cacher
         self._Y = Cacher(self._subarray_computations, limit)
-        self._inan = inan
+        if inan is not None: self._inan = ~inan
+        else: self._inan = None
         pass
     def set_limit(self, limit):
         self._Y.limit = limit
     def __getstate__(self):
         # has to be overridden, as Cacher objects cannot be pickled.
         return self._Y.limit, self._inan
     def __setstate__(self, state):
         # has to be overridden, as Cacher objects cannot be pickled.
         from ...util.caching import Cacher
         self.limit = state[0]
         self._inan = state[1]
@@ -217,21 +220,35 @@ class VarDTCMissingData(LatentFunctionInference):
         if self._inan is None:
             inan = np.isnan(Y)
             has_none = inan.any()
+            self._inan = ~inan
         else:
             inan = self._inan
             has_none = True
         if has_none:
-            from ...util.subarray_and_sorting import common_subarrays
-            self._subarray_indices = []
-            for v,ind in common_subarrays(inan, 1).iteritems():
-                if not np.all(v):
-                    v = ~np.array(v, dtype=bool)
-                    ind = np.array(ind, dtype=int)
-                    if ind.size == Y.shape[1]:
-                        ind = slice(None)
-                    self._subarray_indices.append([v,ind])
-            Ys = [Y[v, :][:, ind] for v, ind in self._subarray_indices]
-            traces = [(y**2).sum() for y in Ys]
+            #print "caching missing data slices, this can take several minutes depending on the number of unique dimensions of the data..."
+            #csa = common_subarrays(inan, 1)
+            size = Y.shape[1]
+            #logger.info('preparing subarrays {:3.3%}'.format((i+1.)/size))
+            Ys = []
+            next_ten = [0.]
+            count = itertools.count()
+            for v, y in itertools.izip(inan.T, Y.T[:,:,None]):
+                i = count.next()
+                if ((i+1.)/size) >= next_ten[0]:
+                    logger.info('preparing subarrays {:>6.1%}'.format((i+1.)/size))
+                    next_ten[0] += .1
+                Ys.append(y[v,:])
+            next_ten = [0.]
+            count = itertools.count()
+            def trace(y):
+                i = count.next()
+                if ((i+1.)/size) >= next_ten[0]:
+                    logger.info('preparing traces {:>6.1%}'.format((i+1.)/size))
+                    next_ten[0] += .1
+                y = y[inan[:,i],i:i+1]
+                return np.einsum('ij,ij->', y,y)
+            traces = [trace(Y) for _ in xrange(size)]
             return Ys, traces
         else:
             self._subarray_indices = [[slice(None),slice(None)]]
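The rewrite above replaces the common-subarray grouping with one slice per output column: for each dimension, keep only the observed rows and cache the corresponding squared-norm trace. A hedged standalone sketch of that per-column slicing (the variable names are ours):

    import numpy as np

    Y = np.array([[1., np.nan],
                  [2., 3.],
                  [np.nan, 4.]])
    observed = ~np.isnan(Y)                                      # True where data exists
    Ys = [Y[observed[:, i], i:i+1] for i in range(Y.shape[1])]   # one column slice each
    traces = [float(np.einsum('ij,ij->', y, y)) for y in Ys]     # tr(y y^T) per column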
@@ -253,7 +270,6 @@ class VarDTCMissingData(LatentFunctionInference):
         beta_all = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
         het_noise = beta_all.size != 1
-        import itertools
         num_inducing = Z.shape[0]
         dL_dpsi0_all = np.zeros(Y.shape[0])
@@ -273,22 +289,17 @@ class VarDTCMissingData(LatentFunctionInference):
         Lm = jitchol(Kmm)
         if uncertain_inputs: LmInv = dtrtri(Lm)
-        VVT_factor_all = np.empty(Y.shape)
-        full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1]
-        if not full_VVT_factor:
-            psi1V = np.dot(Y.T*beta_all, psi1_all).T
-        for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices):
-            if het_noise: beta = beta_all[ind]
+        size = Y.shape[1]
+        next_ten = 0
+        for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)):
+            if ((i+1.)/size) >= next_ten:
+                logger.info('inference {:> 6.1%}'.format((i+1.)/size))
+                next_ten += .1
+            if het_noise: beta = beta_all[i]
             else: beta = beta_all
-            VVT_factor = (beta*y)
-            try:
-                VVT_factor_all[v, ind].flat = VVT_factor.flat
-            except ValueError:
-                mult = np.ravel_multi_index((v.nonzero()[0][:,None],ind[None,:]), VVT_factor_all.shape)
-                VVT_factor_all.flat[mult] = VVT_factor
-            output_dim = y.shape[1]
+            VVT_factor = (y*beta)
+            output_dim = 1#len(ind)
             psi0 = psi0_all[v]
             psi1 = psi1_all[v, :]
@@ -330,7 +341,6 @@ class VarDTCMissingData(LatentFunctionInference):
                 VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
                 psi1, het_noise, uncertain_inputs)
-            #import ipdb;ipdb.set_trace()
            dL_dpsi0_all[v] += dL_dpsi0
             dL_dpsi1_all[v, :] += dL_dpsi1
             if uncertain_inputs:
@@ -347,19 +357,20 @@ class VarDTCMissingData(LatentFunctionInference):
                 psi0, psi1, beta,
                 data_fit, num_data, output_dim, trYYT, Y)
-            if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf
-            else:
-                print 'foobar'
-                tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
-                tmp, _ = dpotrs(LB, tmp, lower=1)
-                woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0]
+            #if full_VVT_factor:
+            woodbury_vector[:, i:i+1] = Cpsi1Vf
+            #else:
+            #    print 'foobar'
+            #    tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
+            #    tmp, _ = dpotrs(LB, tmp, lower=1)
+            #    woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0]
             #import ipdb;ipdb.set_trace()
             Bi, _ = dpotri(LB, lower=1)
             symmetrify(Bi)
             Bi = -dpotri(LB, lower=1)[0]
             diag.add(Bi, 1)
-            woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None]
+            woodbury_inv_all[:, :, i:i+1] = backsub_both_sides(Lm, Bi)[:,:,None]
         dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
@@ -376,23 +387,6 @@ class VarDTCMissingData(LatentFunctionInference):
                      'dL_dKnm':dL_dpsi1_all,
                      'dL_dthetaL':dL_dthetaL}
-        #get sufficient things for posterior prediction
-        #TODO: do we really want to do this in the loop?
-        #if not full_VVT_factor:
-        #    print 'foobar'
-        #    psi1V = np.dot(Y.T*beta_all, psi1_all).T
-        #    tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
-        #    tmp, _ = dpotrs(LB_all, tmp, lower=1)
-        #    woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
-        #import ipdb;ipdb.set_trace()
-        #Bi, _ = dpotri(LB_all, lower=1)
-        #symmetrify(Bi)
-        #Bi = -dpotri(LB_all, lower=1)[0]
-        #from ...util import diag
-        #diag.add(Bi, 1)
-        #woodbury_inv = backsub_both_sides(Lm, Bi)
         post = Posterior(woodbury_inv=woodbury_inv_all, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
         return post, log_marginal, grad_dict

View file

@@ -22,21 +22,21 @@ class VarDTC_minibatch(LatentFunctionInference):
     """
     const_jitter = 1e-6
     def __init__(self, batchsize, limit=1):
         self.batchsize = batchsize
         # Cache functions
         from ...util.caching import Cacher
         self.get_trYYT = Cacher(self._get_trYYT, limit)
         self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
         self.midRes = {}
         self.batch_pos = 0 # the starting position of the current mini-batch
     def set_limit(self, limit):
         self.get_trYYT.limit = limit
         self.get_YYTfactor.limit = limit
     def _get_trYYT(self, Y):
         return param_to_array(np.sum(np.square(Y)))
@@ -51,23 +51,23 @@ class VarDTC_minibatch(LatentFunctionInference):
             return param_to_array(Y)
         else:
             return jitchol(tdot(Y))
     def inference_likelihood(self, kern, X, Z, likelihood, Y):
         """
         The first phase of inference:
         Compute: log-likelihood, dL_dKmm
         Cached intermediate results: Kmm, KmmInv,
         """
         num_inducing = Z.shape[0]
         num_data, output_dim = Y.shape
         if isinstance(X, VariationalPosterior):
             uncertain_inputs = True
         else:
             uncertain_inputs = False
         #see whether we've got a different noise variance for each datum
         beta = 1./np.fmax(likelihood.variance, 1e-6)
         het_noise = beta.size > 1
@@ -77,19 +77,19 @@ class VarDTC_minibatch(LatentFunctionInference):
         #self.YYTfactor = beta*self.get_YYTfactor(Y)
         YYT_factor = Y
         trYYT = self.get_trYYT(Y)
         psi2_full = np.zeros((num_inducing,num_inducing))
         psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM
         psi0_full = 0
         YRY_full = 0
         for n_start in xrange(0,num_data,self.batchsize):
             n_end = min(self.batchsize+n_start, num_data)
             Y_slice = YYT_factor[n_start:n_end]
             X_slice = X[n_start:n_end]
             if uncertain_inputs:
                 psi0 = kern.psi0(Z, X_slice)
                 psi1 = kern.psi1(Z, X_slice)
@@ -98,7 +98,7 @@ class VarDTC_minibatch(LatentFunctionInference):
                 psi0 = kern.Kdiag(X_slice)
                 psi1 = kern.K(X_slice, Z)
                 psi2 = None
             if het_noise:
                 beta_slice = beta[n_start:n_end]
                 psi0_full += (beta_slice*psi0).sum()
@@ -106,33 +106,33 @@ class VarDTC_minibatch(LatentFunctionInference):
                 YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum()
             else:
                 psi0_full += psi0.sum()
             psi1Y_full += np.dot(Y_slice.T,psi1) # DxM
             if uncertain_inputs:
                 if het_noise:
                     psi2_full += beta_slice*psi2
                 else:
-                    psi2_full += psi2
+                    psi2_full += psi2.sum(0)
             else:
                 if het_noise:
                     psi2_full += beta_slice*np.outer(psi1,psi1)
                 else:
-                    psi2_full += np.outer(psi1,psi1)
+                    psi2_full += np.einsum('nm,jk->mk',psi1,psi1)
         if not het_noise:
             psi0_full *= beta
             psi1Y_full *= beta
             psi2_full *= beta
             YRY_full = trYYT*beta
         #======================================================================
         # Compute Common Components
         #======================================================================
+        self.psi1Y = psi1Y_full
         Kmm = kern.K(Z).copy()
         diag.add(Kmm, self.const_jitter)
         Lm = jitchol(Kmm)
         Lambda = Kmm+psi2_full
         LL = jitchol(Lambda)
         b,_ = dtrtrs(LL, psi1Y_full.T)
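The loop above is the heart of the minibatch scheme: the psi statistics are summed batch by batch, so only one slice of Y and X is ever held in memory. A hedged standalone sketch of the same accumulation pattern with a stand-in kernel (all names here are ours):

    import numpy as np

    N, M, batchsize = 1000, 20, 50
    rng = np.random.RandomState(0)
    X, Z = rng.randn(N, 3), rng.randn(M, 3)
    k = lambda A, B: np.exp(-0.5 * ((A[:, None] - B[None]) ** 2).sum(-1))  # toy RBF

    psi2_full = np.zeros((M, M))
    for n_start in range(0, N, batchsize):
        n_end = min(n_start + batchsize, N)
        psi1 = k(X[n_start:n_end], Z)       # (batch, M), recomputed per batch
        psi2_full += psi1.T.dot(psi1)       # running sum; no N x M matrix is stored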
@ -140,18 +140,18 @@ class VarDTC_minibatch(LatentFunctionInference):
v,_ = dtrtrs(LL.T,b,lower=False) v,_ = dtrtrs(LL.T,b,lower=False)
vvt = np.einsum('md,od->mo',v,v) vvt = np.einsum('md,od->mo',v,v)
LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right') LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
Psi2LLInvT = dtrtrs(LL,psi2_full)[0].T Psi2LLInvT = dtrtrs(LL,psi2_full)[0].T
LmInvPsi2LLInvT= dtrtrs(Lm,Psi2LLInvT)[0] LmInvPsi2LLInvT= dtrtrs(Lm,Psi2LLInvT)[0]
KmmInvPsi2LLInvT = dtrtrs(Lm,LmInvPsi2LLInvT,trans=True)[0] KmmInvPsi2LLInvT = dtrtrs(Lm,LmInvPsi2LLInvT,trans=True)[0]
KmmInvPsi2P = dtrtrs(LL,KmmInvPsi2LLInvT.T, trans=True)[0].T KmmInvPsi2P = dtrtrs(LL,KmmInvPsi2LLInvT.T, trans=True)[0].T
dL_dpsi2R = (output_dim*KmmInvPsi2P - vvt)/2. # dL_dpsi2 with R inside psi2 dL_dpsi2R = (output_dim*KmmInvPsi2P - vvt)/2. # dL_dpsi2 with R inside psi2
# Cache intermediate results # Cache intermediate results
self.midRes['dL_dpsi2R'] = dL_dpsi2R self.midRes['dL_dpsi2R'] = dL_dpsi2R
self.midRes['v'] = v self.midRes['v'] = v
#====================================================================== #======================================================================
# Compute log-likelihood # Compute log-likelihood
#====================================================================== #======================================================================
@ -159,30 +159,33 @@ class VarDTC_minibatch(LatentFunctionInference):
logL_R = -np.log(beta).sum() logL_R = -np.log(beta).sum()
else: else:
logL_R = -num_data*np.log(beta) logL_R = -num_data*np.log(beta)
logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum()) logL = (
-(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.
-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum())
)
#====================================================================== #======================================================================
# Compute dL_dKmm # Compute dL_dKmm
#====================================================================== #======================================================================
dL_dKmm = -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2. dL_dKmm = -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2.
#====================================================================== #======================================================================
# Compute the Posterior distribution of inducing points p(u|Y) # Compute the Posterior distribution of inducing points p(u|Y)
#====================================================================== #======================================================================
# phi_u_mean = np.dot(Kmm,v) # phi_u_mean = np.dot(Kmm,v)
# LLInvKmm,_ = dtrtrs(LL,Kmm) # LLInvKmm,_ = dtrtrs(LL,Kmm)
# # phi_u_var = np.einsum('ma,mb->ab',LLInvKmm,LLInvKmm) # # phi_u_var = np.einsum('ma,mb->ab',LLInvKmm,LLInvKmm)
# phi_u_var = Kmm - np.dot(LLInvKmm.T,LLInvKmm) # phi_u_var = Kmm - np.dot(LLInvKmm.T,LLInvKmm)
post = Posterior(woodbury_inv=KmmInvPsi2P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm) post = Posterior(woodbury_inv=KmmInvPsi2P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm)
return logL, dL_dKmm, post return logL, dL_dKmm, post
def inference_minibatch(self, kern, X, Z, likelihood, Y): def inference_minibatch(self, kern, X, Z, likelihood, Y):
""" """
The second phase of inference: Computing the derivatives over a minibatch of Y The second phase of inference: Computing the derivatives over a minibatch of Y
Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL
return a flag showing whether it reached the end of Y (isEnd) return a flag showing whether it reached the end of Y (isEnd)
""" """
@ -193,14 +196,14 @@ class VarDTC_minibatch(LatentFunctionInference):
uncertain_inputs = True uncertain_inputs = True
else: else:
uncertain_inputs = False uncertain_inputs = False
#see whether we've got a different noise variance for each datum #see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6) beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1 het_noise = beta.size > 1
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency! # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
#self.YYTfactor = beta*self.get_YYTfactor(Y) #self.YYTfactor = beta*self.get_YYTfactor(Y)
YYT_factor = Y YYT_factor = Y
n_start = self.batch_pos n_start = self.batch_pos
n_end = min(self.batchsize+n_start, num_data) n_end = min(self.batchsize+n_start, num_data)
if n_end==num_data: if n_end==num_data:
@ -209,11 +212,11 @@ class VarDTC_minibatch(LatentFunctionInference):
else: else:
isEnd = False isEnd = False
self.batch_pos = n_end self.batch_pos = n_end
num_slice = n_end-n_start num_slice = n_end-n_start
Y_slice = YYT_factor[n_start:n_end] Y_slice = YYT_factor[n_start:n_end]
X_slice = X[n_start:n_end] X_slice = X[n_start:n_end]
if uncertain_inputs: if uncertain_inputs:
psi0 = kern.psi0(Z, X_slice) psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice) psi1 = kern.psi1(Z, X_slice)
@ -222,51 +225,51 @@ class VarDTC_minibatch(LatentFunctionInference):
psi0 = kern.Kdiag(X_slice) psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z) psi1 = kern.K(X_slice, Z)
psi2 = None psi2 = None
if het_noise: if het_noise:
beta = beta[n_start] # assuming batchsize==1 beta = beta[n_start] # assuming batchsize==1
betaY = beta*Y_slice betaY = beta*Y_slice
betapsi1 = np.einsum('n,nm->nm',beta,psi1) betapsi1 = np.einsum('n,nm->nm',beta,psi1)
#====================================================================== #======================================================================
# Load Intermediate Results # Load Intermediate Results
#====================================================================== #======================================================================
dL_dpsi2R = self.midRes['dL_dpsi2R'] dL_dpsi2R = self.midRes['dL_dpsi2R']
v = self.midRes['v'] v = self.midRes['v']
#====================================================================== #======================================================================
# Compute dL_dpsi # Compute dL_dpsi
#====================================================================== #======================================================================
dL_dpsi0 = -0.5 * output_dim * (beta * np.ones((n_end-n_start,))) dL_dpsi0 = -0.5 * output_dim * (beta * np.ones((n_end-n_start,)))
dL_dpsi1 = np.dot(betaY,v.T) dL_dpsi1 = np.dot(betaY,v.T)
if uncertain_inputs: if uncertain_inputs:
dL_dpsi2 = beta* dL_dpsi2R dL_dpsi2 = beta* dL_dpsi2R
else: else:
dL_dpsi1 += np.dot(betapsi1,dL_dpsi2R)*2. dL_dpsi1 += np.dot(betapsi1,dL_dpsi2R)*2.
dL_dpsi2 = None dL_dpsi2 = None
#====================================================================== #======================================================================
# Compute dL_dthetaL # Compute dL_dthetaL
#====================================================================== #======================================================================
if het_noise: if het_noise:
if uncertain_inputs: if uncertain_inputs:
psiR = np.einsum('mo,nmo->n',dL_dpsi2R,psi2) psiR = np.einsum('mo,nmo->',dL_dpsi2R,psi2)
else: else:
psiR = np.einsum('nm,no,mo->n',psi1,psi1,dL_dpsi2R) psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R)
dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1) dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1)
else: else:
if uncertain_inputs: if uncertain_inputs:
psiR = np.einsum('mo,mo->',dL_dpsi2R,psi2) psiR = np.einsum('mo,nmo->',dL_dpsi2R,psi2)
else: else:
psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R) psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R)
dL_dthetaL = ((np.square(betaY)).sum() + beta*beta*output_dim*(psi0.sum())-num_slice*output_dim*beta)/2. - beta*beta*psiR- (betaY*np.dot(betapsi1,v)).sum() dL_dthetaL = ((np.square(betaY)).sum() + beta*beta*output_dim*(psi0.sum())-num_slice*output_dim*beta)/2. - beta*beta*psiR- (betaY*np.dot(betapsi1,v)).sum()
if uncertain_inputs: if uncertain_inputs:
@ -278,15 +281,15 @@ class VarDTC_minibatch(LatentFunctionInference):
grad_dict = {'dL_dKdiag':dL_dpsi0, grad_dict = {'dL_dKdiag':dL_dpsi0,
'dL_dKnm':dL_dpsi1, 'dL_dKnm':dL_dpsi1,
'dL_dthetaL':dL_dthetaL} 'dL_dthetaL':dL_dthetaL}
return isEnd, (n_start,n_end), grad_dict return isEnd, (n_start,n_end), grad_dict
def update_gradients(model): def update_gradients(model):
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y) model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y)
het_noise = model.likelihood.variance.size > 1 het_noise = model.likelihood.variance.size > 1
if het_noise: if het_noise:
dL_dthetaL = np.empty((model.Y.shape[0],)) dL_dthetaL = np.empty((model.Y.shape[0],))
else: else:
@ -295,40 +298,54 @@ def update_gradients(model):
#gradients w.r.t. kernel #gradients w.r.t. kernel
model.kern.update_gradients_full(dL_dKmm, model.Z, None) model.kern.update_gradients_full(dL_dKmm, model.Z, None)
kern_grad = model.kern.gradient.copy() kern_grad = model.kern.gradient.copy()
#gradients w.r.t. Z #gradients w.r.t. Z
model.Z.gradient = model.kern.gradients_X(dL_dKmm, model.Z) model.Z.gradient = model.kern.gradients_X(dL_dKmm, model.Z)
isEnd = False isEnd = False
while not isEnd: while not isEnd:
isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y) isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y)
if isinstance(model.X, VariationalPosterior): if isinstance(model.X, VariationalPosterior):
X_slice = model.X[n_range[0]:n_range[1]] X_slice = model.X[n_range[0]:n_range[1]]
dL_dpsi1 = grad_dict['dL_dpsi1']#[None, :]
dL_dpsi2 = grad_dict['dL_dpsi2'][None, :, :]
#gradients w.r.t. kernel #gradients w.r.t. kernel
model.kern.update_gradients_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) model.kern.update_gradients_expectations(variational_posterior=X_slice,Z=model.Z,dL_dpsi0=grad_dict['dL_dpsi0'],dL_dpsi1=dL_dpsi1,dL_dpsi2=dL_dpsi2)
kern_grad += model.kern.gradient kern_grad += model.kern.gradient
#gradients w.r.t. Z #gradients w.r.t. Z
model.Z.gradient += model.kern.gradients_Z_expectations( model.Z.gradient += model.kern.gradients_Z_expectations(
dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice) dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=dL_dpsi1,
dL_dpsi2=dL_dpsi2,
Z=model.Z, variational_posterior=X_slice)
#gradients w.r.t. posterior parameters of X #gradients w.r.t. posterior parameters of X
X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) X_grad = model.kern.gradients_qX_expectations(
model.set_X_gradients(X_slice, X_grad) variational_posterior=X_slice,
Z=model.Z,
dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=dL_dpsi1,
dL_dpsi2=dL_dpsi2)
model.X.mean[n_range[0]:n_range[1]].gradient = X_grad[0]
model.X.variance[n_range[0]:n_range[1]].gradient = X_grad[1]
if het_noise: if het_noise:
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL'] dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
else: else:
dL_dthetaL += grad_dict['dL_dthetaL'] dL_dthetaL += grad_dict['dL_dthetaL']
#import ipdb;ipdb.set_trace()
model.grad_dict = grad_dict
if isinstance(model.X, VariationalPosterior):
# Update Log-likelihood
model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X)
# update for the KL divergence
model.variational_prior.update_gradients_KL(model.X)
# Set the gradients w.r.t. kernel # Set the gradients w.r.t. kernel
model.kern.gradient = kern_grad model.kern.gradient = kern_grad
# Update Log-likelihood
model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X)
# update for the KL divergence
model.variational_prior.update_gradients_KL(model.X)
# dL_dthetaL # dL_dthetaL
model.likelihood.update_gradients(dL_dthetaL) model.likelihood.update_gradients(dL_dthetaL)
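For orientation, the objective assembled in update_gradients above is the standard variational sparse-GP bound, accumulated over minibatches, with the KL term subtracted when X is a variational posterior. A sketch in the psi-statistics notation the code uses (inferred from context, not quoted from GPy's docs):

    \mathcal{L} \;=\; \sum_{\text{minibatches}} F(\psi_0, \Psi_1, \Psi_2) \;-\; \mathrm{KL}\big(q(X)\,\|\,p(X)\big),
    \qquad
    \psi_0 = \operatorname{tr}\,\langle K_{nn}\rangle_{q(X)},\quad
    \Psi_1 = \langle K_{nm}\rangle_{q(X)},\quad
    \Psi_2 = \langle K_{mn} K_{nm}\rangle_{q(X)}

so kern_grad and model.Z.gradient collect contributions from every minibatch before the KL gradients are added once at the end.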
View file
@ -56,13 +56,13 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
if gtol is None: if gtol is None:
gtol = 1e-5 gtol = 1e-5
sigma0 = 1.0e-8 sigma0 = 1.0e-7
fold = f(x, *optargs) # Initial function value. fold = f(x, *optargs) # Initial function value.
function_eval = 1 function_eval = 1
fnow = fold fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient. gradnew = gradf(x, *optargs) # Initial gradient.
if any(np.isnan(gradnew)): #if any(np.isnan(gradnew)):
raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value" # raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
current_grad = np.dot(gradnew, gradnew) current_grad = np.dot(gradnew, gradnew)
gradold = gradnew.copy() gradold = gradnew.copy()
d = -gradnew # Initial search direction. d = -gradnew # Initial search direction.
@ -168,13 +168,13 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
if Delta < 0.25: if Delta < 0.25:
beta = min(4.0 * beta, betamax) beta = min(4.0 * beta, betamax)
if Delta > 0.75: if Delta > 0.75:
beta = max(0.5 * beta, betamin) beta = max(0.25 * beta, betamin)
# Update search direction using Polak-Ribiere formula, or re-start # Update search direction using Polak-Ribiere formula, or re-start
# in direction of negative gradient after nparams steps. # in direction of negative gradient after nparams steps.
if nsuccess == x.size: if nsuccess == x.size:
d = -gradnew d = -gradnew
# beta = 1. # TODO: betareset!! beta = 1. # This is not in the original paper
nsuccess = 0 nsuccess = 0
elif success: elif success:
Gamma = np.dot(gradold - gradnew, gradnew) / (mu) Gamma = np.dot(gradold - gradnew, gradnew) / (mu)
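The Gamma in the last line is the Polak-Ribiere-style coefficient used by Moller's scaled conjugate gradients; schematically (a sketch from the visible code, with mu the curvature scale computed earlier in the optimizer):

    \gamma_k = \frac{(g_{k-1} - g_k)^\top g_k}{\mu_k},
    \qquad
    d_{k+1} = \gamma_k\, d_k - g_k

and, as the diff adds, after x.size successful steps the direction is reset to -g with beta reset to 1 (a restart heuristic that, per the new comment, is not in the original paper).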
View file
@ -37,19 +37,21 @@ class BayesianGPLVM(SparseGP):
self.init = init self.init = init
if X_variance is None: if X_variance is None:
self.logger.info("initializing latent space variance ~ uniform(0,.1)")
X_variance = np.random.uniform(0,.1,X.shape) X_variance = np.random.uniform(0,.1,X.shape)
if Z is None: if Z is None:
self.logger.info("initializing inducing inputs")
Z = np.random.permutation(X.copy())[:num_inducing] Z = np.random.permutation(X.copy())[:num_inducing]
assert Z.shape[1] == X.shape[1] assert Z.shape[1] == X.shape[1]
if kernel is None: if kernel is None:
self.logger.info("initializing kernel RBF")
kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) # + kern.white(input_dim) kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) # + kern.white(input_dim)
if likelihood is None: if likelihood is None:
likelihood = Gaussian() likelihood = Gaussian()
self.variational_prior = NormalPrior() self.variational_prior = NormalPrior()
X = NormalPosterior(X, X_variance) X = NormalPosterior(X, X_variance)
@ -65,6 +67,7 @@ class BayesianGPLVM(SparseGP):
inference_method = VarDTC() inference_method = VarDTC()
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
self.logger.info("Adding X as parameter")
self.add_parameter(self.X, index=0) self.add_parameter(self.X, index=0)
def set_X_gradients(self, X, X_grad): def set_X_gradients(self, X, X_grad):
@ -75,7 +78,7 @@ class BayesianGPLVM(SparseGP):
if isinstance(self.inference_method, VarDTC_GPU): if isinstance(self.inference_method, VarDTC_GPU):
update_gradients(self) update_gradients(self)
return return
super(BayesianGPLVM, self).parameters_changed() super(BayesianGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
@ -87,7 +90,7 @@ class BayesianGPLVM(SparseGP):
def plot_latent(self, labels=None, which_indices=None, def plot_latent(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40, resolution=50, ax=None, marker='o', s=40,
fignum=None, plot_inducing=True, legend=True, fignum=None, plot_inducing=True, legend=True,
plot_limits=None, plot_limits=None,
aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}): aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}):
import sys import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported." assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
@ -107,10 +110,10 @@ class BayesianGPLVM(SparseGP):
""" """
N_test = Y.shape[0] N_test = Y.shape[0]
input_dim = self.Z.shape[1] input_dim = self.Z.shape[1]
means = np.zeros((N_test, input_dim)) means = np.zeros((N_test, input_dim))
covars = np.zeros((N_test, input_dim)) covars = np.zeros((N_test, input_dim))
dpsi0 = -0.5 * self.input_dim / self.likelihood.variance dpsi0 = -0.5 * self.input_dim / self.likelihood.variance
dpsi2 = self.grad_dict['dL_dpsi2'][0][None, :, :] # TODO: this may change if we ignore het. likelihoods dpsi2 = self.grad_dict['dL_dpsi2'][0][None, :, :] # TODO: this may change if we ignore het. likelihoods
V = Y/self.likelihood.variance V = Y/self.likelihood.variance
@ -125,7 +128,7 @@ class BayesianGPLVM(SparseGP):
dpsi1 = np.dot(self.posterior.woodbury_vector, V.T) dpsi1 = np.dot(self.posterior.woodbury_vector, V.T)
#start = np.zeros(self.input_dim * 2) #start = np.zeros(self.input_dim * 2)
from scipy.optimize import minimize from scipy.optimize import minimize
@ -139,7 +142,7 @@ class BayesianGPLVM(SparseGP):
X = NormalPosterior(means, covars) X = NormalPosterior(means, covars)
return X return X
def dmu_dX(self, Xnew): def dmu_dX(self, Xnew):
""" """
@ -169,7 +172,7 @@ class BayesianGPLVM(SparseGP):
from ..plotting.matplot_dep import dim_reduction_plots from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs) return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2): def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
""" """
@ -187,10 +190,10 @@ def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2)
psi2 = kern.psi2(Z, X) psi2 = kern.psi2(Z, X)
lik = dL_dpsi0 * psi0.sum() + np.einsum('ij,kj->...', dL_dpsi1, psi1) + np.einsum('ijk,lkj->...', dL_dpsi2, psi2) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S) lik = dL_dpsi0 * psi0.sum() + np.einsum('ij,kj->...', dL_dpsi1, psi1) + np.einsum('ijk,lkj->...', dL_dpsi2, psi2) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
dLdmu, dLdS = kern.gradients_qX_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, X) dLdmu, dLdS = kern.gradients_qX_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, X)
dmu = dLdmu - mu dmu = dLdmu - mu
# dS = S0 + S1 + S2 -0.5 + .5/S # dS = S0 + S1 + S2 -0.5 + .5/S
dlnS = S * (dLdS - 0.5) + .5 dlnS = S * (dLdS - 0.5) + .5
return -lik, -np.hstack((dmu.flatten(), dlnS.flatten())) return -lik, -np.hstack((dmu.flatten(), dlnS.flatten()))
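The dlnS line is the chain rule for optimizing the variance in log space: with the entropy/prior terms -\tfrac{1}{2}\sum(\mu^2 + S) + \tfrac{1}{2}\sum\log S visible in lik, and dLdS the kernel-expectation part,

    \frac{\partial L}{\partial \log S} \;=\; S\,\frac{\partial L}{\partial S}
    \;=\; S\Big(\texttt{dLdS} - \tfrac{1}{2}\Big) + \tfrac{1}{2}

which is exactly dlnS = S * (dLdS - 0.5) + .5.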
View file
@ -30,7 +30,7 @@ def most_significant_input_dimensions(model, which_indices):
def plot_latent(model, labels=None, which_indices=None, def plot_latent(model, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40, resolution=50, ax=None, marker='o', s=40,
fignum=None, plot_inducing=False, legend=True, fignum=None, plot_inducing=False, legend=True,
plot_limits=None, plot_limits=None,
aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}): aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}):
""" """
:param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc) :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc)
@ -84,6 +84,7 @@ def plot_latent(model, labels=None, which_indices=None,
cmap=pb.cm.binary, **imshow_kwargs) cmap=pb.cm.binary, **imshow_kwargs)
# make sure labels are in order of input: # make sure labels are in order of input:
labels = np.asarray(labels)
ulabels = [] ulabels = []
for lab in labels: for lab in labels:
if not lab in ulabels: if not lab in ulabels:
View file
@ -8,7 +8,7 @@ from base_plots import gpplot, x_frame1D, x_frame2D
from ...util.misc import param_to_array from ...util.misc import param_to_array
from ...models.gp_coregionalized_regression import GPCoregionalizedRegression from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
from scipy import sparse
def plot_fit(model, plot_limits=None, which_data_rows='all', def plot_fit(model, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[], which_data_ycols='all', fixed_inputs=[],
@ -61,11 +61,14 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs(): if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
X = model.X.mean X = model.X.mean
X_variance = param_to_array(model.X.variance) X_variance = model.X.variance
else: else:
X = model.X X = model.X
X, Y = param_to_array(X, model.Y) #X, Y = param_to_array(X, model.Y)
if hasattr(model, 'Z'): Z = param_to_array(model.Z) Y = model.Y
if sparse.issparse(Y): Y = Y.todense().view(np.ndarray)
if hasattr(model, 'Z'): Z = model.Z
#work out what the inputs are for plotting (1D or 2D) #work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs]) fixed_dims = np.array([i for i,v in fixed_inputs])
View file
@ -8,6 +8,7 @@ import GPy
import numpy as np import numpy as np
from GPy.core.parameterization.parameter_core import HierarchyError from GPy.core.parameterization.parameter_core import HierarchyError
from GPy.core.parameterization.observable_array import ObsAr from GPy.core.parameterization.observable_array import ObsAr
from GPy.core.parameterization.transformations import NegativeLogexp
class ArrayCoreTest(unittest.TestCase): class ArrayCoreTest(unittest.TestCase):
def setUp(self): def setUp(self):
@ -38,10 +39,25 @@ class ParameterizedTest(unittest.TestCase):
self.test1.kern = self.rbf+self.white self.test1.kern = self.rbf+self.white
self.test1.add_parameter(self.test1.kern) self.test1.add_parameter(self.test1.kern)
self.test1.add_parameter(self.param, 0) self.test1.add_parameter(self.param, 0)
# print self.test1:
#=============================================================================
# test_model. | Value | Constraint | Prior | Tied to
# param | (25L, 2L) | {0.0,1.0} | |
# add.rbf.variance | 1.0 | 0.0,1.0 +ve | |
# add.rbf.lengthscale | 1.0 | 0.0,1.0 +ve | |
# add.white.variance | 1.0 | 0.0,1.0 +ve | |
#=============================================================================
x = np.linspace(-2,6,4)[:,None] x = np.linspace(-2,6,4)[:,None]
y = np.sin(x) y = np.sin(x)
self.testmodel = GPy.models.GPRegression(x,y) self.testmodel = GPy.models.GPRegression(x,y)
# print self.testmodel:
#=============================================================================
# GP_regression. | Value | Constraint | Prior | Tied to
# rbf.variance | 1.0 | +ve | |
# rbf.lengthscale | 1.0 | +ve | |
# Gaussian_noise.variance | 1.0 | +ve | |
#=============================================================================
def test_add_parameter(self): def test_add_parameter(self):
self.assertEquals(self.rbf._parent_index_, 0) self.assertEquals(self.rbf._parent_index_, 0)
@ -142,8 +158,13 @@ class ParameterizedTest(unittest.TestCase):
self.testmodel.randomize() self.testmodel.randomize()
self.assertEqual(val, self.testmodel.kern.lengthscale) self.assertEqual(val, self.testmodel.kern.lengthscale)
def test_add_parameter_in_hierarchy(self):
from GPy.core import Param
self.test1.kern.rbf.add_parameter(Param("NEW", np.random.rand(2), NegativeLogexp()), 1)
self.assertListEqual(self.test1.constraints[NegativeLogexp()].tolist(), range(self.param.size+1, self.param.size+1 + 2))
self.assertListEqual(self.test1.constraints[GPy.transformations.Logistic(0,1)].tolist(), range(self.param.size))
self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp(0,1)].tolist(), np.r_[50, 53:55].tolist())
def test_regular_expression_misc(self): def test_regular_expression_misc(self):
self.testmodel.kern.lengthscale.fix() self.testmodel.kern.lengthscale.fix()
val = float(self.testmodel.kern.lengthscale) val = float(self.testmodel.kern.lengthscale)
@ -174,4 +195,4 @@ class ParameterizedTest(unittest.TestCase):
if __name__ == "__main__": if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.test_add_parameter'] #import sys;sys.argv = ['', 'Test.test_add_parameter']
unittest.main() unittest.main()
View file
@ -18,13 +18,12 @@ class Cacher(object):
self.operation = operation self.operation = operation
self.order = collections.deque() self.order = collections.deque()
self.cached_inputs = {} # point from cache_ids to a list of [ind_ids], which were used in cache cache_id self.cached_inputs = {} # point from cache_ids to a list of [ind_ids], which were used in cache cache_id
self.logger = logging.getLogger("cache")
#======================================================================= #=======================================================================
# point from each ind_id to [ref(obj), cache_ids] # point from each ind_id to [ref(obj), cache_ids]
# 0: a weak reference to the object itself # 0: a weak reference to the object itself
# 1: the cache_ids in which this ind_id is used (len will be how many times we have seen this ind_id) # 1: the cache_ids in which this ind_id is used (len will be how many times we have seen this ind_id)
self.cached_input_ids = {} self.cached_input_ids = {}
#======================================================================= #=======================================================================
self.cached_outputs = {} # point from cache_ids to outputs self.cached_outputs = {} # point from cache_ids to outputs
@ -36,23 +35,18 @@ class Cacher(object):
def combine_inputs(self, args, kw): def combine_inputs(self, args, kw):
"Combines the args and kw in a unique way, such that ordering of kwargs does not lead to recompute" "Combines the args and kw in a unique way, such that ordering of kwargs does not lead to recompute"
self.logger.debug("combining args and kw")
return args + tuple(c[1] for c in sorted(kw.items(), key=lambda x: x[0])) return args + tuple(c[1] for c in sorted(kw.items(), key=lambda x: x[0]))
def prepare_cache_id(self, combined_args_kw, ignore_args): def prepare_cache_id(self, combined_args_kw, ignore_args):
"get the cacheid (conc. string of argument self.ids in order) ignoring ignore_args" "get the cacheid (conc. string of argument self.ids in order) ignoring ignore_args"
cache_id = "".join(self.id(a) for i, a in enumerate(combined_args_kw) if i not in ignore_args) cache_id = "".join(self.id(a) for i, a in enumerate(combined_args_kw) if i not in ignore_args)
self.logger.debug("cache_id={} was created".format(cache_id))
return cache_id return cache_id
def ensure_cache_length(self, cache_id): def ensure_cache_length(self, cache_id):
"Ensures the cache is within its limits and has one place free" "Ensures the cache is within its limits and has one place free"
self.logger.debug("cache length gets ensured")
if len(self.order) == self.limit: if len(self.order) == self.limit:
self.logger.debug("cache limit of l={} was reached".format(self.limit))
# we have reached the limit, so lets release one element # we have reached the limit, so lets release one element
cache_id = self.order.popleft() cache_id = self.order.popleft()
self.logger.debug("cach_id '{}' gets removed".format(cache_id))
combined_args_kw = self.cached_inputs[cache_id] combined_args_kw = self.cached_inputs[cache_id]
for ind in combined_args_kw: for ind in combined_args_kw:
if ind is not None: if ind is not None:
@ -66,7 +60,6 @@ class Cacher(object):
else: else:
cache_ids.remove(cache_id) cache_ids.remove(cache_id)
self.cached_input_ids[ind_id] = [ref, cache_ids] self.cached_input_ids[ind_id] = [ref, cache_ids]
self.logger.debug("removing caches")
del self.cached_outputs[cache_id] del self.cached_outputs[cache_id]
del self.inputs_changed[cache_id] del self.inputs_changed[cache_id]
del self.cached_inputs[cache_id] del self.cached_inputs[cache_id]
@ -81,10 +74,8 @@ class Cacher(object):
if a is not None: if a is not None:
ind_id = self.id(a) ind_id = self.id(a)
v = self.cached_input_ids.get(ind_id, [weakref.ref(a), []]) v = self.cached_input_ids.get(ind_id, [weakref.ref(a), []])
self.logger.debug("cache_id '{}' gets stored".format(cache_id))
v[1].append(cache_id) v[1].append(cache_id)
if len(v[1]) == 1: if len(v[1]) == 1:
self.logger.debug("adding observer to object {}".format(repr(a)))
a.add_observer(self, self.on_cache_changed) a.add_observer(self, self.on_cache_changed)
self.cached_input_ids[ind_id] = v self.cached_input_ids[ind_id] = v
@ -108,28 +99,21 @@ class Cacher(object):
cache_id = self.prepare_cache_id(inputs, self.ignore_args) cache_id = self.prepare_cache_id(inputs, self.ignore_args)
# 2: if anything is not cachable, we will just return the operation, without caching # 2: if anything is not cachable, we will just return the operation, without caching
if reduce(lambda a, b: a or (not (isinstance(b, Observable) or b is None)), inputs, False): if reduce(lambda a, b: a or (not (isinstance(b, Observable) or b is None)), inputs, False):
self.logger.info("some inputs are not observable: returning without caching")
self.logger.debug(str(map(lambda x: isinstance(x, Observable) or x is None, inputs)))
self.logger.debug(str(map(repr, inputs)))
return self.operation(*args, **kw) return self.operation(*args, **kw)
# 3&4: check whether this cache_id has been cached, then has it changed? # 3&4: check whether this cache_id has been cached, then has it changed?
try: try:
if(self.inputs_changed[cache_id]): if(self.inputs_changed[cache_id]):
self.logger.debug("{} already seen, but inputs changed. refreshing cacher".format(cache_id))
# 4: This happens, when elements have changed for this cache self.id # 4: This happens, when elements have changed for this cache self.id
self.inputs_changed[cache_id] = False self.inputs_changed[cache_id] = False
self.cached_outputs[cache_id] = self.operation(*args, **kw) self.cached_outputs[cache_id] = self.operation(*args, **kw)
except KeyError: except KeyError:
self.logger.info("{} never seen, creating cache entry".format(cache_id))
# 3: This is when we never saw this cache_id: # 3: This is when we never saw this cache_id:
self.ensure_cache_length(cache_id) self.ensure_cache_length(cache_id)
self.add_to_cache(cache_id, inputs, self.operation(*args, **kw)) self.add_to_cache(cache_id, inputs, self.operation(*args, **kw))
except: except:
self.logger.error("an error occurred while trying to run caching for {}, resetting".format(cache_id))
self.reset() self.reset()
raise raise
# 5: We have seen this cache_id and it is cached: # 5: We have seen this cache_id and it is cached:
self.logger.info("returning cache {}".format(cache_id))
return self.cached_outputs[cache_id] return self.cached_outputs[cache_id]
def on_cache_changed(self, direct, which=None): def on_cache_changed(self, direct, which=None):
@ -143,7 +127,6 @@ class Cacher(object):
ind_id = self.id(what) ind_id = self.id(what)
_, cache_ids = self.cached_input_ids.get(ind_id, [None, []]) _, cache_ids = self.cached_input_ids.get(ind_id, [None, []])
for cache_id in cache_ids: for cache_id in cache_ids:
self.logger.info("callback from {} changed inputs from {}".format(ind_id, self.inputs_changed[cache_id]))
self.inputs_changed[cache_id] = True self.inputs_changed[cache_id] = True
def reset(self): def reset(self):
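Stripped of its logging, the caching scheme above boils down to: key outputs by the ids of the inputs, evict the oldest entry when the limit is reached, and mark entries stale when an observed input changes. A toy, self-contained re-implementation of that idea (not GPy's Cacher; its real constructor signature and Observable machinery are not reproduced here, and keying by id() has the lifetime caveat the real class handles with weakrefs):

import collections

class ToyCacher(object):
    def __init__(self, operation, limit=5):
        self.operation, self.limit = operation, limit
        self.order = collections.deque()   # cache ids, oldest first
        self.outputs, self.changed = {}, {}

    def __call__(self, *args):
        cache_id = "".join(hex(id(a)) for a in args)
        if cache_id in self.outputs:
            if self.changed.pop(cache_id, False):
                # inputs changed since caching: refresh the stale entry
                self.outputs[cache_id] = self.operation(*args)
        else:
            if len(self.order) == self.limit:
                del self.outputs[self.order.popleft()]  # evict oldest entry
            self.order.append(cache_id)
            self.outputs[cache_id] = self.operation(*args)
        return self.outputs[cache_id]

    def invalidate(self, *args):
        # stands in for the on_cache_changed observer callback above
        self.changed["".join(hex(id(a)) for a in args)] = True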
View file
@ -51,7 +51,7 @@ if not (on_rtd):
json_data=open(path).read() json_data=open(path).read()
football_dict = json.loads(json_data) football_dict = json.loads(json_data)
def prompt_user(prompt): def prompt_user(prompt):
"""Ask user for agreeing to data set licenses.""" """Ask user for agreeing to data set licenses."""
@ -128,14 +128,14 @@ def download_url(url, store_directory, save_name = None, messages = True, suffix
f.write(buff) f.write(buff)
sys.stdout.write(" "*(len(status)) + "\r") sys.stdout.write(" "*(len(status)) + "\r")
if file_size: if file_size:
status = r"[{perc: <{ll}}] {dl:7.3f}/{full:.3f}MB".format(dl=file_size_dl/(1048576.), status = r"[{perc: <{ll}}] {dl:7.3f}/{full:.3f}MB".format(dl=file_size_dl/(1048576.),
full=file_size/(1048576.), ll=line_length, full=file_size/(1048576.), ll=line_length,
perc="="*int(line_length*float(file_size_dl)/file_size)) perc="="*int(line_length*float(file_size_dl)/file_size))
else: else:
status = r"[{perc: <{ll}}] {dl:7.3f}MB".format(dl=file_size_dl/(1048576.), status = r"[{perc: <{ll}}] {dl:7.3f}MB".format(dl=file_size_dl/(1048576.),
ll=line_length, ll=line_length,
perc="."*int(line_length*float(file_size_dl/(10*1048576.)))) perc="."*int(line_length*float(file_size_dl/(10*1048576.))))
sys.stdout.write(status) sys.stdout.write(status)
sys.stdout.flush() sys.stdout.flush()
sys.stdout.write(" "*(len(status)) + "\r") sys.stdout.write(" "*(len(status)) + "\r")
@ -320,7 +320,7 @@ def della_gatta_TRP63_gene_expression(data_set='della_gatta', gene_number=None):
Y = Y[:, None] Y = Y[:, None]
return data_details_return({'X': X, 'Y': Y, 'gene_number' : gene_number}, data_set) return data_details_return({'X': X, 'Y': Y, 'gene_number' : gene_number}, data_set)
def football_data(season='1314', data_set='football_data'): def football_data(season='1314', data_set='football_data'):
"""Football data from English games since 1993. This downloads data from football-data.co.uk for the given season. """ """Football data from English games since 1993. This downloads data from football-data.co.uk for the given season. """
@ -406,11 +406,11 @@ def lee_yeast_ChIP(data_set='lee_yeast_ChIP'):
dir_path = os.path.join(data_path, data_set) dir_path = os.path.join(data_path, data_set)
filename = os.path.join(dir_path, 'binding_by_gene.tsv') filename = os.path.join(dir_path, 'binding_by_gene.tsv')
S = read_csv(filename, header=1, index_col=0, sep='\t') S = read_csv(filename, header=1, index_col=0, sep='\t')
transcription_factors = [col for col in S.columns if col[:7] != 'Unnamed'] transcription_factors = [col for col in S.columns if col[:7] != 'Unnamed']
annotations = S[['Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3']] annotations = S[['Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3']]
S = S[transcription_factors] S = S[transcription_factors]
return data_details_return({'annotations' : annotations, 'Y' : S, 'transcription_factors': transcription_factors}, data_set) return data_details_return({'annotations' : annotations, 'Y' : S, 'transcription_factors': transcription_factors}, data_set)
def fruitfly_tomancak(data_set='fruitfly_tomancak', gene_number=None): def fruitfly_tomancak(data_set='fruitfly_tomancak', gene_number=None):
@ -425,7 +425,7 @@ def fruitfly_tomancak(data_set='fruitfly_tomancak', gene_number=None):
xt = np.linspace(0, num_time-1, num_time) xt = np.linspace(0, num_time-1, num_time)
xr = np.linspace(0, num_repeats-1, num_repeats) xr = np.linspace(0, num_repeats-1, num_repeats)
xtime, xrepeat = np.meshgrid(xt, xr) xtime, xrepeat = np.meshgrid(xt, xr)
X = np.vstack((xtime.flatten(), xrepeat.flatten())).T X = np.vstack((xtime.flatten(), xrepeat.flatten())).T
return data_details_return({'X': X, 'Y': Y, 'gene_number' : gene_number}, data_set) return data_details_return({'X': X, 'Y': Y, 'gene_number' : gene_number}, data_set)
def drosophila_protein(data_set='drosophila_protein'): def drosophila_protein(data_set='drosophila_protein'):
@ -467,7 +467,7 @@ def google_trends(query_terms=['big data', 'machine learning', 'data science'],
"""Data downloaded from Google trends for given query terms. Warning, if you use this function multiple times in a row you get blocked due to terms of service violations. The function will cache the result of your query, if you wish to refresh an old query set refresh_data to True. The function is inspired by this notebook: http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%20meet%20Notebook.ipynb""" """Data downloaded from Google trends for given query terms. Warning, if you use this function multiple times in a row you get blocked due to terms of service violations. The function will cache the result of your query, if you wish to refresh an old query set refresh_data to True. The function is inspired by this notebook: http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%20meet%20Notebook.ipynb"""
query_terms.sort() query_terms.sort()
import pandas import pandas
# Create directory name for data # Create directory name for data
dir_path = os.path.join(data_path,'google_trends') dir_path = os.path.join(data_path,'google_trends')
if not os.path.isdir(dir_path): if not os.path.isdir(dir_path):
@ -514,9 +514,9 @@ def google_trends(query_terms=['big data', 'machine learning', 'data science'],
X = np.asarray([(row, i) for i in range(terms) for row in df.index]) X = np.asarray([(row, i) for i in range(terms) for row in df.index])
Y = np.asarray([[df.ix[row][query_terms[i]]] for i in range(terms) for row in df.index ]) Y = np.asarray([[df.ix[row][query_terms[i]]] for i in range(terms) for row in df.index ])
output_info = columns[1:] output_info = columns[1:]
return data_details_return({'data frame' : df, 'X': X, 'Y': Y, 'query_terms': output_info, 'info': "Data downloaded from google trends with query terms: " + ', '.join(output_info) + '.'}, data_set) return data_details_return({'data frame' : df, 'X': X, 'Y': Y, 'query_terms': output_info, 'info': "Data downloaded from google trends with query terms: " + ', '.join(output_info) + '.'}, data_set)
# The data sets # The data sets
def oil(data_set='three_phase_oil_flow'): def oil(data_set='three_phase_oil_flow'):
"""The three phase oil data from Bishop and James (1993).""" """The three phase oil data from Bishop and James (1993)."""
@ -647,7 +647,7 @@ def decampos_digits(data_set='decampos_characters', which_digits=[0,1,2,3,4,5,6,
lbls = np.array([[l]*num_samples for l in which_digits]).reshape(Y.shape[0], 1) lbls = np.array([[l]*num_samples for l in which_digits]).reshape(Y.shape[0], 1)
str_lbls = np.array([[str(l)]*num_samples for l in which_digits]) str_lbls = np.array([[str(l)]*num_samples for l in which_digits])
return data_details_return({'Y': Y, 'lbls': lbls, 'str_lbls' : str_lbls, 'info': 'Digits data set from the de Campos characters data'}, data_set) return data_details_return({'Y': Y, 'lbls': lbls, 'str_lbls' : str_lbls, 'info': 'Digits data set from the de Campos characters data'}, data_set)
def ripley_synth(data_set='ripley_prnn_data'): def ripley_synth(data_set='ripley_prnn_data'):
if not data_available(data_set): if not data_available(data_set):
download_data(data_set) download_data(data_set)
@ -690,7 +690,7 @@ def mauna_loa(data_set='mauna_loa', num_train=545, refresh_data=False):
Y = allY[:num_train, 0:1] Y = allY[:num_train, 0:1]
Ytest = allY[num_train:, 0:1] Ytest = allY[num_train:, 0:1]
return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Mauna Loa data with " + str(num_train) + " values used as training points."}, data_set) return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Mauna Loa data with " + str(num_train) + " values used as training points."}, data_set)
def boxjenkins_airline(data_set='boxjenkins_airline', num_train=96): def boxjenkins_airline(data_set='boxjenkins_airline', num_train=96):
path = os.path.join(data_path, data_set) path = os.path.join(data_path, data_set)
@ -702,7 +702,7 @@ def boxjenkins_airline(data_set='boxjenkins_airline', num_train=96):
Xtest = data[num_train:, 0:1] Xtest = data[num_train:, 0:1]
Ytest = data[num_train:, 1:2] Ytest = data[num_train:, 1:2]
return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Monthly airline passenger data from Box & Jenkins 1976."}, data_set) return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Monthly airline passenger data from Box & Jenkins 1976."}, data_set)
def osu_run1(data_set='osu_run1', sample_every=4): def osu_run1(data_set='osu_run1', sample_every=4):
path = os.path.join(data_path, data_set) path = os.path.join(data_path, data_set)
@ -741,7 +741,7 @@ def hapmap3(data_set='hapmap3'):
\ -1, iff SNPij==(B2,B2) \ -1, iff SNPij==(B2,B2)
The SNP data and the meta information (such as iid, sex and phenotype) are The SNP data and the meta information (such as iid, sex and phenotype) are
stored in the dataframe datadf, index is the Individual ID, stored in the dataframe datadf, index is the Individual ID,
with the following columns for metainfo: with the following columns for metainfo:
* family_id -> Family ID * family_id -> Family ID
@ -814,7 +814,7 @@ def hapmap3(data_set='hapmap3'):
status=write_status('unpacking...', curr, status) status=write_status('unpacking...', curr, status)
os.remove(filepath) os.remove(filepath)
status=write_status('reading .ped...', 25, status) status=write_status('reading .ped...', 25, status)
# Preprocess data: # Preprocess data:
snpstrnp = np.loadtxt(unpacked_files[0], dtype=str) snpstrnp = np.loadtxt(unpacked_files[0], dtype=str)
status=write_status('reading .map...', 33, status) status=write_status('reading .map...', 33, status)
mapnp = np.loadtxt(unpacked_files[1], dtype=str) mapnp = np.loadtxt(unpacked_files[1], dtype=str)
@ -975,7 +975,7 @@ def olivetti_glasses(data_set='olivetti_glasses', num_training=200, seed=default
Y = y[index[:num_training],:] Y = y[index[:num_training],:]
Ytest = y[index[num_training:]] Ytest = y[index[num_training:]]
return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'seed' : seed, 'info': "ORL Faces with labels identifying who is wearing glasses and who isn't. Data is randomly partitioned according to given seed. Presence or absence of glasses was labelled by James Hensman."}, 'olivetti_faces') return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'seed' : seed, 'info': "ORL Faces with labels identifying who is wearing glasses and who isn't. Data is randomly partitioned according to given seed. Presence or absence of glasses was labelled by James Hensman."}, 'olivetti_faces')
def olivetti_faces(data_set='olivetti_faces'): def olivetti_faces(data_set='olivetti_faces'):
path = os.path.join(data_path, data_set) path = os.path.join(data_path, data_set)
if not data_available(data_set): if not data_available(data_set):
@ -988,7 +988,8 @@ def olivetti_faces(data_set='olivetti_faces'):
for subject in range(40): for subject in range(40):
for image in range(10): for image in range(10):
image_path = os.path.join(path, 'orl_faces', 's'+str(subject+1), str(image+1) + '.pgm') image_path = os.path.join(path, 'orl_faces', 's'+str(subject+1), str(image+1) + '.pgm')
Y.append(GPy.util.netpbmfile.imread(image_path).flatten()) from GPy.util import netpbmfile
Y.append(netpbmfile.imread(image_path).flatten())
lbls.append(subject) lbls.append(subject)
Y = np.asarray(Y) Y = np.asarray(Y)
lbls = np.asarray(lbls)[:, None] lbls = np.asarray(lbls)[:, None]
@ -1211,7 +1212,7 @@ def cifar10_patches(data_set='cifar-10'):
for x in range(0,32-5,5): for x in range(0,32-5,5):
for y in range(0,32-5,5): for y in range(0,32-5,5):
patches = np.concatenate((patches, images[:,x:x+5,y:y+5,:]), axis=0) patches = np.concatenate((patches, images[:,x:x+5,y:y+5,:]), axis=0)
patches = patches.reshape((patches.shape[0],-1)) patches = patches.reshape((patches.shape[0],-1))
return data_details_return({'Y': patches, "info" : "32x32 pixel patches extracted from the CIFAR-10 data by Boris Babenko to demonstrate k-means features."}, data_set) return data_details_return({'Y': patches, "info" : "32x32 pixel patches extracted from the CIFAR-10 data by Boris Babenko to demonstrate k-means features."}, data_set)
def cmu_mocap_49_balance(data_set='cmu_mocap'): def cmu_mocap_49_balance(data_set='cmu_mocap'):
View file
@ -16,8 +16,8 @@ def initialize_latent(init, input_dim, Y):
var = p.fracs[:input_dim] var = p.fracs[:input_dim]
else: else:
var = Xr.var(0) var = Xr.var(0)
Xr -= Xr.mean(0) Xr -= Xr.mean(0)
Xr /= Xr.var(0) Xr /= Xr.std(0)
return Xr, var/var.max() return Xr, var/var.max()
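The one-line change above makes the latent initialisation divide by the standard deviation rather than the variance; a minimal numpy illustration (Xr here is a stand-in for the PCA-initialised latent points):

import numpy as np

Xr = np.random.randn(100, 3) * [2.0, 0.5, 1.0]  # stand-in latent init
Xr -= Xr.mean(0)   # centre each latent dimension
Xr /= Xr.std(0)    # unit scale; dividing by var(0), as before, mis-scales dimensions
assert np.allclose(Xr.std(0), 1.0)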
View file
@ -15,6 +15,7 @@ import scipy
import warnings import warnings
import os import os
from config import * from config import *
import logging
_scipyversion = np.float64((scipy.__version__).split('.')[:2]) _scipyversion = np.float64((scipy.__version__).split('.')[:2])
_fix_dpotri_scipy_bug = True _fix_dpotri_scipy_bug = True
@ -93,14 +94,20 @@ def jitchol(A, maxtries=5):
raise linalg.LinAlgError, "not pd: non-positive diagonal elements" raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
jitter = diagA.mean() * 1e-6 jitter = diagA.mean() * 1e-6
while maxtries > 0 and np.isfinite(jitter): while maxtries > 0 and np.isfinite(jitter):
print 'Warning: adding jitter of {:.10e}'.format(jitter)
try: try:
return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True) L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
except: except:
jitter *= 10 jitter *= 10
finally: finally:
maxtries -= 1 maxtries -= 1
raise linalg.LinAlgError, "not positive definite, even with jitter." raise linalg.LinAlgError, "not positive definite, even with jitter."
import traceback
try: raise
except:
logging.warning('\n'.join(['Added jitter of {:.10e}'.format(jitter),
' in '+traceback.format_list(traceback.extract_stack(limit=2)[-2:-1])[0][2:]]))
return L
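A standalone illustration of the jitter-retry pattern jitchol implements (a sketch, not GPy's function):

import numpy as np
from scipy import linalg

A = np.ones((3, 3))                       # PSD but singular: not positive definite
jitter = np.diag(A).mean() * 1e-6
for _ in range(5):                        # maxtries
    try:
        L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
        break
    except linalg.LinAlgError:
        jitter *= 10                      # escalate the jitter and retry
else:
    raise linalg.LinAlgError("not positive definite, even with jitter.")
assert np.allclose(L.dot(L.T), A + np.eye(3) * jitter)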
@ -110,7 +117,7 @@ def jitchol(A, maxtries=5):
# """ # """
# Wrapper for lapack dtrtri function # Wrapper for lapack dtrtri function
# Inverse of L # Inverse of L
# #
# :param L: Triangular Matrix L # :param L: Triangular Matrix L
# :param lower: is matrix lower (true) or upper (false) # :param lower: is matrix lower (true) or upper (false)
# :returns: Li, info # :returns: Li, info
@ -122,10 +129,17 @@ def dtrtrs(A, B, lower=1, trans=0, unitdiag=0):
""" """
Wrapper for lapack dtrtrs function Wrapper for lapack dtrtrs function
DTRTRS solves a triangular system of the form
A * X = B or A**T * X = B,
where A is a triangular matrix of order N, and B is an N-by-NRHS
matrix. A check is made to verify that A is nonsingular.
:param A: Matrix A(triangular) :param A: Matrix A(triangular)
:param B: Matrix B :param B: Matrix B
:param lower: is matrix lower (true) or upper (false) :param lower: is matrix lower (true) or upper (false)
:returns: :returns: Solution to A * X = B or A**T * X = B
""" """
A = np.asfortranarray(A) A = np.asfortranarray(A)
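A small usage sketch of the routine this wraps, calling scipy's LAPACK binding directly (scipy.linalg.lapack.dtrtrs returns the solution together with an info flag; 0 means A was nonsingular):

import numpy as np
from scipy.linalg import lapack

L = np.array([[2.0, 0.0],
              [1.0, 3.0]])               # lower-triangular A
b = np.array([[2.0], [5.0]])
x, info = lapack.dtrtrs(L, b, lower=1, trans=0, unitdiag=0)
assert info == 0
assert np.allclose(L.dot(x), b)          # solves A * X = B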
@ -146,11 +160,11 @@ def dpotrs(A, B, lower=1):
def dpotri(A, lower=1): def dpotri(A, lower=1):
""" """
Wrapper for lapack dpotri function Wrapper for lapack dpotri function
DPOTRI - compute the inverse of a real symmetric positive DPOTRI - compute the inverse of a real symmetric positive
definite matrix A using the Cholesky factorization A = definite matrix A using the Cholesky factorization A =
U**T*U or A = L*L**T computed by DPOTRF U**T*U or A = L*L**T computed by DPOTRF
:param A: Matrix A :param A: Matrix A
:param lower: is matrix lower (true) or upper (false) :param lower: is matrix lower (true) or upper (false)
:returns: A inverse :returns: A inverse
@ -159,7 +173,7 @@ def dpotri(A, lower=1):
if _fix_dpotri_scipy_bug: if _fix_dpotri_scipy_bug:
assert lower==1, "scipy linalg behaviour is very weird. please use lower, fortran ordered arrays" assert lower==1, "scipy linalg behaviour is very weird. please use lower, fortran ordered arrays"
lower = 0 lower = 0
A = force_F_ordered(A) A = force_F_ordered(A)
R, info = lapack.dpotri(A, lower=lower) #needs to be zero here, seems to be a scipy bug R, info = lapack.dpotri(A, lower=lower) #needs to be zero here, seems to be a scipy bug
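For completeness, the factor-then-invert pattern dpotri implements, again via scipy's bindings; a sketch only, since (as the assertion above warns) handling of the lower flag has varied across scipy versions:

import numpy as np
from scipy.linalg import lapack

A = np.array([[4.0, 1.0],
              [1.0, 3.0]])
L, info = lapack.dpotrf(A, lower=1)      # Cholesky factor, A = L L^T
R, info = lapack.dpotri(L, lower=1)      # one triangle of A^{-1}
Ainv = np.tril(R) + np.tril(R, -1).T     # symmetrise the result
assert np.allclose(A.dot(Ainv), np.eye(2))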
331
GPy/util/netpbmfile.py Normal file
View file
@ -0,0 +1,331 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# netpbmfile.py
# Copyright (c) 2011-2013, Christoph Gohlke
# Copyright (c) 2011-2013, The Regents of the University of California
# Produced at the Laboratory for Fluorescence Dynamics.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the copyright holders nor the names of any
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Read and write image data from respectively to Netpbm files.
This implementation follows the Netpbm format specifications at
http://netpbm.sourceforge.net/doc/. No gamma correction is performed.
The following image formats are supported: PBM (bi-level), PGM (grayscale),
PPM (color), PAM (arbitrary), XV thumbnail (RGB332, read-only).
:Author:
`Christoph Gohlke <http://www.lfd.uci.edu/~gohlke/>`_
:Organization:
Laboratory for Fluorescence Dynamics, University of California, Irvine
:Version: 2013.01.18
Requirements
------------
* `CPython 2.7, 3.2 or 3.3 <http://www.python.org>`_
* `Numpy 1.7 <http://www.numpy.org>`_
* `Matplotlib 1.2 <http://www.matplotlib.org>`_ (optional for plotting)
Examples
--------
>>> im1 = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16)
>>> imsave('_tmp.pgm', im1)
>>> im2 = imread('_tmp.pgm')
>>> assert numpy.all(im1 == im2)
"""
from __future__ import division, print_function
import sys
import re
import math
from copy import deepcopy
import numpy
__version__ = '2013.01.18'
__docformat__ = 'restructuredtext en'
__all__ = ['imread', 'imsave', 'NetpbmFile']
def imread(filename, *args, **kwargs):
"""Return image data from Netpbm file as numpy array.
`args` and `kwargs` are arguments to NetpbmFile.asarray().
Examples
--------
>>> image = imread('_tmp.pgm')
"""
try:
netpbm = NetpbmFile(filename)
image = netpbm.asarray()
finally:
netpbm.close()
return image
def imsave(filename, data, maxval=None, pam=False):
"""Write image data to Netpbm file.
Examples
--------
>>> image = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16)
>>> imsave('_tmp.pgm', image)
"""
try:
netpbm = NetpbmFile(data, maxval=maxval)
netpbm.write(filename, pam=pam)
finally:
netpbm.close()
class NetpbmFile(object):
"""Read and write Netpbm PAM, PBM, PGM, PPM, files."""
_types = {b'P1': b'BLACKANDWHITE', b'P2': b'GRAYSCALE', b'P3': b'RGB',
b'P4': b'BLACKANDWHITE', b'P5': b'GRAYSCALE', b'P6': b'RGB',
b'P7 332': b'RGB', b'P7': b'RGB_ALPHA'}
def __init__(self, arg=None, **kwargs):
"""Initialize instance from filename, open file, or numpy array."""
for attr in ('header', 'magicnum', 'width', 'height', 'maxval',
'depth', 'tupltypes', '_filename', '_fh', '_data'):
setattr(self, attr, None)
if arg is None:
self._fromdata([], **kwargs)
elif isinstance(arg, basestring):
self._fh = open(arg, 'rb')
self._filename = arg
self._fromfile(self._fh, **kwargs)
elif hasattr(arg, 'seek'):
self._fromfile(arg, **kwargs)
self._fh = arg
else:
self._fromdata(arg, **kwargs)
def asarray(self, copy=True, cache=False, **kwargs):
"""Return image data from file as numpy array."""
data = self._data
if data is None:
data = self._read_data(self._fh, **kwargs)
if cache:
self._data = data
else:
return data
return deepcopy(data) if copy else data
def write(self, arg, **kwargs):
"""Write instance to file."""
if hasattr(arg, 'seek'):
self._tofile(arg, **kwargs)
else:
with open(arg, 'wb') as fid:
self._tofile(fid, **kwargs)
def close(self):
"""Close open file. Future asarray calls might fail."""
if self._filename and self._fh:
self._fh.close()
self._fh = None
def __del__(self):
self.close()
def _fromfile(self, fh):
"""Initialize instance from open file."""
fh.seek(0)
data = fh.read(4096)
if (len(data) < 7) or not (b'0' < data[1:2] < b'8'):
raise ValueError("Not a Netpbm file:\n%s" % data[:32])
try:
self._read_pam_header(data)
except Exception:
try:
self._read_pnm_header(data)
except Exception:
raise ValueError("Not a Netpbm file:\n%s" % data[:32])
def _read_pam_header(self, data):
"""Read PAM header and initialize instance."""
regroups = re.search(
b"(^P7[\n\r]+(?:(?:[\n\r]+)|(?:#.*)|"
b"(HEIGHT\s+\d+)|(WIDTH\s+\d+)|(DEPTH\s+\d+)|(MAXVAL\s+\d+)|"
b"(?:TUPLTYPE\s+\w+))*ENDHDR\n)", data).groups()
self.header = regroups[0]
self.magicnum = b'P7'
for group in regroups[1:]:
key, value = group.split()
setattr(self, unicode(key).lower(), int(value))
matches = re.findall(b"(TUPLTYPE\s+\w+)", self.header)
self.tupltypes = [s.split(None, 1)[1] for s in matches]
def _read_pnm_header(self, data):
"""Read PNM header and initialize instance."""
bpm = data[1:2] in b"14"
regroups = re.search(b"".join((
b"(^(P[123456]|P7 332)\s+(?:#.*[\r\n])*",
b"\s*(\d+)\s+(?:#.*[\r\n])*",
b"\s*(\d+)\s+(?:#.*[\r\n])*" * (not bpm),
b"\s*(\d+)\s(?:\s*#.*[\r\n]\s)*)")), data).groups() + (1, ) * bpm
self.header = regroups[0]
self.magicnum = regroups[1]
self.width = int(regroups[2])
self.height = int(regroups[3])
self.maxval = int(regroups[4])
self.depth = 3 if self.magicnum in b"P3P6P7 332" else 1
self.tupltypes = [self._types[self.magicnum]]
def _read_data(self, fh, byteorder='>'):
"""Return image data from open file as numpy array."""
fh.seek(len(self.header))
data = fh.read()
dtype = 'u1' if self.maxval < 256 else byteorder + 'u2'
depth = 1 if self.magicnum == b"P7 332" else self.depth
shape = [-1, self.height, self.width, depth]
size = numpy.prod(shape[1:])
if self.magicnum in b"P1P2P3":
data = numpy.array(data.split(None, size)[:size], dtype)
data = data.reshape(shape)
elif self.maxval == 1:
shape[2] = int(math.ceil(self.width / 8))
data = numpy.frombuffer(data, dtype).reshape(shape)
data = numpy.unpackbits(data, axis=-2)[:, :, :self.width, :]
else:
data = numpy.frombuffer(data, dtype)
data = data[:size * (data.size // size)].reshape(shape)
if data.shape[0] < 2:
data = data.reshape(data.shape[1:])
if data.shape[-1] < 2:
data = data.reshape(data.shape[:-1])
if self.magicnum == b"P7 332":
rgb332 = numpy.array(list(numpy.ndindex(8, 8, 4)), numpy.uint8)
rgb332 *= [36, 36, 85]
data = numpy.take(rgb332, data, axis=0)
return data
def _fromdata(self, data, maxval=None):
"""Initialize instance from numpy array."""
data = numpy.array(data, ndmin=2, copy=True)
if data.dtype.kind not in "uib":
raise ValueError("not an integer type: %s" % data.dtype)
if data.dtype.kind == 'i' and numpy.min(data) < 0:
raise ValueError("data out of range: %i" % numpy.min(data))
if maxval is None:
maxval = numpy.max(data)
maxval = 255 if maxval < 256 else 65535
if maxval < 0 or maxval > 65535:
raise ValueError("data out of range: %i" % maxval)
data = data.astype('u1' if maxval < 256 else '>u2')
self._data = data
if data.ndim > 2 and data.shape[-1] in (3, 4):
self.depth = data.shape[-1]
self.width = data.shape[-2]
self.height = data.shape[-3]
self.magicnum = b'P7' if self.depth == 4 else b'P6'
else:
self.depth = 1
self.width = data.shape[-1]
self.height = data.shape[-2]
self.magicnum = b'P5' if maxval > 1 else b'P4'
self.maxval = maxval
self.tupltypes = [self._types[self.magicnum]]
self.header = self._header()
def _tofile(self, fh, pam=False):
"""Write Netbm file."""
fh.seek(0)
fh.write(self._header(pam))
data = self.asarray(copy=False)
if self.maxval == 1:
data = numpy.packbits(data, axis=-1)
data.tofile(fh)
def _header(self, pam=False):
"""Return file header as byte string."""
if pam or self.magicnum == b'P7':
header = "\n".join((
"P7",
"HEIGHT %i" % self.height,
"WIDTH %i" % self.width,
"DEPTH %i" % self.depth,
"MAXVAL %i" % self.maxval,
"\n".join("TUPLTYPE %s" % unicode(i) for i in self.tupltypes),
"ENDHDR\n"))
elif self.maxval == 1:
header = "P4 %i %i\n" % (self.width, self.height)
elif self.depth == 1:
header = "P5 %i %i %i\n" % (self.width, self.height, self.maxval)
else:
header = "P6 %i %i %i\n" % (self.width, self.height, self.maxval)
if sys.version_info[0] > 2:
header = bytes(header, 'ascii')
return header
def __str__(self):
"""Return information about instance."""
return unicode(self.header)
if sys.version_info[0] > 2:
basestring = str
unicode = lambda x: str(x, 'ascii')
if __name__ == "__main__":
# Show images specified on command line or all images in current directory
from glob import glob
from matplotlib import pyplot
files = sys.argv[1:] if len(sys.argv) > 1 else glob('*.p*m')
for fname in files:
try:
pam = NetpbmFile(fname)
img = pam.asarray(copy=False)
if False:
pam.write('_tmp.pgm.out', pam=True)
img2 = imread('_tmp.pgm.out')
assert numpy.all(img == img2)
imsave('_tmp.pgm.out', img)
img2 = imread('_tmp.pgm.out')
assert numpy.all(img == img2)
pam.close()
except ValueError as e:
print(fname, e)
continue
_shape = img.shape
if img.ndim > 3 or (img.ndim > 2 and img.shape[-1] not in (3, 4)):
img = img[0]
cmap = 'gray' if pam.maxval > 1 else 'binary'
pyplot.imshow(img, cmap, interpolation='nearest')
pyplot.title("%s %s %s %s" % (fname, unicode(pam.magicnum),
_shape, img.dtype))
pyplot.show()
View file
@ -16,13 +16,13 @@ def common_subarrays(X, axis=0):
for the subarray in X, where index is the index to the remaining axis. for the subarray in X, where index is the index to the remaining axis.
:param :class:`np.ndarray` X: 2d array to check for common subarrays in :param :class:`np.ndarray` X: 2d array to check for common subarrays in
:param int axis: axis to apply subarray detection over. :param int axis: axis to apply subarray detection over.
When the index is 0, compare rows -- columns, otherwise. When the index is 0, compare rows -- columns, otherwise.
Examples: Examples:
========= =========
In a 2d array: In a 2d array:
>>> import numpy as np >>> import numpy as np
>>> X = np.zeros((3,6), dtype=bool) >>> X = np.zeros((3,6), dtype=bool)
>>> X[[1,1,1],[0,4,5]] = 1; X[1:,[2,3]] = 1 >>> X[[1,1,1],[0,4,5]] = 1; X[1:,[2,3]] = 1
@ -48,14 +48,10 @@ def common_subarrays(X, axis=0):
assert X.ndim == 2 and axis in (0,1), "Only implemented for 2D arrays" assert X.ndim == 2 and axis in (0,1), "Only implemented for 2D arrays"
subarrays = defaultdict(list) subarrays = defaultdict(list)
cnt = count() cnt = count()
logger = logging.getLogger("common_subarrays")
def accumulate(x, s, c): def accumulate(x, s, c):
logger.debug("creating tuple")
t = tuple(x) t = tuple(x)
logger.debug("tuple done")
col = c.next() col = c.next()
iadd(s[t], [col]) iadd(s[t], [col])
logger.debug("added col {}".format(col))
return None return None
if axis == 0: [accumulate(x, subarrays, cnt) for x in X] if axis == 0: [accumulate(x, subarrays, cnt) for x in X]
else: [accumulate(x, subarrays, cnt) for x in X.T] else: [accumulate(x, subarrays, cnt) for x in X.T]
@ -63,4 +59,4 @@ def common_subarrays(X, axis=0):
if __name__ == '__main__': if __name__ == '__main__':
import doctest import doctest
doctest.testmod() doctest.testmod()