merge the discriminative prior to devel

Author: Fariba
Date:   2014-10-16 17:21:33 +01:00
Commit: 2d89fa821a
72 changed files with 1681 additions and 1894 deletions

View file

@@ -93,6 +93,22 @@ class GP(Model):
self.link_parameter(self.kern)
self.link_parameter(self.likelihood)
def set_X(self,X):
# TODO: it does not work with BGPLVM
if isinstance(X, ObsAr):
self.X = X
else:
self.X = ObsAr(X)
def set_Y(self,Y):
if self.normalizer is not None:
self.normalizer.scale_by(Y)
self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
self.Y = Y
else:
self.Y = ObsAr(Y)
self.Y_normalized = self.Y
def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata)
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
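The new set_X and set_Y helpers swap the training data in place, wrapping plain arrays as ObsAr and re-applying the normalizer to Y when one is set. Neither method triggers inference on its own, so a minimal usage sketch looks like this (GPRegression is used purely for illustration; calling parameters_changed() by hand to force a recompute is an assumption, not part of this diff):

import numpy as np
import GPy

X = np.random.rand(50, 1)
Y = np.sin(6 * X) + 0.05 * np.random.randn(50, 1)
m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(1))

# swap in fresh data; plain arrays are wrapped as ObsAr internally
X2 = np.random.rand(50, 1)
Y2 = np.sin(6 * X2) + 0.05 * np.random.randn(50, 1)
m.set_X(X2)
m.set_Y(Y2)
m.parameters_changed()  # assumed: recompute the posterior for the new data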

View file

@@ -13,11 +13,7 @@ class Mapping(Parameterized):
def __init__(self, input_dim, output_dim, name='mapping'):
self.input_dim = input_dim
self.output_dim = output_dim
super(Mapping, self).__init__(name=name)
# Model.__init__(self)
# All leaf nodes should call self._set_params(self._get_params()) at
# the end
def f(self, X):
raise NotImplementedError
@@ -56,7 +52,8 @@ class Mapping(Parameterized):
Can plot only part of the data and part of the posterior functions
using which_data and which_functions
This is a convenience function: arguments are passed to
GPy.plotting.matplot_dep.models_plots.plot_mapping
"""
if "matplotlib" in sys.modules:
@@ -66,7 +63,10 @@ class Mapping(Parameterized):
raise NameError, "matplotlib package has not been imported."
class Bijective_mapping(Mapping):
"""
This is a mapping that is bijective, i.e. you can go from X to f and
also back from f to X. The inverse mapping is called g().
"""
def __init__(self, input_dim, output_dim, name='bijective_mapping'):
super(Bijective_apping, self).__init__(name=name)
@@ -77,7 +77,11 @@ class Bijective_mapping(Mapping):
from model import Model
class Mapping_check_model(Model):
"""
This is a dummy model class used as a base class for checking that the
gradients of a given mapping are implemented correctly. It enables
checkgradient() to be called independently on each mapping.
"""
def __init__(self, mapping=None, dL_df=None, X=None):
num_samples = 20
if mapping==None:

View file

@@ -213,6 +213,7 @@ class Model(Parameterized):
def optimize(self, optimizer=None, start=None, **kwargs):
"""
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
kwargs are passed to the optimizer. They can be:
:param max_f_eval: maximum number of function evaluations
@@ -222,7 +223,15 @@ class Model(Parameterized):
:param optimizer: which optimizer to use (defaults to self.preferred_optimizer)
:type optimizer: string
Valid optimizers are:
- 'scg': scaled conjugate gradient method, recommended for stability.
See also GPy.inference.optimization.scg
- 'fmin_tnc': truncated Newton method (see scipy.optimize.fmin_tnc)
- 'simplex': the Nelder-Mead simplex method (see scipy.optimize.fmin),
- 'lbfgsb': the l-bfgs-b method (see scipy.optimize.fmin_l_bfgs_b),
- 'sgd': stochastic gradient descent (see scipy.optimize.sgd). For experts only!
""" """
if self.is_fixed: if self.is_fixed:
raise RuntimeError, "Cannot optimize, when everything is fixed" raise RuntimeError, "Cannot optimize, when everything is fixed"
@ -291,7 +300,7 @@ class Model(Parameterized):
# just check the global ratio # just check the global ratio
dx = np.zeros(x.shape) dx = np.zeros(x.shape)
dx[transformed_index] = step * (np.sign(np.random.uniform(-1, 1, transformed_index.size)) if transformed_index.size != 2 else 1.) dx[transformed_index] = step * (np.sign(np.random.uniform(-1, 1, transformed_index.size)) if transformed_index.size != 2 else 1.)
# evaulate around the point x # evaulate around the point x
f1 = self._objective(x + dx) f1 = self._objective(x + dx)
f2 = self._objective(x - dx) f2 = self._objective(x - dx)
@@ -372,6 +381,16 @@ class Model(Parameterized):
self.optimizer_array = x
return ret
def _repr_html_(self):
"""Representation of the model in html for notebook display."""
model_details = [['<b>Model</b>', self.name + '<br>'],
['<b>Log-likelihood</b>', '{}<br>'.format(float(self.log_likelihood()))],
["<b>Number of Parameters</b>", '{}<br>'.format(self.size)]]
from operator import itemgetter
to_print = [""] + ["{}: {}".format(name, detail) for name, detail in model_details] + ["<br><b>Parameters</b>:"]
to_print.append(super(Model, self)._repr_html_())
return "\n".join(to_print)
def __str__(self):
model_details = [['Name', self.name],
['Log-likelihood', '{}'.format(float(self.log_likelihood()))],
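The docstring now names the supported optimizers explicitly. A short sketch of selecting one by name (kwargs such as messages and max_iters are assumed to be forwarded to the optimizer, as the docstring describes):

import numpy as np
import GPy

X = np.linspace(0, 1, 40)[:, None]
Y = np.sin(10 * X) + 0.1 * np.random.randn(40, 1)
m = GPy.models.GPRegression(X, Y)

m.optimize('scg', messages=True, max_iters=200)  # scaled conjugate gradients
# m.optimize('lbfgsb')                           # or L-BFGS-B via scipy
print m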

View file

@@ -49,11 +49,13 @@ class Param(Parameterizable, ObsAr):
obj._realshape_ = obj.shape
obj._realsize_ = obj.size
obj._realndim_ = obj.ndim
obj._original_ = obj
return obj
def __init__(self, name, input_array, default_constraint=None, *a, **kw):
self._in_init_ = True
super(Param, self).__init__(name=name, default_constraint=default_constraint, *a, **kw)
self._in_init_ = False
def build_pydot(self,G):
import pydot
@@ -124,10 +126,10 @@ class Param(Parameterizable, ObsAr):
#if not reduce(lambda a, b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim:
# s += (Ellipsis,)
new_arr = super(Param, self).__getitem__(s, *args, **kwargs)
try:
new_arr._current_slice_ = s
new_arr._gradient_array_ = self.gradient[s]
new_arr._original_ = self._original_
except AttributeError: pass # returning 0d array or float, double etc
return new_arr
@@ -157,29 +159,29 @@ class Param(Parameterizable, ObsAr):
return self.constraints[__fixed__].size == self.size
def _get_original(self, param):
return self._original_
#===========================================================================
# Pickling and copying
#===========================================================================
def copy(self):
return Parameterizable.copy(self, which=self)
def __deepcopy__(self, memo):
s = self.__new__(self.__class__, name=self.name, input_array=self.view(numpy.ndarray).copy())
memo[id(self)] = s
import copy
Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
return s
def _setup_observers(self):
"""
Setup the default observers
1: pass through to parent, if present
"""
if self.has_parent():
self.add_observer(self._parent_, self._parent_._pass_through_notify_observers, -np.inf)
#===========================================================================
# Printing -> done
#===========================================================================
@@ -249,6 +251,29 @@ class Param(Parameterizable, ObsAr):
if ind.size > 4: indstr = ','.join(map(str, ind[:2])) + "..." + ','.join(map(str, ind[-2:]))
else: indstr = ','.join(map(str, ind))
return name + '[' + indstr + ']'
def _repr_html_(self, constr_matrix=None, indices=None, prirs=None, ties=None):
"""Representation of the parameter in html for notebook display."""
filter_ = self._current_slice_
vals = self.flat
if indices is None: indices = self._indices(filter_)
ravi = self._raveled_index(filter_)
if constr_matrix is None: constr_matrix = self.constraints.properties_for(ravi)
if prirs is None: prirs = self.priors.properties_for(ravi)
if ties is None: ties = self._ties_for(ravi)
ties = [' '.join(map(lambda x: x, t)) for t in ties]
header_format = """
<tr>
<td><b>{i}</b></td>
<td><b>{x}</b></td>
<td><b>{c}</b></td>
<td><b>{p}</b></td>
<td><b>{t}</b></td>
</tr>"""
header = header_format.format(x=self.hierarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing
if not ties: ties = itertools.cycle([''])
return "\n".join(['<table>'] + [header] + ["<tr><td>{i}</td><td align=\"right\">{x}</td><td>{c}</td><td>{p}</td><td>{t}</td></tr>".format(x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)] + ["</table>"])
def __str__(self, constr_matrix=None, indices=None, prirs=None, ties=None, lc=None, lx=None, li=None, lp=None, lt=None, only_name=False):
filter_ = self._current_slice_
vals = self.flat

View file

@@ -14,12 +14,11 @@ Observable Pattern for patameterization
"""
from transformations import Transformation,Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
from ...util.misc import param_to_array
import numpy as np
import re
import logging
__updated__ = '2014-10-09'
class HierarchyError(Exception):
"""
@@ -64,7 +63,7 @@ class Observable(object):
@updates.setter
def updates(self, ups):
raise DeprecationWarning("updates is now a function, see update(True|False|None)")
def update_model(self, updates=None):
"""
Get or set, whether automatic updates are performed. When updates are
@@ -88,21 +87,23 @@ class Observable(object):
else:
self._updates = updates
self.trigger_update()
def toggle_update(self):
self.update_model(not self.update())
def trigger_update(self, trigger_parent=True):
"""
Update the model from the current state.
Make sure that updates are on, otherwise this
method will do nothing
:param bool trigger_parent: Whether to trigger the parent, after self has updated
"""
if not self.update_model() or self._in_init_:
#print "Warning: updates are off, updating the model will do nothing"
return
self._trigger_params_changed(trigger_parent)
def add_observer(self, observer, callble, priority=0):
"""
Add an observer `observer` with the callback `callble`
@@ -540,18 +541,18 @@ class Indexable(Nameable, Observable):
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
return ret
return 0.
#===========================================================================
# Tie parameters together
#===========================================================================
def _has_ties(self):
if self._highest_parent_.tie.tied_param is None:
return False
if self.has_parent():
return self._highest_parent_.tie.label_buf[self._highest_parent_._raveled_index_for(self)].sum()>0
return True
def tie_together(self):
self._highest_parent_.tie.add_tied_parameter(self)
self._highest_parent_._set_fixed(self,self._raveled_index())
@@ -635,10 +636,18 @@ class Indexable(Nameable, Observable):
"""
From Parentable:
Called when the parent changed
update the constraints and priors view, so that
constraining is automized for the parent.
"""
from index_operations import ParameterIndexOperationsView
#if getattr(self, "_in_init_"):
#import ipdb;ipdb.set_trace()
#self.constraints.update(param.constraints, start)
#self.priors.update(param.priors, start)
offset = parent._offset_for(self)
self.constraints = ParameterIndexOperationsView(parent.constraints, offset, self.size)
self.priors = ParameterIndexOperationsView(parent.priors, offset, self.size)
self._fixes_ = None
for p in self.parameters:
p._parent_changed(parent)
@@ -741,6 +750,7 @@ class OptimizationHandlable(Indexable):
self.param_array.flat[f] = p
[np.put(self.param_array, ind[f[ind]], c.f(self.param_array.flat[ind[f[ind]]]))
for c, ind in self.constraints.iteritems() if c != __fixed__]
#self._highest_parent_.tie.propagate_val()
self._optimizer_copy_transformed = False
self._trigger_params_changed()
@@ -826,15 +836,16 @@ class OptimizationHandlable(Indexable):
"""
# first take care of all parameters (from N(0,1))
x = rand_gen(size=self._size_transformed(), *args, **kwargs)
updates = self.update_model()
self.update_model(False) # Switch off the updates
self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...)
# now draw from prior where possible
x = self.param_array.copy()
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
unfixlist = np.ones((self.size,),dtype=np.bool)
unfixlist[self.constraints[__fixed__]] = False
self.param_array.flat[unfixlist] = x.view(np.ndarray).ravel()[unfixlist]
self.update_model(updates)
#===========================================================================
# For shared memory arrays. This does nothing in Param, but sets the memory
@@ -928,7 +939,7 @@ class Parameterizable(OptimizationHandlable):
"""
if self.__dict__.get('_param_array_', None) is None:
self._param_array_ = np.empty(self.size, dtype=np.float64)
if self.constraints[__fixed__].size !=0:
fixes = np.ones(self.size).astype(bool)
fixes[self.constraints[__fixed__]] = FIXED
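randomize() above uses the update_model switch to batch several parameter writes into a single recompute; the same pattern works from user code. A hedged sketch (the parameter names are those of a plain GPRegression model and are assumptions here):

updates = m.update_model()       # remember the current setting
m.update_model(False)            # switch automatic updates off
m.rbf.lengthscale = 1.5          # several cheap parameter edits...
m.rbf.variance = 0.1
m.Gaussian_noise.variance = 0.01
m.update_model(updates)          # restore; trigger_update recomputes once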

View file

@@ -149,6 +149,7 @@ class Parameterized(Parameterizable):
self.priors.update(param.priors, start)
self.parameters.insert(index, param)
self._notify_parent_change()
param.add_observer(self, self._pass_through_notify_observers, -np.inf)
parent = self
@@ -356,6 +357,35 @@ class Parameterized(Parameterizable):
@property
def _ties_str(self):
return [','.join(x._ties_str) for x in self.flattened_parameters]
def _repr_html_(self, header=True):
"""Representation of the parameters in html for notebook display."""
name = adjust_name_for_printing(self.name) + "."
constrs = self._constraints_str;
ts = self._ties_str
prirs = self._priors_str
desc = self._description_str; names = self.parameter_names()
nl = max([len(str(x)) for x in names + [name]])
sl = max([len(str(x)) for x in desc + ["Value"]])
cl = max([len(str(x)) if x else 0 for x in constrs + ["Constraint"]])
tl = max([len(str(x)) if x else 0 for x in ts + ["Tied to"]])
pl = max([len(str(x)) if x else 0 for x in prirs + ["Prior"]])
format_spec = "<tr><td>{{name:<{0}s}}</td><td align=\"right\">{{desc:>{1}s}}</td><td>{{const:^{2}s}}</td><td>{{pri:^{3}s}}</td><td>{{t:^{4}s}}</td></tr>".format(nl, sl, cl, pl, tl)
to_print = []
for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs):
to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p))
sep = '-' * (nl + sl + cl + + pl + tl + 8 * 2 + 3)
if header:
header = """
<tr>
<td><b>{name}</b>
<td><b>Value</b></td>
<td><b>Constraint</b></td>
<td><b>Prior</b></td>
<td><b>Tied to</b></td>""".format(name=name)
to_print.insert(0, header)
return '<table>' + '\n'.format(sep).join(to_print) + '\n</table>'
def __str__(self, header=True):
name = adjust_name_for_printing(self.name) + "."
constrs = self._constraints_str;
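With _repr_html_ now defined on Model, Parameterized and Param, returning an object at the end of an IPython notebook cell renders the parameter table as HTML, while print keeps the plain-text __str__ output. A small sketch (GPRegression used only as an example):

import numpy as np
import GPy

m = GPy.models.GPRegression(np.random.rand(20, 1), np.random.rand(20, 1))
m                   # last expression in a notebook cell: HTML table via _repr_html_
print m             # plain-text table via __str__, unchanged
m.rbf.lengthscale   # a single Param renders its own HTML table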

View file

@@ -58,7 +58,7 @@ class Gaussian(Prior):
self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)
def __str__(self):
return "N({:.2g}, {:.2g})".format(self.mu, self.sigma)
def lnpdf(self, x):
return self.constant - 0.5 * np.square(x - self.mu) / self.sigma2
@@ -89,7 +89,7 @@ class Uniform(Prior):
self.upper = float(upper)
def __str__(self):
return "[{:.2g}, {:.2g}]".format(self.lower, self.upper)
def lnpdf(self, x):
region = (x >= self.lower) * (x <= self.upper)
@@ -132,7 +132,7 @@ class LogGaussian(Prior):
self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)
def __str__(self):
return "lnN({:.2g}, {:.2g})".format(self.mu, self.sigma)
def lnpdf(self, x):
return self.constant - 0.5 * np.square(np.log(x) - self.mu) / self.sigma2 - np.log(x)
@@ -237,7 +237,7 @@ class Gamma(Prior):
self.constant = -gammaln(self.a) + a * np.log(b)
def __str__(self):
return "Ga({:.2g}, {:.2g})".format(self.a, self.b)
def summary(self):
ret = {"E[x]": self.a / self.b, \
@@ -272,8 +272,7 @@ class Gamma(Prior):
b = E / V
return Gamma(a, b)
class InverseGamma(Prior):
class inverse_gamma(Prior):
""" """
Implementation of the inverse-Gamma probability function, coupled with random variables. Implementation of the inverse-Gamma probability function, coupled with random variables.
@ -284,8 +283,8 @@ class inverse_gamma(Prior):
""" """
domain = _POSITIVE domain = _POSITIVE
_instances = []
def __new__(cls, a, b): # Singleton: def __new__(cls, a, b): # Singleton:
if cls._instances: if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()] cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances: for instance in cls._instances:
@ -301,7 +300,7 @@ class inverse_gamma(Prior):
self.constant = -gammaln(self.a) + a * np.log(b) self.constant = -gammaln(self.a) + a * np.log(b)
def __str__(self): def __str__(self):
return "iGa(" + str(np.round(self.a)) + ', ' + str(np.round(self.b)) + ')' return "iGa({:.2g}, {:.2g})".format(self.a, self.b)
def lnpdf(self, x): def lnpdf(self, x):
return self.constant - (self.a + 1) * np.log(x) - self.b / x return self.constant - (self.a + 1) * np.log(x) - self.b / x
@ -312,7 +311,6 @@ class inverse_gamma(Prior):
def rvs(self, n): def rvs(self, n):
return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n) return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
class DGPLVM_KFDA(Prior): class DGPLVM_KFDA(Prior):
""" """
Implementation of the Discriminative Gaussian Process Latent Variable function using Implementation of the Discriminative Gaussian Process Latent Variable function using
@ -656,3 +654,64 @@ class DGPLVM(Prior):
def __str__(self): def __str__(self):
return 'DGPLVM_prior' return 'DGPLVM_prior'
class HalfT(Prior):
"""
Implementation of the half student t probability function, coupled with random variables.
:param A: scale parameter
:param nu: degrees of freedom
"""
domain = _POSITIVE
_instances = []
def __new__(cls, A, nu): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().A == A and instance().nu == nu:
return instance()
o = super(Prior, cls).__new__(cls, A, nu)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, A, nu):
self.A = float(A)
self.nu = float(nu)
self.constant = gammaln(.5*(self.nu+1.)) - gammaln(.5*self.nu) - .5*np.log(np.pi*self.A*self.nu)
def __str__(self):
return "hT({:.2g}, {:.2g})".format(self.A, self.nu)
def lnpdf(self,theta):
return (theta>0) * ( self.constant -.5*(self.nu+1) * np.log( 1.+ (1./self.nu) * (theta/self.A)**2 ) )
#theta = theta if isinstance(theta,np.ndarray) else np.array([theta])
#lnpdfs = np.zeros_like(theta)
#theta = np.array([theta])
#above_zero = theta.flatten()>1e-6
#v = self.nu
#sigma2=self.A
#stop
#lnpdfs[above_zero] = (+ gammaln((v + 1) * 0.5)
# - gammaln(v * 0.5)
# - 0.5*np.log(sigma2 * v * np.pi)
# - 0.5*(v + 1)*np.log(1 + (1/np.float(v))*((theta[above_zero][0]**2)/sigma2))
#)
#return lnpdfs
def lnpdf_grad(self,theta):
theta = theta if isinstance(theta,np.ndarray) else np.array([theta])
grad = np.zeros_like(theta)
above_zero = theta>1e-6
v = self.nu
sigma2=self.A
grad[above_zero] = -0.5*(v+1)*(2*theta[above_zero])/(v*sigma2 + theta[above_zero][0]**2)
return grad
def rvs(self, n):
#return np.random.randn(n) * self.sigma + self.mu
from scipy.stats import t
#[np.abs(x) for x in t.rvs(df=4,loc=0,scale=50, size=10000)])
ret = t.rvs(self.nu,loc=0,scale=self.A, size=n)
ret[ret<0] = 0
return ret
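Besides the renamed InverseGamma and the new HalfT prior, the __str__ methods now use {:.2g} formatting, so a Gamma(1, 1) prints as Ga(1, 1). A brief sketch of attaching priors to parameters (set_prior is the existing Param API; that HalfT is exported next to the other priors is an assumption):

import numpy as np
import GPy

m = GPy.models.GPRegression(np.random.rand(30, 1), np.random.rand(30, 1))
m.rbf.variance.set_prior(GPy.priors.Gamma(a=1., b=1.))       # prints as Ga(1, 1)
m.rbf.lengthscale.set_prior(GPy.priors.HalfT(A=1., nu=4.))   # new half-Student-t prior
m.optimize()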

View file

@@ -9,6 +9,11 @@ from .. import likelihoods
from parameterization.variational import VariationalPosterior
import logging
from GPy.inference.latent_function_inference.posterior import Posterior
from GPy.inference.optimization.stochastics import SparseGPStochastics,\
SparseGPMissing
#no stochastics.py file added! from GPy.inference.optimization.stochastics import SparseGPStochastics,\
#SparseGPMissing
logger = logging.getLogger("sparse gp")
class SparseGP(GP):
@@ -34,12 +39,13 @@ class SparseGP(GP):
"""
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
name='sparse gp', Y_metadata=None, normalizer=False,
missing_data=False, stochastic=False, batchsize=1):
#pick a sensible inference method
if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian):
inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1])
else:
#inference_method = ??
raise NotImplementedError, "what to do what to do?"
@@ -49,46 +55,281 @@ class SparseGP(GP):
self.num_inducing = Z.shape[0]
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
self.missing_data = missing_data
if stochastic and missing_data:
self.missing_data = True
self.ninan = ~np.isnan(Y)
self.stochastics = SparseGPStochastics(self, batchsize)
elif stochastic and not missing_data:
self.missing_data = False
self.stochastics = SparseGPStochastics(self, batchsize)
elif missing_data:
self.missing_data = True
self.ninan = ~np.isnan(Y)
self.stochastics = SparseGPMissing(self)
else:
self.stochastics = False
logger.info("Adding Z as parameter")
self.link_parameter(self.Z, index=0)
if self.missing_data:
self.Ylist = []
overall = self.Y_normalized.shape[1]
m_f = lambda i: "Precomputing Y for missing data: {: >7.2%}".format(float(i+1)/overall)
message = m_f(-1)
print message,
for d in xrange(overall):
self.Ylist.append(self.Y_normalized[self.ninan[:, d], d][:, None])
print ' '*(len(message)+1) + '\r',
message = m_f(d)
print message,
print ''
self.posterior = None
def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior)
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None):
"""
This is the standard part, which usually belongs in parameters_changed.
For automatic handling of subsampling (such as missing_data, stochastics etc.), we need to put this into an inner
loop, in order to ensure a different handling of gradients etc of different
subsets of data.
The dict in current_values will be passed around as current_values for
the rest of the algorithm, so this is the place to store current values,
such as subsets etc, if necessary.
If Lm and dL_dKmm can be precomputed (or only need to be computed once)
pass them in here, so they will be passed to the inference_method.
subset_indices is a dictionary of indices. you can put the indices however you
like them into this dictionary for inner use of the indices inside the
algorithm.
"""
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None)
current_values = {}
likelihood.update_gradients(grad_dict['dL_dthetaL'])
current_values['likgrad'] = likelihood.gradient.copy()
if subset_indices is None:
subset_indices = {}
if isinstance(X, VariationalPosterior):
#gradients wrt kernel
dL_dKmm = grad_dict['dL_dKmm']
kern.update_gradients_full(dL_dKmm, Z, None)
current_values['kerngrad'] = kern.gradient.copy()
kern.update_gradients_expectations(variational_posterior=X,
Z=Z,
dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=grad_dict['dL_dpsi1'],
dL_dpsi2=grad_dict['dL_dpsi2'])
current_values['kerngrad'] += kern.gradient
#gradients wrt Z
current_values['Zgrad'] = kern.gradients_X(dL_dKmm, Z)
current_values['Zgrad'] += kern.gradients_Z_expectations(
grad_dict['dL_dpsi0'],
grad_dict['dL_dpsi1'],
grad_dict['dL_dpsi2'],
Z=Z,
variational_posterior=X)
else:
#gradients wrt kernel
kern.update_gradients_diag(grad_dict['dL_dKdiag'], X)
current_values['kerngrad'] = kern.gradient.copy()
kern.update_gradients_full(grad_dict['dL_dKnm'], X, Z)
current_values['kerngrad'] += kern.gradient
kern.update_gradients_full(grad_dict['dL_dKmm'], Z, None)
current_values['kerngrad'] += kern.gradient
#gradients wrt Z
current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], Z)
current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, Z, X)
return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices
def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None):
"""
This is for automatic updates of values in the inner loop of missing
data handling. Both arguments are dictionaries and the values in
full_values will be updated by the current_gradients.
If a key from current_values does not exist in full_values, it will be
initialized to the value in current_values.
If there is indices needed for the update, value_indices can be used for
that. If value_indices has the same key, as current_values, the update
in full_values will be indexed by the indices in value_indices.
grads:
dictionary of standing gradients (you will have to carefully make sure, that
the ordering is right!). The values in here will be updated such that
full_values[key] += current_values[key] forall key in full_gradients.keys()
gradients:
dictionary of gradients in the current set of parameters.
value_indices:
dictionary holding indices for the update in full_values.
if the key exists the update rule is:
def df(x):
m.stochastics.do_stochastics()
grads = m._grads(x)
print '\r',
message = "Lik: {: 6.4E} Grad: {: 6.4E} Dim: {} Lik: {} Len: {!s}".format(float(m.log_likelihood()), np.einsum('i,i->', grads, grads), m.stochastics.d, float(m.likelihood.variance), " ".join(["{:3.2E}".format(l) for l in m.kern.lengthscale.values]))
print message,
return grads
def grad_stop(threshold):
def inner(args):
g = args['gradient']
return np.sqrt(np.einsum('i,i->',g,g)) < threshold
return inner
def maxiter_stop(maxiter):
def inner(args):
return args['n_iter'] == maxiter
return inner
def optimize(m, maxiter=1000):
#opt = climin.RmsProp(m.optimizer_array.copy(), df, 1e-6, decay=0.9, momentum=0.9, step_adapt=1e-7)
opt = climin.Adadelta(m.optimizer_array.copy(), df, 1e-2, decay=0.9)
ret = opt.minimize_until((grad_stop(.1), maxiter_stop(maxiter)))
print
return ret
full_values[key][value_indices[key]] += current_values[key]
"""
for key in current_values.keys():
if value_indices is not None and value_indices.has_key(key):
index = value_indices[key]
else:
index = slice(None)
if full_values.has_key(key):
full_values[key][index] += current_values[key]
else:
full_values[key] = current_values[key]
def _inner_values_update(self, current_values):
"""
This exists if there is more to do with the current values.
It will always be called in the inner loop, so that
you can do additional inner updates for the inside of the missing data
loop etc. This can also be used for stochastic updates, when only working on
one dimension of the output.
"""
pass
def _outer_values_update(self, full_values):
"""
Here you put the values, which were collected before in the right places.
E.g. set the gradients of parameters, etc.
"""
self.likelihood.gradient = full_values['likgrad']
self.kern.gradient = full_values['kerngrad']
self.Z.gradient = full_values['Zgrad']
def _outer_init_full_values(self):
"""
If full_values has indices in values_indices, we might want to initialize
the full_values differently, so that subsetting is possible.
Here you can initialize the full_values for the values needed.
Keep in mind, that if a key does not exist in full_values when updating
values, it will be set (so e.g. for Z there is no need to initialize Zgrad,
as there is no subsetting needed. For X in BGPLVM on the other hand we probably need
to initialize the gradients for the mean and the variance in order to
have the full gradient for indexing)
"""
return {}
def _outer_loop_for_missing_data(self):
Lm = None
dL_dKmm = None
self._log_marginal_likelihood = 0
full_values = self._outer_init_full_values()
if self.posterior is None:
woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
else:
woodbury_inv = self.posterior._woodbury_inv
woodbury_vector = self.posterior._woodbury_vector
if not self.stochastics:
m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim)
message = m_f(-1)
print message,
for d in self.stochastics.d:
ninan = self.ninan[:, d]
if not self.stochastics:
print ' '*(len(message)) + '\r',
message = m_f(d)
print message,
posterior, log_marginal_likelihood, \
grad_dict, current_values, value_indices = self._inner_parameters_changed(
self.kern, self.X[ninan],
self.Z, self.likelihood,
self.Ylist[d], self.Y_metadata,
Lm, dL_dKmm,
subset_indices=dict(outputs=d, samples=ninan))
self._inner_take_over_or_update(full_values, current_values, value_indices)
self._inner_values_update(current_values)
Lm = posterior.K_chol
dL_dKmm = grad_dict['dL_dKmm']
woodbury_inv[:, :, d] = posterior.woodbury_inv
woodbury_vector[:, d:d+1] = posterior.woodbury_vector
self._log_marginal_likelihood += log_marginal_likelihood
if not self.stochastics:
print ''
if self.posterior is None:
self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
self._outer_values_update(full_values)
def _outer_loop_without_missing_data(self):
self._log_marginal_likelihood = 0
if self.posterior is None:
woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
else:
woodbury_inv = self.posterior._woodbury_inv
woodbury_vector = self.posterior._woodbury_vector
d = self.stochastics.d
posterior, log_marginal_likelihood, \
grad_dict, current_values, _ = self._inner_parameters_changed(
self.kern, self.X,
self.Z, self.likelihood,
self.Y_normalized[:, d], self.Y_metadata)
self.grad_dict = grad_dict
self._log_marginal_likelihood += log_marginal_likelihood
self._outer_values_update(current_values)
woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None]
woodbury_vector[:, d] = posterior.woodbury_vector
if self.posterior is None:
self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
def parameters_changed(self):
if self.missing_data:
self._outer_loop_for_missing_data()
elif self.stochastics:
self._outer_loop_without_missing_data()
else:
self.posterior, self._log_marginal_likelihood, self.grad_dict, full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
self._outer_values_update(full_values)
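parameters_changed now dispatches to an outer loop that iterates over output dimensions when missing_data or stochastic is set. A minimal sketch of the intended call signature (Gaussian likelihood, NaNs marking unobserved entries; this illustrates the interface added here, not a tested recipe):

import numpy as np
import GPy

X = np.random.rand(100, 1)
Y = np.sin(6 * X) + 0.05 * np.random.randn(100, 3)
Y[np.random.rand(*Y.shape) < 0.2] = np.nan     # some outputs are unobserved
Z = np.random.rand(10, 1)

m = GPy.core.SparseGP(X, Y, Z, GPy.kern.RBF(1), GPy.likelihoods.Gaussian(),
                      missing_data=True)
m.optimize(messages=True)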
def _raw_predict(self, Xnew, full_cov=False, kern=None):
"""

View file

@@ -34,15 +34,17 @@ class SparseGP_MPI(SparseGP):
"""
def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False,
missing_data=False, stochastic=False, batchsize=1):
self._IN_OPTIMIZATION_ = False
if mpi_comm != None:
if inference_method is None:
inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
else:
assert isinstance(inference_method, VarDTC_minibatch), 'inference_method has to support MPI!'
super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer,
missing_data=missing_data, stochastic=stochastic, batchsize=batchsize)
self.update_model(False)
self.link_parameter(self.X, index=0)
if variational_prior is not None:
@@ -75,18 +77,18 @@ class SparseGP_MPI(SparseGP):
return dc
#=====================================================
# The MPI parallelization
# - can move to model at some point
#=====================================================
@SparseGP.optimizer_array.setter
def optimizer_array(self, p):
if self.mpi_comm != None:
if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0:
self.mpi_comm.Bcast(np.int32(1),root=0)
self.mpi_comm.Bcast(p, root=0)
SparseGP.optimizer_array.fset(self,p)
def optimize(self, optimizer=None, start=None, **kwargs):
self._IN_OPTIMIZATION_ = True
if self.mpi_comm==None:
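SparseGP_MPI now forwards the new missing_data/stochastic/batchsize arguments and broadcasts the optimizer array from rank 0 during optimization. A hedged sketch of driving it with mpi4py (the module path and the run command are assumptions):

# e.g. run with: mpirun -np 4 python driver.py
import numpy as np
import GPy
from mpi4py import MPI
from GPy.core.sparse_gp_mpi import SparseGP_MPI   # assumed module path

X = np.random.rand(200, 1)
Y = np.sin(6 * X) + 0.05 * np.random.randn(200, 1)
Z = np.random.rand(20, 1)
m = SparseGP_MPI(X, Y, Z, GPy.kern.RBF(1), GPy.likelihoods.Gaussian(),
                 mpi_comm=MPI.COMM_WORLD)
m.optimize()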

View file

@@ -42,10 +42,10 @@ class SVIGP(GP):
"""
def __init__(self, X, Y, kernel, Z, q_u=None, batchsize=10):
raise NotImplementedError, "This is a work in progress, see github issue "
GP.__init__(self, X, Y, kernel)
self.batchsize=batchsize
self.Y = self.likelihood.Y.copy()
self.Z = Z
self.num_inducing = Z.shape[0]
@@ -57,15 +57,9 @@ class SVIGP(GP):
self.param_steplength = 1e-5
self.momentum = 0.9
if X_variance is None:
self.has_uncertain_inputs = False
else:
self.has_uncertain_inputs = True
self.X_variance = X_variance
if q_u is None:
q_u = np.hstack((np.random.randn(self.num_inducing*self.output_dim),-.5*np.eye(self.num_inducing).flatten()))
self.set_vb_param(q_u)
self._permutation = np.random.permutation(self.num_data)
@@ -76,82 +70,25 @@ class SVIGP(GP):
self._grad_trace = []
#set the adaptive steplength parameters
#self.hbar_t = 0.0
#self.tau_t = 100.0
#self.gbar_t = 0.0
#self.gbar_t1 = 0.0
#self.gbar_t2 = 0.0
#self.hbar_tp = 0.0
#self.tau_tp = 10000.0
#self.gbar_tp = 0.0
#self.adapt_param_steplength = True
#self.adapt_vb_steplength = True
#self._param_steplength_trace = []
#self._vb_steplength_trace = []
# def _getstate(self):
# steplength_params = [self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength]
# return GP._getstate(self) + \
# [self.get_vb_param(),
# self.Z,
# self.num_inducing,
# self.has_uncertain_inputs,
# self.X_variance,
# self.X_batch,
# self.X_variance_batch,
# steplength_params,
# self.batchcounter,
# self.batchsize,
# self.epochs,
# self.momentum,
# self.data_prop,
# self._param_trace,
# self._param_steplength_trace,
# self._vb_steplength_trace,
# self._ll_trace,
# self._grad_trace,
# self.Y,
# self._permutation,
# self.iterations
# ]
#
# def _setstate(self, state):
# self.iterations = state.pop()
# self._permutation = state.pop()
# self.Y = state.pop()
# self._grad_trace = state.pop()
# self._ll_trace = state.pop()
# self._vb_steplength_trace = state.pop()
# self._param_steplength_trace = state.pop()
# self._param_trace = state.pop()
# self.data_prop = state.pop()
# self.momentum = state.pop()
# self.epochs = state.pop()
# self.batchsize = state.pop()
# self.batchcounter = state.pop()
# steplength_params = state.pop()
# (self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength) = steplength_params
# self.X_variance_batch = state.pop()
# self.X_batch = state.pop()
# self.X_variance = state.pop()
# self.has_uncertain_inputs = state.pop()
# self.num_inducing = state.pop()
# self.Z = state.pop()
# vb_param = state.pop()
# GP._setstate(self, state)
# self.set_vb_param(vb_param)
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
self.psi0 = self.kern.Kdiag(self.X_batch)
self.psi1 = self.kern.K(self.X_batch, self.Z)
self.psi2 = None
self.psi2 = self.kern.psi2(self.Z, self.X_batch, self.X_variance_batch)
else:
self.psi0 = self.kern.Kdiag(self.X_batch)
self.psi1 = self.kern.K(self.X_batch, self.Z)
self.psi2 = None
def dL_dtheta(self):
dL_dtheta = self.kern._param_grad_helper(self.dL_dKmm, self.Z)
@@ -179,7 +116,7 @@ class SVIGP(GP):
def load_batch(self):
"""
load a batch of data (set self.X_batch and self.Y_batch from self.X, self.Y)
"""
#if we've seen all the data, start again with them in a new random order
@@ -191,9 +128,7 @@ class SVIGP(GP):
this_perm = self._permutation[self.batchcounter:self.batchcounter+self.batchsize]
self.X_batch = self.X[this_perm]
self.Y_batch = self.Y[this_perm]
if self.has_uncertain_inputs:
self.X_variance_batch = self.X_variance[this_perm]
self.batchcounter += self.batchsize
@@ -288,7 +223,9 @@ class SVIGP(GP):
Compute the gradients of the lower bound wrt the canonical and
Expectation parameters of u.
Note that the natural gradient in either is given by the gradient in
the other (See Hensman et al 2012 Fast Variational inference in the
conjugate exponential Family)
"""
# Gradient for eta

View file

@@ -12,7 +12,7 @@ from sympy.utilities.iterables import numbered_symbols
import scipy
import GPy
#from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma
#from GPy.util.symbolic import normcdf, normcdfln, logistic, logisticln, erfcx, erfc, gammaln
def getFromDict(dataDict, mapList):
return reduce(lambda d, k: d[k], mapList, dataDict)

View file

@@ -19,4 +19,9 @@ dir=$HOME/tmp/GPy-datasets/
# if you have an anaconda python installation please specify it here.
installed = False
location = None
# set this to true if you have the MKL optimizations installed:
MKL = False
[weave]
#if true, try to use weave, and fall back to numpy. if false, just use numpy.
working = True

View file

@@ -7,11 +7,6 @@ Gaussian Processes classification
"""
import GPy
try:
import pylab as pb
except:
pass
default_seed = 10000
def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
@@ -19,7 +14,9 @@ def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
Run a Gaussian process classification on the three phase oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
"""
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.oil()
X = data['X']
Xtest = data['Xtest']
Y = data['Y'][:, 0:1]
@@ -54,7 +51,9 @@ def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
"""
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
@@ -71,7 +70,8 @@ def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
# Plot
if plot:
from matplotlib import pyplot as plt
fig, axes = plt.subplots(2, 1)
m.plot_f(ax=axes[0])
m.plot(ax=axes[1])
@@ -87,7 +87,9 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
"""
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
@@ -107,7 +109,8 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
# Plot
if plot:
from matplotlib import pyplot as plt
fig, axes = plt.subplots(2, 1)
m.plot_f(ax=axes[0])
m.plot(ax=axes[1])
@@ -123,7 +126,9 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti
"""
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
@@ -137,7 +142,8 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti
# Plot
if plot:
from matplotlib import pyplot as plt
fig, axes = plt.subplots(2, 1)
m.plot_f(ax=axes[0])
m.plot(ax=axes[1])
@@ -153,7 +159,9 @@ def toy_heaviside(seed=default_seed, optimize=True, plot=True):
"""
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
@@ -171,7 +179,8 @@ def toy_heaviside(seed=default_seed, optimize=True, plot=True):
# Plot
if plot:
from matplotlib import pyplot as plt
fig, axes = plt.subplots(2, 1)
m.plot_f(ax=axes[0])
m.plot(ax=axes[1])
@@ -190,7 +199,9 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
:param kernel: kernel to use in the model
:type kernel: a GPy kernel
"""
try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.crescent_data(seed=seed)
Y = data['Y']
Y[Y.flatten()==-1] = 0
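All of these demos now fetch their data through the external pods package (https://github.com/sods/ods) instead of GPy.util.datasets, guarded by the try/except shown above. A sketch of running one of them once pods is installed (that the demo returns the fitted model is an assumption, in line with the other GPy examples):

# pip install pods   (datasets are downloaded on first use)
import GPy.examples.classification as demos

m = demos.toy_linear_1d_classification(optimize=True, plot=False)
print m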

View file

@@ -1,6 +1,7 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as _np
#default_seed = _np.random.seed(123344)
def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan=False):
@@ -23,9 +24,6 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan
X = _np.random.rand(num_inputs, input_dim)
lengthscales = _np.random.rand(input_dim)
k = GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True)
##+ GPy.kern.white(input_dim, 0.01)
#)
#k = GPy.kern.Linear(input_dim, ARD=1)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X)
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T
@@ -71,7 +69,8 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan
def gplvm_oil_100(optimize=True, verbose=1, plot=True):
import GPy
import pods
data = pods.datasets.oil_100()
Y = data['X']
# create simple GP model
kernel = GPy.kern.RBF(6, ARD=True) + GPy.kern.Bias(6)
@@ -83,8 +82,10 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci
def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_inducing=15, max_iters=50):
import GPy
import pods
_np.random.seed(0)
data = pods.datasets.oil()
Y = data['X'][:N]
Y = Y - Y.mean(0)
Y /= Y.std(0)
@ -101,7 +102,7 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci
def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=25, Q=4, sigma=.2): def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=25, Q=4, sigma=.2):
import GPy import GPy
from GPy.util.datasets import swiss_roll_generated from pods.datasets import swiss_roll_generated
from GPy.models import BayesianGPLVM from GPy.models import BayesianGPLVM
data = swiss_roll_generated(num_samples=N, sigma=sigma) data = swiss_roll_generated(num_samples=N, sigma=sigma)
@ -159,11 +160,11 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=25, Q=4
def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k): def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
import GPy import GPy
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
from ..util.misc import param_to_array
import numpy as np import numpy as np
import pods
_np.random.seed(0) _np.random.seed(0)
data = GPy.util.datasets.oil() data = pods.datasets.oil()
kernel = GPy.kern.RBF(Q, 1., 1./_np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2)) kernel = GPy.kern.RBF(Q, 1., 1./_np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2))
Y = data['X'][:N] Y = data['X'][:N]
@ -177,7 +178,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
fig, (latent_axes, sense_axes) = plt.subplots(1, 2) fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
m.plot_latent(ax=latent_axes, labels=m.data_labels) m.plot_latent(ax=latent_axes, labels=m.data_labels)
data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:])) data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:]))
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean)[0:1,:], # @UnusedVariable lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1,:], # @UnusedVariable
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels) m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels)
raw_input('Press enter to finish') raw_input('Press enter to finish')
plt.close(fig) plt.close(fig)
@ -186,11 +187,10 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k): def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
import GPy import GPy
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
from ..util.misc import param_to_array import pods
import numpy as np
_np.random.seed(0) _np.random.seed(0)
data = GPy.util.datasets.oil() data = pods.datasets.oil()
kernel = GPy.kern.RBF(Q, 1., 1./_np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2)) kernel = GPy.kern.RBF(Q, 1., 1./_np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2))
Y = data['X'][:N] Y = data['X'][:N]
@ -204,13 +204,50 @@ def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40
fig, (latent_axes, sense_axes) = plt.subplots(1, 2) fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
m.plot_latent(ax=latent_axes, labels=m.data_labels) m.plot_latent(ax=latent_axes, labels=m.data_labels)
data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:])) data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:]))
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean)[0:1,:], # @UnusedVariable lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1,:], # @UnusedVariable
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels) m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels)
raw_input('Press enter to finish') raw_input('Press enter to finish')
plt.close(fig) plt.close(fig)
return m return m
def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False): def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
Q_signal = 4
import GPy
import numpy as np
np.random.seed(3000)
k = GPy.kern.Matern32(Q_signal, 10., lengthscale=1+(np.random.uniform(1,6,Q_signal)), ARD=1)
t = np.c_[[np.linspace(-1,5,N) for _ in range(Q_signal)]].T
K = k.K(t)
s2, s1, s3, sS = np.random.multivariate_normal(np.zeros(K.shape[0]), K, size=(4))[:,:,None]
Y1, Y2, Y3, S1, S2, S3 = _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS)
slist = [sS, s1, s2, s3]
slist_names = ["sS", "s1", "s2", "s3"]
Ylist = [Y1, Y2, Y3]
if plot_sim:
from matplotlib import pyplot as plt
import matplotlib.cm as cm
import itertools
fig = plt.figure("MRD Simulation Data", figsize=(8, 6))
fig.clf()
ax = fig.add_subplot(2, 1, 1)
labls = slist_names
for S, lab in itertools.izip(slist, labls):
ax.plot(S, label=lab)
ax.legend()
for i, Y in enumerate(Ylist):
ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i)
ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable
ax.set_title("Y{}".format(i + 1))
plt.draw()
plt.tight_layout()
return slist, [S1, S2, S3], Ylist
def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False):
_np.random.seed(1234) _np.random.seed(1234)
x = _np.linspace(0, 4 * _np.pi, N)[:, None] x = _np.linspace(0, 4 * _np.pi, N)[:, None]
@ -229,34 +266,17 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
s3 -= s3.mean(); s3 /= s3.std(0) s3 -= s3.mean(); s3 /= s3.std(0)
sS -= sS.mean(); sS /= sS.std(0) sS -= sS.mean(); sS /= sS.std(0)
S1 = _np.hstack([s1, sS]) Y1, Y2, Y3, S1, S2, S3 = _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS)
S2 = _np.hstack([s2, s3, sS])
S3 = _np.hstack([s3, sS])
Y1 = S1.dot(_np.random.randn(S1.shape[1], D1))
Y2 = S2.dot(_np.random.randn(S2.shape[1], D2))
Y3 = S3.dot(_np.random.randn(S3.shape[1], D3))
Y1 += .3 * _np.random.randn(*Y1.shape)
Y2 += .2 * _np.random.randn(*Y2.shape)
Y3 += .25 * _np.random.randn(*Y3.shape)
Y1 -= Y1.mean(0)
Y2 -= Y2.mean(0)
Y3 -= Y3.mean(0)
Y1 /= Y1.std(0)
Y2 /= Y2.std(0)
Y3 /= Y3.std(0)
slist = [sS, s1, s2, s3] slist = [sS, s1, s2, s3]
slist_names = ["sS", "s1", "s2", "s3"] slist_names = ["sS", "s1", "s2", "s3"]
Ylist = [Y1, Y2, Y3] Ylist = [Y1, Y2, Y3]
if plot_sim: if plot_sim:
import pylab from matplotlib import pyplot as plt
import matplotlib.cm as cm import matplotlib.cm as cm
import itertools import itertools
fig = pylab.figure("MRD Simulation Data", figsize=(8, 6)) fig = plt.figure("MRD Simulation Data", figsize=(8, 6))
fig.clf() fig.clf()
ax = fig.add_subplot(2, 1, 1) ax = fig.add_subplot(2, 1, 1)
labls = slist_names labls = slist_names
@ -267,28 +287,28 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i) ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i)
ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable
ax.set_title("Y{}".format(i + 1)) ax.set_title("Y{}".format(i + 1))
pylab.draw() plt.draw()
pylab.tight_layout() plt.tight_layout()
return slist, [S1, S2, S3], Ylist return slist, [S1, S2, S3], Ylist
# def bgplvm_simulation_matlab_compare(): def _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS):
# from GPy.util.datasets import simulation_BGPLVM S1 = _np.hstack([s1, sS])
# from GPy import kern S2 = _np.hstack([s2, s3, sS])
# from GPy.models import BayesianGPLVM S3 = _np.hstack([s3, sS])
# Y1 = S1.dot(_np.random.randn(S1.shape[1], D1))
# sim_data = simulation_BGPLVM() Y2 = S2.dot(_np.random.randn(S2.shape[1], D2))
# Y = sim_data['Y'] Y3 = S3.dot(_np.random.randn(S3.shape[1], D3))
# mu = sim_data['mu'] Y1 += .3 * _np.random.randn(*Y1.shape)
# num_inducing, [_, Q] = 3, mu.shape Y2 += .2 * _np.random.randn(*Y2.shape)
# Y3 += .25 * _np.random.randn(*Y3.shape)
# k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2)) Y1 -= Y1.mean(0)
# m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k, Y2 -= Y2.mean(0)
# _debug=False) Y3 -= Y3.mean(0)
# m.auto_scale_factor = True Y1 /= Y1.std(0)
# m['noise'] = Y.var() / 100. Y2 /= Y2.std(0)
# m['linear_variance'] = .01 Y3 /= Y3.std(0)
# return m return Y1, Y2, Y3, S1, S2, S3
def bgplvm_simulation(optimize=True, verbose=1, def bgplvm_simulation(optimize=True, verbose=1,
plot=True, plot_sim=False, plot=True, plot_sim=False,
@ -298,7 +318,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
from GPy.models import BayesianGPLVM from GPy.models import BayesianGPLVM
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 3, 9 D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 3, 9
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
Y = Ylist[0] Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
#k = kern.RBF(Q, ARD=True, lengthscale=10.) #k = kern.RBF(Q, ARD=True, lengthscale=10.)
@ -323,7 +343,7 @@ def ssgplvm_simulation(optimize=True, verbose=1,
from GPy.models import SSGPLVM from GPy.models import SSGPLVM
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 3, 9 D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 3, 9
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
Y = Ylist[0] Y = Ylist[0]
k = kern.Linear(Q, ARD=True, useGPU=useGPU)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) k = kern.Linear(Q, ARD=True, useGPU=useGPU)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
#k = kern.RBF(Q, ARD=True, lengthscale=10.) #k = kern.RBF(Q, ARD=True, lengthscale=10.)
@ -346,23 +366,19 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
): ):
from GPy import kern from GPy import kern
from GPy.models import BayesianGPLVM from GPy.models import BayesianGPLVM
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4 D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
Y = Ylist[0] Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
    inan = _np.random.binomial(1, .8, size=Y.shape).astype(bool) # 80% missing data    inan = _np.random.binomial(1, .2, size=Y.shape).astype(bool) # ~20% missing data
Ymissing = Y.copy() Ymissing = Y.copy()
Ymissing[inan] = _np.nan Ymissing[inan] = _np.nan
m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing, m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
inference_method=VarDTCMissingData(inan=inan), kernel=k) kernel=k, missing_data=True)
m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
m.likelihood.variance = .01
m.parameters_changed()
m.Yreal = Y m.Yreal = Y
if optimize: if optimize:
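For reference, the missing-data variant now encodes held-out entries as NaN and passes missing_data=True to the model, instead of constructing a VarDTCMissingData inference object by hand; a minimal sketch following the lines above (variable names as in the surrounding function):

    inan = _np.random.binomial(1, .2, size=Y.shape).astype(bool)   # roughly 20% of entries dropped
    Ymissing = Y.copy()
    Ymissing[inan] = _np.nan                                       # missing observations are NaN
    m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
                      kernel=k, missing_data=True)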
@ -380,7 +396,7 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
from GPy.models import MRD from GPy.models import MRD
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
#Ylist = [Ylist[0]] #Ylist = [Ylist[0]]
k = kern.Linear(Q, ARD=True) k = kern.Linear(Q, ARD=True)
@ -390,7 +406,7 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
if optimize: if optimize:
print "Optimizing Model:" print "Optimizing Model:"
m.optimize(messages=verbose, max_iters=8e3, gtol=.1) m.optimize(messages=verbose, max_iters=8e3)
if plot: if plot:
m.X.plot("MRD Latent Space 1D") m.X.plot("MRD Latent Space 1D")
m.plot_scales("MRD Scales") m.plot_scales("MRD Scales")
@ -402,7 +418,7 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
#Ylist = [Ylist[0]] #Ylist = [Ylist[0]]
k = kern.Linear(Q, ARD=True) k = kern.Linear(Q, ARD=True)
@ -431,8 +447,9 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim
def brendan_faces(optimize=True, verbose=True, plot=True): def brendan_faces(optimize=True, verbose=True, plot=True):
import GPy import GPy
import pods
data = GPy.util.datasets.brendan_faces() data = pods.datasets.brendan_faces()
Q = 2 Q = 2
Y = data['Y'] Y = data['Y']
Yn = Y - Y.mean() Yn = Y - Y.mean()
@ -455,8 +472,9 @@ def brendan_faces(optimize=True, verbose=True, plot=True):
def olivetti_faces(optimize=True, verbose=True, plot=True): def olivetti_faces(optimize=True, verbose=True, plot=True):
import GPy import GPy
import pods
data = GPy.util.datasets.olivetti_faces() data = pods.datasets.olivetti_faces()
Q = 2 Q = 2
Y = data['Y'] Y = data['Y']
Yn = Y - Y.mean() Yn = Y - Y.mean()
@ -476,7 +494,9 @@ def olivetti_faces(optimize=True, verbose=True, plot=True):
def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=True): def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=True):
import GPy import GPy
data = GPy.util.datasets.osu_run1() import pods
data = pods.datasets.osu_run1()
# optimize # optimize
if range == None: if range == None:
Y = data['Y'].copy() Y = data['Y'].copy()
@ -491,8 +511,9 @@ def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=Tru
def stick(kernel=None, optimize=True, verbose=True, plot=True): def stick(kernel=None, optimize=True, verbose=True, plot=True):
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
import GPy import GPy
import pods
data = GPy.util.datasets.osu_run1() data = pods.datasets.osu_run1()
# optimize # optimize
m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel) m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel)
if optimize: m.optimize('bfgs', messages=verbose, max_f_eval=10000) if optimize: m.optimize('bfgs', messages=verbose, max_f_eval=10000)
@ -510,8 +531,9 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True):
def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True): def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True):
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
import GPy import GPy
import pods
data = GPy.util.datasets.osu_run1() data = pods.datasets.osu_run1()
# optimize # optimize
mapping = GPy.mappings.Linear(data['Y'].shape[1], 2) mapping = GPy.mappings.Linear(data['Y'].shape[1], 2)
m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping) m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
@ -529,8 +551,9 @@ def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True):
def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True): def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True):
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
import GPy import GPy
import pods
data = GPy.util.datasets.osu_run1() data = pods.datasets.osu_run1()
# optimize # optimize
back_kernel=GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.) back_kernel=GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.)
mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel) mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel)
@ -542,15 +565,16 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True):
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') #raw_input('Press enter to finish')
return m return m
def robot_wireless(optimize=True, verbose=True, plot=True): def robot_wireless(optimize=True, verbose=True, plot=True):
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
import GPy import GPy
import pods
data = GPy.util.datasets.robot_wireless() data = pods.datasets.robot_wireless()
# optimize # optimize
m = GPy.models.BayesianGPLVM(data['Y'], 4, num_inducing=25) m = GPy.models.BayesianGPLVM(data['Y'], 4, num_inducing=25)
if optimize: m.optimize(messages=verbose, max_f_eval=10000) if optimize: m.optimize(messages=verbose, max_f_eval=10000)
@ -564,8 +588,9 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
import numpy as np import numpy as np
import GPy import GPy
import pods
data = GPy.util.datasets.osu_run1() data = pods.datasets.osu_run1()
Q = 6 Q = 6
kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True) kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True)
m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel) m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
@ -595,8 +620,9 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose=True, plot=True): def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose=True, plot=True):
import GPy import GPy
import pods
data = GPy.util.datasets.cmu_mocap(subject, motion) data = pods.datasets.cmu_mocap(subject, motion)
if in_place: if in_place:
# Make figure move in place. # Make figure move in place.
data['Y'][:, 0:3] = 0.0 data['Y'][:, 0:3] = 0.0
View file
@ -13,7 +13,9 @@ import GPy
def olympic_marathon_men(optimize=True, plot=True): def olympic_marathon_men(optimize=True, plot=True):
"""Run a standard Gaussian process regression on the Olympic marathon data.""" """Run a standard Gaussian process regression on the Olympic marathon data."""
data = GPy.util.datasets.olympic_marathon_men() try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.olympic_marathon_men()
# create simple GP Model # create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y']) m = GPy.models.GPRegression(data['X'], data['Y'])
@ -82,7 +84,9 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
from the Mount Epomeo runs. Requires gpxpy to be installed on your system from the Mount Epomeo runs. Requires gpxpy to be installed on your system
    to load the data.    to load the data.
""" """
data = GPy.util.datasets.epomeo_gpx() try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.epomeo_gpx()
num_data_list = [] num_data_list = []
for Xpart in data['X']: for Xpart in data['X']:
num_data_list.append(Xpart.shape[0]) num_data_list.append(Xpart.shape[0])
@ -107,9 +111,9 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
k = k1**k2 k = k1**k2
m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z, normalize_Y=True) m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z, normalize_Y=True)
m.constrain_fixed('.*rbf_var', 1.) m.constrain_fixed('.*variance', 1.)
m.constrain_fixed('iip') m.inducing_inputs.constrain_fixed()
m.constrain_bounded('noise_variance', 1e-3, 1e-1) m.Gaussian_noise.variance.constrain_bounded(1e-3, 1e-1)
m.optimize(max_iters=max_iters,messages=True) m.optimize(max_iters=max_iters,messages=True)
return m return m
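The constraints above are expressed through the hierarchical parameter interface rather than string-keyed setters; annotated for reference (the regular expression matches every parameter whose name matches '.*variance'):

    m.constrain_fixed('.*variance', 1.)                       # fix all matching variance parameters to 1
    m.inducing_inputs.constrain_fixed()                       # keep the inducing inputs at their initial locations
    m.Gaussian_noise.variance.constrain_bounded(1e-3, 1e-1)   # keep the noise variance in a sensible range
    m.optimize(max_iters=max_iters, messages=True)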
@ -125,7 +129,9 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
length_scales = np.linspace(0.1, 60., resolution) length_scales = np.linspace(0.1, 60., resolution)
log_SNRs = np.linspace(-3., 4., resolution) log_SNRs = np.linspace(-3., 4., resolution)
data = GPy.util.datasets.della_gatta_TRP63_gene_expression(data_set='della_gatta',gene_number=gene_number) try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.della_gatta_TRP63_gene_expression(data_set='della_gatta',gene_number=gene_number)
# data['Y'] = data['Y'][0::2, :] # data['Y'] = data['Y'][0::2, :]
# data['X'] = data['X'][0::2, :] # data['X'] = data['X'][0::2, :]
@ -151,16 +157,16 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
kern = GPy.kern.RBF(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50)) kern = GPy.kern.RBF(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50))
m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern) m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern)
m['noise_variance'] = np.random.uniform(1e-3, 1) m.Gaussian_noise.variance = np.random.uniform(1e-3, 1)
optim_point_x[0] = m['rbf_lengthscale'] optim_point_x[0] = m.rbf.lengthscale
optim_point_y[0] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']); optim_point_y[0] = np.log10(m.rbf.variance) - np.log10(m.Gaussian_noise.variance);
# optimize # optimize
if optimize: if optimize:
m.optimize('scg', xtol=1e-6, ftol=1e-6, max_iters=max_iters) m.optimize('scg', xtol=1e-6, ftol=1e-6, max_iters=max_iters)
optim_point_x[1] = m['rbf_lengthscale'] optim_point_x[1] = m.rbf.lengthscale
optim_point_y[1] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']); optim_point_y[1] = np.log10(m.rbf.variance) - np.log10(m.Gaussian_noise.variance);
if plot: if plot:
pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1] - optim_point_x[0], optim_point_y[1] - optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k') pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1] - optim_point_x[0], optim_point_y[1] - optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k')
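The parameter reads and writes above switch from dictionary-style indexing (m['rbf_lengthscale'], m['noise_variance']) to attribute access on the model hierarchy; a short sketch of the equivalent calls, assuming a plain GPRegression model with an RBF kernel and the data dictionary used in this function:

    kern = GPy.kern.RBF(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50))
    m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern)
    m.Gaussian_noise.variance = np.random.uniform(1e-3, 1)   # was m['noise_variance'] = ...
    lengthscale = m.rbf.lengthscale                          # was m['rbf_lengthscale']
    log_snr = np.log10(m.rbf.variance) - np.log10(m.Gaussian_noise.variance)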
@ -191,7 +197,7 @@ def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.RBF):
noise_var = total_var / (1. + SNR) noise_var = total_var / (1. + SNR)
signal_var = total_var - noise_var signal_var = total_var - noise_var
model.kern['.*variance'] = signal_var model.kern['.*variance'] = signal_var
model['noise_variance'] = noise_var model.Gaussian_noise.variance = noise_var
length_scale_lls = [] length_scale_lls = []
for length_scale in length_scales: for length_scale in length_scales:
@ -205,13 +211,15 @@ def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.RBF):
def olympic_100m_men(optimize=True, plot=True): def olympic_100m_men(optimize=True, plot=True):
"""Run a standard Gaussian process regression on the Rogers and Girolami olympics data.""" """Run a standard Gaussian process regression on the Rogers and Girolami olympics data."""
data = GPy.util.datasets.olympic_100m_men() try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.olympic_100m_men()
# create simple GP Model # create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y']) m = GPy.models.GPRegression(data['X'], data['Y'])
# set the lengthscale to be something sensible (defaults to 1) # set the lengthscale to be something sensible (defaults to 1)
m['rbf_lengthscale'] = 10 m.rbf.lengthscale = 10
if optimize: if optimize:
m.optimize('bfgs', max_iters=200) m.optimize('bfgs', max_iters=200)
@ -222,7 +230,9 @@ def olympic_100m_men(optimize=True, plot=True):
def toy_rbf_1d(optimize=True, plot=True): def toy_rbf_1d(optimize=True, plot=True):
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
data = GPy.util.datasets.toy_rbf_1d() try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.toy_rbf_1d()
# create simple GP Model # create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y']) m = GPy.models.GPRegression(data['X'], data['Y'])
@ -236,7 +246,9 @@ def toy_rbf_1d(optimize=True, plot=True):
def toy_rbf_1d_50(optimize=True, plot=True): def toy_rbf_1d_50(optimize=True, plot=True):
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
data = GPy.util.datasets.toy_rbf_1d_50() try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.toy_rbf_1d_50()
# create simple GP Model # create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y']) m = GPy.models.GPRegression(data['X'], data['Y'])
@ -303,12 +315,11 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize
# m.set_prior('.*lengthscale',len_prior) # m.set_prior('.*lengthscale',len_prior)
if optimize: if optimize:
m.optimize(optimizer='scg', max_iters=max_iters, messages=1) m.optimize(optimizer='scg', max_iters=max_iters)
if plot: if plot:
m.kern.plot_ARD() m.kern.plot_ARD()
print m
return m return m
def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize=True, plot=True): def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize=True, plot=True):
@ -343,24 +354,25 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o
# m.set_prior('.*lengthscale',len_prior) # m.set_prior('.*lengthscale',len_prior)
if optimize: if optimize:
m.optimize(optimizer='scg', max_iters=max_iters, messages=1) m.optimize(optimizer='scg', max_iters=max_iters)
if plot: if plot:
m.kern.plot_ARD() m.kern.plot_ARD()
print m
return m return m
def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True): def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True):
"""Predict the location of a robot given wirelss signal strength readings.""" """Predict the location of a robot given wirelss signal strength readings."""
data = GPy.util.datasets.robot_wireless() try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.robot_wireless()
# create simple GP Model # create simple GP Model
m = GPy.models.GPRegression(data['Y'], data['X'], kernel=kernel) m = GPy.models.GPRegression(data['Y'], data['X'], kernel=kernel)
# optimize # optimize
if optimize: if optimize:
m.optimize(messages=True, max_iters=max_iters) m.optimize(max_iters=max_iters)
Xpredict = m.predict(data['Ytest'])[0] Xpredict = m.predict(data['Ytest'])[0]
if plot: if plot:
@ -372,13 +384,14 @@ def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True):
sse = ((data['Xtest'] - Xpredict)**2).sum() sse = ((data['Xtest'] - Xpredict)**2).sum()
print m
print('Sum of squares error on test data: ' + str(sse)) print('Sum of squares error on test data: ' + str(sse))
return m return m
def silhouette(max_iters=100, optimize=True, plot=True): def silhouette(max_iters=100, optimize=True, plot=True):
"""Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper.""" """Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper."""
data = GPy.util.datasets.silhouette() try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.silhouette()
# create simple GP Model # create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y']) m = GPy.models.GPRegression(data['X'], data['Y'])
@ -390,7 +403,7 @@ def silhouette(max_iters=100, optimize=True, plot=True):
print m print m
return m return m
def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True, checkgrad=True): def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True, checkgrad=False):
"""Run a 1D example of a sparse GP regression.""" """Run a 1D example of a sparse GP regression."""
# sample inputs and outputs # sample inputs and outputs
X = np.random.uniform(-3., 3., (num_samples, 1)) X = np.random.uniform(-3., 3., (num_samples, 1))
@ -401,10 +414,10 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing) m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
if checkgrad: if checkgrad:
m.checkgrad(verbose=1) m.checkgrad()
if optimize: if optimize:
m.optimize('tnc', messages=1, max_iters=max_iters) m.optimize('tnc', max_iters=max_iters)
if plot: if plot:
m.plot() m.plot()
View file
@ -124,6 +124,7 @@ class vDTC(object):
v, _ = dtrtrs(L, tmp, lower=1, trans=1) v, _ = dtrtrs(L, tmp, lower=1, trans=1)
tmp, _ = dtrtrs(LA, Li, lower=1, trans=0) tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
P = tdot(tmp.T) P = tdot(tmp.T)
#compute log marginal #compute log marginal
log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \ log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
View file
@ -1,7 +1,6 @@
import numpy as np import numpy as np
from ...util import diag from ...util import diag
from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify, DSYR from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify, DSYR
from ...util.misc import param_to_array
from ...core.parameterization.variational import VariationalPosterior from ...core.parameterization.variational import VariationalPosterior
from . import LatentFunctionInference from . import LatentFunctionInference
from posterior import Posterior from posterior import Posterior
@ -23,7 +22,7 @@ class EPDTC(LatentFunctionInference):
self.get_YYTfactor.limit = limit self.get_YYTfactor.limit = limit
def _get_trYYT(self, Y): def _get_trYYT(self, Y):
return param_to_array(np.sum(np.square(Y))) return np.sum(np.square(Y))
def __getstate__(self): def __getstate__(self):
# has to be overridden, as Cacher objects cannot be pickled. # has to be overridden, as Cacher objects cannot be pickled.
@ -44,7 +43,7 @@ class EPDTC(LatentFunctionInference):
""" """
N, D = Y.shape N, D = Y.shape
if (N>=D): if (N>=D):
return param_to_array(Y) return Y
else: else:
return jitchol(tdot(Y)) return jitchol(tdot(Y))
View file
@ -12,7 +12,6 @@
import numpy as np import numpy as np
from ...util.linalg import mdot, jitchol, dpotrs, dtrtrs, dpotri, symmetrify, pdinv from ...util.linalg import mdot, jitchol, dpotrs, dtrtrs, dpotri, symmetrify, pdinv
from ...util.misc import param_to_array
from posterior import Posterior from posterior import Posterior
import warnings import warnings
from scipy import optimize from scipy import optimize
@ -39,9 +38,6 @@ class Laplace(LatentFunctionInference):
Returns a Posterior class containing essential quantities of the posterior Returns a Posterior class containing essential quantities of the posterior
""" """
#make Y a normal array!
Y = param_to_array(Y)
# Compute K # Compute K
K = kern.K(X) K = kern.K(X)
@ -86,7 +82,7 @@ class Laplace(LatentFunctionInference):
#define the objective function (to be maximised) #define the objective function (to be maximised)
def obj(Ki_f, f): def obj(Ki_f, f):
return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, Y_metadata=Y_metadata) return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
difference = np.inf difference = np.inf
iteration = 0 iteration = 0
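The quantity maximised in this mode-finding loop is the unnormalised log posterior over f, obj(f) = -0.5 * f^T K^{-1} f + sum_n log p(y_n | f_n), where Ki_f holds K^{-1} f; the added np.sum reflects that likelihood.logpdf is treated here as returning per-datum log densities rather than their total.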
@ -153,10 +149,10 @@ class Laplace(LatentFunctionInference):
#compute vital matrices #compute vital matrices
C = np.dot(LiW12, K) C = np.dot(LiW12, K)
Ki_W_i = K - C.T.dot(C) Ki_W_i = K - C.T.dot(C)
#compute the log marginal #compute the log marginal
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L))) log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata).sum() - np.sum(np.log(np.diag(L)))
# Compute matrices for derivatives # Compute matrices for derivatives
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
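The expression for log_marginal matches the standard Laplace evidence approximation (Rasmussen and Williams, GPML, eq. 3.32): log Z ~= -0.5 * f_hat^T K^{-1} f_hat + sum_n log p(y_n | f_hat_n) - 0.5 * log|I + W^{1/2} K W^{1/2}|, assuming L here is the Cholesky factor of B = I + W^{1/2} K W^{1/2}, so that -sum(log(diag(L))) supplies the final determinant term.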
View file
@ -6,7 +6,6 @@ from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrt
from ...util import diag from ...util import diag
from ...core.parameterization.variational import VariationalPosterior from ...core.parameterization.variational import VariationalPosterior
import numpy as np import numpy as np
from ...util.misc import param_to_array
from . import LatentFunctionInference from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
import logging, itertools import logging, itertools
@ -35,7 +34,7 @@ class VarDTC(LatentFunctionInference):
self.get_YYTfactor.limit = limit self.get_YYTfactor.limit = limit
def _get_trYYT(self, Y): def _get_trYYT(self, Y):
return param_to_array(np.sum(np.square(Y))) return np.sum(np.square(Y))
def __getstate__(self): def __getstate__(self):
# has to be overridden, as Cacher objects cannot be pickled. # has to be overridden, as Cacher objects cannot be pickled.
@ -56,14 +55,16 @@ class VarDTC(LatentFunctionInference):
""" """
N, D = Y.shape N, D = Y.shape
if (N>=D): if (N>=D):
return param_to_array(Y) return Y.view(np.ndarray)
else: else:
return jitchol(tdot(Y)) return jitchol(tdot(Y))
def get_VVTfactor(self, Y, prec): def get_VVTfactor(self, Y, prec):
    return Y * prec # TODO cache this, and make it efficient    return Y * prec # TODO cache this, and make it efficient
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None):
_, output_dim = Y.shape _, output_dim = Y.shape
uncertain_inputs = isinstance(X, VariationalPosterior) uncertain_inputs = isinstance(X, VariationalPosterior)
@ -85,8 +86,8 @@ class VarDTC(LatentFunctionInference):
Kmm = kern.K(Z).copy() Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter) diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm) if Lm is None:
Lm = jitchol(Kmm)
# The rather complex computations of A, and the psi stats # The rather complex computations of A, and the psi stats
if uncertain_inputs: if uncertain_inputs:
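The optional Lm (and dL_dKmm) arguments allow a caller to factor Kmm once and reuse the Cholesky factor across inference calls that share the same inducing inputs and kernel state; a rough caller-side sketch under that assumption (the loop over output blocks and the names inference_method and Y_blocks are hypothetical):

    Kmm = kern.K(Z).copy()
    diag.add(Kmm, inference_method.const_jitter)   # apply the same jitter as inference() itself
    Lm = jitchol(Kmm)
    for Y_block in Y_blocks:                       # e.g. groups of output columns
        posterior, log_marginal, grad_dict = inference_method.inference(
            kern, X, Z, likelihood, Y_block, Lm=Lm)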
@ -101,7 +102,6 @@ class VarDTC(LatentFunctionInference):
else: else:
psi0 = kern.Kdiag(X) psi0 = kern.Kdiag(X)
psi1 = kern.K(X, Z) psi1 = kern.K(X, Z)
psi2 = None
if het_noise: if het_noise:
tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
else: else:
@ -123,11 +123,12 @@ class VarDTC(LatentFunctionInference):
delit = tdot(_LBi_Lmi_psi1Vf) delit = tdot(_LBi_Lmi_psi1Vf)
data_fit = np.trace(delit) data_fit = np.trace(delit)
DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit) DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
delit = -0.5 * DBi_plus_BiPBi if dL_dKmm is None:
delit += -0.5 * B * output_dim delit = -0.5 * DBi_plus_BiPBi
delit += output_dim * np.eye(num_inducing) delit += -0.5 * B * output_dim
# Compute dL_dKmm delit += output_dim * np.eye(num_inducing)
dL_dKmm = backsub_both_sides(Lm, delit) # Compute dL_dKmm
dL_dKmm = backsub_both_sides(Lm, delit)
# derivatives of L w.r.t. psi # derivatives of L w.r.t. psi
dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
@ -209,6 +210,7 @@ class VarDTCMissingData(LatentFunctionInference):
inan = np.isnan(Y) inan = np.isnan(Y)
has_none = inan.any() has_none = inan.any()
self._inan = ~inan self._inan = ~inan
inan = self._inan
else: else:
inan = self._inan inan = self._inan
has_none = True has_none = True
@ -242,17 +244,15 @@ class VarDTCMissingData(LatentFunctionInference):
self._subarray_indices = [[slice(None),slice(None)]] self._subarray_indices = [[slice(None),slice(None)]]
return [Y], [(Y**2).sum()] return [Y], [(Y**2).sum()]
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None):
if isinstance(X, VariationalPosterior): if isinstance(X, VariationalPosterior):
uncertain_inputs = True uncertain_inputs = True
psi0_all = kern.psi0(Z, X) psi0_all = kern.psi0(Z, X)
psi1_all = kern.psi1(Z, X) psi1_all = kern.psi1(Z, X)
psi2_all = kern.psi2(Z, X)
else: else:
uncertain_inputs = False uncertain_inputs = False
psi0_all = kern.Kdiag(X) psi0_all = kern.Kdiag(X)
psi1_all = kern.K(X, Z) psi1_all = kern.K(X, Z)
psi2_all = None
Ys, traces = self._Y(Y) Ys, traces = self._Y(Y)
beta_all = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6) beta_all = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
@ -274,10 +274,11 @@ class VarDTCMissingData(LatentFunctionInference):
Kmm = kern.K(Z).copy() Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter) diag.add(Kmm, self.const_jitter)
#factor Kmm #factor Kmm
Lm = jitchol(Kmm) if Lm is None:
Lm = jitchol(Kmm)
if uncertain_inputs: LmInv = dtrtri(Lm) if uncertain_inputs: LmInv = dtrtri(Lm)
size = Y.shape[1] size = len(Ys)
next_ten = 0 next_ten = 0
for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)): for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)):
if ((i+1.)/size) >= next_ten: if ((i+1.)/size) >= next_ten:
@ -285,19 +286,19 @@ class VarDTCMissingData(LatentFunctionInference):
next_ten += .1 next_ten += .1
if het_noise: beta = beta_all[i] if het_noise: beta = beta_all[i]
else: beta = beta_all else: beta = beta_all
VVT_factor = (y*beta) VVT_factor = (y*beta)
output_dim = 1#len(ind) output_dim = 1#len(ind)
psi0 = psi0_all[v] psi0 = psi0_all[v]
psi1 = psi1_all[v, :] psi1 = psi1_all[v, :]
if uncertain_inputs: psi2 = psi2_all[v, :] if uncertain_inputs:
psi2 = kern.psi2(Z, X[v, :])
else: psi2 = None else: psi2 = None
num_data = psi1.shape[0] num_data = psi1.shape[0]
if uncertain_inputs: if uncertain_inputs:
if het_noise: psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0) if het_noise: psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0)
else: psi2_beta = psi2.sum(0) * beta else: psi2_beta = psi2 * (beta)
A = LmInv.dot(psi2_beta.dot(LmInv.T)) A = LmInv.dot(psi2_beta.dot(LmInv.T))
else: else:
if het_noise: tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) if het_noise: tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
@ -332,11 +333,11 @@ class VarDTCMissingData(LatentFunctionInference):
dL_dpsi0_all[v] += dL_dpsi0 dL_dpsi0_all[v] += dL_dpsi0
dL_dpsi1_all[v, :] += dL_dpsi1 dL_dpsi1_all[v, :] += dL_dpsi1
if uncertain_inputs: if uncertain_inputs:
dL_dpsi2_all[v, :] += dL_dpsi2 dL_dpsi2_all[v, :, :] += dL_dpsi2
# log marginal likelihood # log marginal likelihood
log_marginal += _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, log_marginal += _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
psi0, A, LB, trYYT, data_fit,VVT_factor) psi0, A, LB, trYYT, data_fit, VVT_factor)
#put the gradients in the right places #put the gradients in the right places
dL_dR += _compute_dL_dR(likelihood, dL_dR += _compute_dL_dR(likelihood,
@ -395,7 +396,6 @@ def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, C
# subsume back into psi1 (==Kmn) # subsume back into psi1 (==Kmn)
dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2) dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2)
dL_dpsi2 = None dL_dpsi2 = None
return dL_dpsi0, dL_dpsi1, dL_dpsi2 return dL_dpsi0, dL_dpsi1, dL_dpsi2
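Folding dL_dpsi2 back into dL_dpsi1 in the deterministic-input branch relies on psi2 = psi1^T psi1 when psi1 = K(X, Z), so by the chain rule the contribution becomes dL_dpsi1 += 2 * psi1.dot(dL_dpsi2) and the separate dL_dpsi2 gradient can be returned as None.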
View file
@ -6,7 +6,6 @@ from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs
from ...util import diag from ...util import diag
from ...core.parameterization.variational import VariationalPosterior from ...core.parameterization.variational import VariationalPosterior
import numpy as np import numpy as np
from ...util.misc import param_to_array
from . import LatentFunctionInference from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
@ -32,18 +31,18 @@ class VarDTC_GPU(LatentFunctionInference):
""" """
const_jitter = np.float64(1e-6) const_jitter = np.float64(1e-6)
def __init__(self, batchsize=None, gpu_memory=4., limit=1): def __init__(self, batchsize=None, gpu_memory=4., limit=1):
self.batchsize = batchsize self.batchsize = batchsize
self.gpu_memory = gpu_memory self.gpu_memory = gpu_memory
self.midRes = {} self.midRes = {}
self.batch_pos = 0 # the starting position of the current mini-batch self.batch_pos = 0 # the starting position of the current mini-batch
self.cublas_handle = gpu_init.cublas_handle self.cublas_handle = gpu_init.cublas_handle
# Initialize GPU caches # Initialize GPU caches
self.gpuCache = None self.gpuCache = None
def _initGPUCache(self, kern, num_inducing, input_dim, output_dim, Y): def _initGPUCache(self, kern, num_inducing, input_dim, output_dim, Y):
ndata = Y.shape[0] ndata = Y.shape[0]
if self.batchsize==None: if self.batchsize==None:
@ -75,10 +74,10 @@ class VarDTC_GPU(LatentFunctionInference):
'psi2p_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'), 'psi2p_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
} }
self.gpuCache['ones_gpu'].fill(1.0) self.gpuCache['ones_gpu'].fill(1.0)
YT_gpu = self.gpuCache['YT_gpu'] YT_gpu = self.gpuCache['YT_gpu']
self._trYYT = cublas.cublasDdot(self.cublas_handle, YT_gpu.size, YT_gpu.gpudata, 1, YT_gpu.gpudata, 1) self._trYYT = cublas.cublasDdot(self.cublas_handle, YT_gpu.size, YT_gpu.gpudata, 1, YT_gpu.gpudata, 1)
def _estimateMemoryOccupation(self, N, M, D): def _estimateMemoryOccupation(self, N, M, D):
""" """
Estimate the best batch size. Estimate the best batch size.
@ -89,7 +88,7 @@ class VarDTC_GPU(LatentFunctionInference):
unit: GB unit: GB
""" """
return (M+9.*M*M+3*M*D+N+2.*N*D)*8./1024./1024./1024., (4.+3.*M+D+3.*M*M)*8./1024./1024./1024. return (M+9.*M*M+3*M*D+N+2.*N*D)*8./1024./1024./1024., (4.+3.*M+D+3.*M*M)*8./1024./1024./1024.
def _estimateBatchSize(self, kern, N, M, Q, D): def _estimateBatchSize(self, kern, N, M, Q, D):
""" """
Estimate the best batch size. Estimate the best batch size.
@ -104,11 +103,11 @@ class VarDTC_GPU(LatentFunctionInference):
else: else:
x0, x1 = 0.,0. x0, x1 = 0.,0.
y0, y1 = self._estimateMemoryOccupation(N, M, D) y0, y1 = self._estimateMemoryOccupation(N, M, D)
opt_batchsize = min(int((self.gpu_memory-y0-x0)/(x1+y1)), N) opt_batchsize = min(int((self.gpu_memory-y0-x0)/(x1+y1)), N)
return opt_batchsize return opt_batchsize
def _get_YYTfactor(self, Y): def _get_YYTfactor(self, Y):
""" """
    find a matrix L which satisfies L L^T = Y Y^T.    find a matrix L which satisfies L L^T = Y Y^T.
@ -117,10 +116,10 @@ class VarDTC_GPU(LatentFunctionInference):
""" """
N, D = Y.shape N, D = Y.shape
if (N>=D): if (N>=D):
return param_to_array(Y) return Y.view(np.ndarray)
else: else:
return jitchol(tdot(Y)) return jitchol(tdot(Y))
def gatherPsiStat(self, kern, X, Z, Y, beta, uncertain_inputs, het_noise): def gatherPsiStat(self, kern, X, Z, Y, beta, uncertain_inputs, het_noise):
num_inducing, input_dim = Z.shape[0], Z.shape[1] num_inducing, input_dim = Z.shape[0], Z.shape[1]
num_data, output_dim = Y.shape num_data, output_dim = Y.shape
@ -130,7 +129,7 @@ class VarDTC_GPU(LatentFunctionInference):
beta_gpu = self.gpuCache['beta_gpu'] beta_gpu = self.gpuCache['beta_gpu']
YT_gpu = self.gpuCache['YT_gpu'] YT_gpu = self.gpuCache['YT_gpu']
betaYT_gpu = self.gpuCache['betaYT_gpu'] betaYT_gpu = self.gpuCache['betaYT_gpu']
beta_gpu.fill(beta) beta_gpu.fill(beta)
betaYT_gpu.fill(0.) betaYT_gpu.fill(0.)
cublas.cublasDaxpy(self.cublas_handle, betaYT_gpu.size, beta, YT_gpu.gpudata, 1, betaYT_gpu.gpudata, 1) cublas.cublasDaxpy(self.cublas_handle, betaYT_gpu.size, beta, YT_gpu.gpudata, 1, betaYT_gpu.gpudata, 1)
@ -140,7 +139,7 @@ class VarDTC_GPU(LatentFunctionInference):
psi1Y_gpu.fill(0.) psi1Y_gpu.fill(0.)
psi2_gpu.fill(0.) psi2_gpu.fill(0.)
psi0_full = 0 psi0_full = 0
for n_start in xrange(0,num_data,self.batchsize): for n_start in xrange(0,num_data,self.batchsize):
n_end = min(self.batchsize+n_start, num_data) n_end = min(self.batchsize+n_start, num_data)
ndata = n_end - n_start ndata = n_end - n_start
@ -156,35 +155,35 @@ class VarDTC_GPU(LatentFunctionInference):
psi1p_gpu = kern.K(X_slice, Z) psi1p_gpu = kern.K(X_slice, Z)
cublas.cublasDgemm(self.cublas_handle, 'T', 'T', num_inducing, output_dim, ndata, 1.0, psi1p_gpu.gpudata, ndata, betaYT_gpu_slice.gpudata, output_dim, 1.0, psi1Y_gpu.gpudata, num_inducing) cublas.cublasDgemm(self.cublas_handle, 'T', 'T', num_inducing, output_dim, ndata, 1.0, psi1p_gpu.gpudata, ndata, betaYT_gpu_slice.gpudata, output_dim, 1.0, psi1Y_gpu.gpudata, num_inducing)
psi0_full += psi0.sum() psi0_full += psi0.sum()
if uncertain_inputs: if uncertain_inputs:
sum_axis(psi2_gpu,psi2p_gpu,1,1) sum_axis(psi2_gpu,psi2p_gpu,1,1)
else: else:
cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, beta, psi1p_gpu.gpudata, ndata, psi1p_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing) cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, beta, psi1p_gpu.gpudata, ndata, psi1p_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing)
psi0_full *= beta psi0_full *= beta
if uncertain_inputs: if uncertain_inputs:
cublas.cublasDscal(self.cublas_handle, psi2_gpu.size, beta, psi2_gpu.gpudata, 1) cublas.cublasDscal(self.cublas_handle, psi2_gpu.size, beta, psi2_gpu.gpudata, 1)
else: else:
psi2_full = np.zeros((num_inducing,num_inducing)) psi2_full = np.zeros((num_inducing,num_inducing))
psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM
psi0_full = 0. psi0_full = 0.
YRY_full = 0. YRY_full = 0.
for n_start in xrange(0,num_data,self.batchsize): for n_start in xrange(0,num_data,self.batchsize):
n_end = min(self.batchsize+n_start, num_data) n_end = min(self.batchsize+n_start, num_data)
Y_slice = Y[n_start:n_end] Y_slice = Y[n_start:n_end]
X_slice = X[n_start:n_end] X_slice = X[n_start:n_end]
if het_noise: if het_noise:
b = beta[n_start] b = beta[n_start]
YRY_full += np.inner(Y_slice, Y_slice)*b YRY_full += np.inner(Y_slice, Y_slice)*b
else: else:
b = beta b = beta
if uncertain_inputs: if uncertain_inputs:
psi0 = kern.psi0(Z, X_slice) psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice) psi1 = kern.psi1(Z, X_slice)
@ -193,50 +192,50 @@ class VarDTC_GPU(LatentFunctionInference):
psi0 = kern.Kdiag(X_slice) psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z) psi1 = kern.K(X_slice, Z)
psi2_full += np.dot(psi1.T,psi1)*b psi2_full += np.dot(psi1.T,psi1)*b
psi0_full += psi0.sum()*b psi0_full += psi0.sum()*b
psi1Y_full += np.dot(Y_slice.T,psi1)*b # DxM psi1Y_full += np.dot(Y_slice.T,psi1)*b # DxM
if not het_noise: if not het_noise:
YRY_full = trYYT*beta YRY_full = trYYT*beta
psi1Y_gpu.set(psi1Y_full) psi1Y_gpu.set(psi1Y_full)
psi2_gpu.set(psi2_full) psi2_gpu.set(psi2_full)
return psi0_full, YRY_full return psi0_full, YRY_full
def inference_likelihood(self, kern, X, Z, likelihood, Y): def inference_likelihood(self, kern, X, Z, likelihood, Y):
""" """
The first phase of inference: The first phase of inference:
Compute: log-likelihood, dL_dKmm Compute: log-likelihood, dL_dKmm
Cached intermediate results: Kmm, KmmInv, Cached intermediate results: Kmm, KmmInv,
""" """
num_inducing, input_dim = Z.shape[0], Z.shape[1] num_inducing, input_dim = Z.shape[0], Z.shape[1]
num_data, output_dim = Y.shape num_data, output_dim = Y.shape
#see whether we've got a different noise variance for each datum #see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6) beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1 het_noise = beta.size > 1
if het_noise: if het_noise:
self.batchsize=0 self.batchsize=0
self._initGPUCache(kern, num_inducing, input_dim, output_dim, Y) self._initGPUCache(kern, num_inducing, input_dim, output_dim, Y)
if isinstance(X, VariationalPosterior): if isinstance(X, VariationalPosterior):
uncertain_inputs = True uncertain_inputs = True
else: else:
uncertain_inputs = False uncertain_inputs = False
psi1Y_gpu = self.gpuCache['psi1Y_gpu'] psi1Y_gpu = self.gpuCache['psi1Y_gpu']
psi2_gpu = self.gpuCache['psi2_gpu'] psi2_gpu = self.gpuCache['psi2_gpu']
psi0_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs, het_noise) psi0_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs, het_noise)
#====================================================================== #======================================================================
# Compute Common Components # Compute Common Components
#====================================================================== #======================================================================
Kmm = kern.K(Z).copy() Kmm = kern.K(Z).copy()
Kmm_gpu = self.gpuCache['Kmm_gpu'] Kmm_gpu = self.gpuCache['Kmm_gpu']
Kmm_gpu.set(np.asfortranarray(Kmm)) Kmm_gpu.set(np.asfortranarray(Kmm))
@ -244,14 +243,14 @@ class VarDTC_GPU(LatentFunctionInference):
ones_gpu = self.gpuCache['ones_gpu'] ones_gpu = self.gpuCache['ones_gpu']
cublas.cublasDaxpy(self.cublas_handle, num_inducing, self.const_jitter, ones_gpu.gpudata, 1, Kmm_gpu.gpudata, num_inducing+1) cublas.cublasDaxpy(self.cublas_handle, num_inducing, self.const_jitter, ones_gpu.gpudata, 1, Kmm_gpu.gpudata, num_inducing+1)
# assert np.allclose(Kmm, Kmm_gpu.get()) # assert np.allclose(Kmm, Kmm_gpu.get())
# Lm = jitchol(Kmm) # Lm = jitchol(Kmm)
# #
Lm_gpu = self.gpuCache['Lm_gpu'] Lm_gpu = self.gpuCache['Lm_gpu']
cublas.cublasDcopy(self.cublas_handle, Kmm_gpu.size, Kmm_gpu.gpudata, 1, Lm_gpu.gpudata, 1) cublas.cublasDcopy(self.cublas_handle, Kmm_gpu.size, Kmm_gpu.gpudata, 1, Lm_gpu.gpudata, 1)
culinalg.cho_factor(Lm_gpu,'L') culinalg.cho_factor(Lm_gpu,'L')
# print np.abs(np.tril(Lm)-np.tril(Lm_gpu.get())).max() # print np.abs(np.tril(Lm)-np.tril(Lm_gpu.get())).max()
# Lambda = Kmm+psi2_full # Lambda = Kmm+psi2_full
# LL = jitchol(Lambda) # LL = jitchol(Lambda)
# #
@ -261,7 +260,7 @@ class VarDTC_GPU(LatentFunctionInference):
LL_gpu = Lambda_gpu LL_gpu = Lambda_gpu
culinalg.cho_factor(LL_gpu,'L') culinalg.cho_factor(LL_gpu,'L')
# print np.abs(np.tril(LL)-np.tril(LL_gpu.get())).max() # print np.abs(np.tril(LL)-np.tril(LL_gpu.get())).max()
# b,_ = dtrtrs(LL, psi1Y_full) # b,_ = dtrtrs(LL, psi1Y_full)
# bbt_cpu = np.square(b).sum() # bbt_cpu = np.square(b).sum()
# #
@ -270,7 +269,7 @@ class VarDTC_GPU(LatentFunctionInference):
cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, output_dim, np.float64(1.0), LL_gpu.gpudata, num_inducing, b_gpu.gpudata, num_inducing) cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, output_dim, np.float64(1.0), LL_gpu.gpudata, num_inducing, b_gpu.gpudata, num_inducing)
bbt = cublas.cublasDdot(self.cublas_handle, b_gpu.size, b_gpu.gpudata, 1, b_gpu.gpudata, 1) bbt = cublas.cublasDdot(self.cublas_handle, b_gpu.size, b_gpu.gpudata, 1, b_gpu.gpudata, 1)
# print np.abs(bbt-bbt_cpu) # print np.abs(bbt-bbt_cpu)
# v,_ = dtrtrs(LL.T,b,lower=False) # v,_ = dtrtrs(LL.T,b,lower=False)
# vvt = np.einsum('md,od->mo',v,v) # vvt = np.einsum('md,od->mo',v,v)
# LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right') # LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
@ -288,7 +287,7 @@ class VarDTC_GPU(LatentFunctionInference):
tr_LmInvPsi2LmInvT = float(strideSum(LmInvPsi2LmInvT_gpu, num_inducing+1).get()) tr_LmInvPsi2LmInvT = float(strideSum(LmInvPsi2LmInvT_gpu, num_inducing+1).get())
# print np.abs(vvt-vvt_gpu.get()).max() # print np.abs(vvt-vvt_gpu.get()).max()
# print np.abs(np.trace(LmInvPsi2LmInvT)-tr_LmInvPsi2LmInvT) # print np.abs(np.trace(LmInvPsi2LmInvT)-tr_LmInvPsi2LmInvT)
# Psi2LLInvT = dtrtrs(LL,psi2_full)[0].T # Psi2LLInvT = dtrtrs(LL,psi2_full)[0].T
# LmInvPsi2LLInvT= dtrtrs(Lm,Psi2LLInvT)[0] # LmInvPsi2LLInvT= dtrtrs(Lm,Psi2LLInvT)[0]
# KmmInvPsi2LLInvT = dtrtrs(Lm,LmInvPsi2LLInvT,trans=True)[0] # KmmInvPsi2LLInvT = dtrtrs(Lm,LmInvPsi2LLInvT,trans=True)[0]
@ -303,7 +302,7 @@ class VarDTC_GPU(LatentFunctionInference):
cublas.cublasDcopy(self.cublas_handle, KmmInvPsi2LLInvT_gpu.size, KmmInvPsi2LLInvT_gpu.gpudata, 1, KmmInvPsi2P_gpu.gpudata, 1) cublas.cublasDcopy(self.cublas_handle, KmmInvPsi2LLInvT_gpu.size, KmmInvPsi2LLInvT_gpu.gpudata, 1, KmmInvPsi2P_gpu.gpudata, 1)
cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), LL_gpu.gpudata, num_inducing, KmmInvPsi2P_gpu.gpudata, num_inducing) cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), LL_gpu.gpudata, num_inducing, KmmInvPsi2P_gpu.gpudata, num_inducing)
# print np.abs(KmmInvPsi2P-KmmInvPsi2P_gpu.get()).max() # print np.abs(KmmInvPsi2P-KmmInvPsi2P_gpu.get()).max()
# dL_dpsi2R = (output_dim*KmmInvPsi2P - vvt)/2. # dL_dpsi2 with R inside psi2 # dL_dpsi2R = (output_dim*KmmInvPsi2P - vvt)/2. # dL_dpsi2 with R inside psi2
# #
dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu'] dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu']
@ -311,7 +310,7 @@ class VarDTC_GPU(LatentFunctionInference):
cublas.cublasDaxpy(self.cublas_handle, KmmInvPsi2P_gpu.size, np.float64(-output_dim), KmmInvPsi2P_gpu.gpudata, 1, dL_dpsi2R_gpu.gpudata, 1) cublas.cublasDaxpy(self.cublas_handle, KmmInvPsi2P_gpu.size, np.float64(-output_dim), KmmInvPsi2P_gpu.gpudata, 1, dL_dpsi2R_gpu.gpudata, 1)
cublas.cublasDscal(self.cublas_handle, dL_dpsi2R_gpu.size, np.float64(-0.5), dL_dpsi2R_gpu.gpudata, 1) cublas.cublasDscal(self.cublas_handle, dL_dpsi2R_gpu.size, np.float64(-0.5), dL_dpsi2R_gpu.gpudata, 1)
# print np.abs(dL_dpsi2R_gpu.get()-dL_dpsi2R).max() # print np.abs(dL_dpsi2R_gpu.get()-dL_dpsi2R).max()
#====================================================================== #======================================================================
# Compute log-likelihood # Compute log-likelihood
#====================================================================== #======================================================================
@ -320,7 +319,7 @@ class VarDTC_GPU(LatentFunctionInference):
else: else:
logL_R = -num_data*np.log(beta) logL_R = -num_data*np.log(beta)
# logL_old = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum()) # logL_old = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum())
logdetKmm = float(logDiagSum(Lm_gpu,num_inducing+1).get()) logdetKmm = float(logDiagSum(Lm_gpu,num_inducing+1).get())
logdetLambda = float(logDiagSum(LL_gpu,num_inducing+1).get()) logdetLambda = float(logDiagSum(LL_gpu,num_inducing+1).get())
logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-tr_LmInvPsi2LmInvT)+YRY_full-bbt)/2.+output_dim*(logdetKmm-logdetLambda) logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-tr_LmInvPsi2LmInvT)+YRY_full-bbt)/2.+output_dim*(logdetKmm-logdetLambda)
@ -329,7 +328,7 @@ class VarDTC_GPU(LatentFunctionInference):
#====================================================================== #======================================================================
# Compute dL_dKmm # Compute dL_dKmm
#====================================================================== #======================================================================
# dL_dKmm = -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2. # dL_dKmm = -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2.
# #
dL_dKmm_gpu = self.gpuCache['dL_dKmm_gpu'] dL_dKmm_gpu = self.gpuCache['dL_dKmm_gpu']
@ -341,24 +340,24 @@ class VarDTC_GPU(LatentFunctionInference):
#====================================================================== #======================================================================
# Compute the Posterior distribution of inducing points p(u|Y) # Compute the Posterior distribution of inducing points p(u|Y)
#====================================================================== #======================================================================
post = Posterior(woodbury_inv=KmmInvPsi2P_gpu.get(), woodbury_vector=v_gpu.get(), K=Kmm_gpu.get(), mean=None, cov=None, K_chol=Lm_gpu.get()) post = Posterior(woodbury_inv=KmmInvPsi2P_gpu.get(), woodbury_vector=v_gpu.get(), K=Kmm_gpu.get(), mean=None, cov=None, K_chol=Lm_gpu.get())
#====================================================================== #======================================================================
# Compute dL_dthetaL for uncertain input and non-heteroscedastic noise # Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
#====================================================================== #======================================================================
if not het_noise: if not het_noise:
dL_dthetaL = (YRY_full + output_dim*psi0_full - num_data*output_dim)/-2. dL_dthetaL = (YRY_full + output_dim*psi0_full - num_data*output_dim)/-2.
dL_dthetaL += cublas.cublasDdot(self.cublas_handle,dL_dpsi2R_gpu.size, dL_dpsi2R_gpu.gpudata,1,psi2_gpu.gpudata,1) dL_dthetaL += cublas.cublasDdot(self.cublas_handle,dL_dpsi2R_gpu.size, dL_dpsi2R_gpu.gpudata,1,psi2_gpu.gpudata,1)
dL_dthetaL += cublas.cublasDdot(self.cublas_handle,v_gpu.size, v_gpu.gpudata,1,psi1Y_gpu.gpudata,1) dL_dthetaL += cublas.cublasDdot(self.cublas_handle,v_gpu.size, v_gpu.gpudata,1,psi1Y_gpu.gpudata,1)
self.midRes['dL_dthetaL'] = -beta*dL_dthetaL self.midRes['dL_dthetaL'] = -beta*dL_dthetaL
return logL, dL_dKmm_gpu.get(), post return logL, dL_dKmm_gpu.get(), post
def inference_minibatch(self, kern, X, Z, likelihood, Y): def inference_minibatch(self, kern, X, Z, likelihood, Y):
""" """
The second phase of inference: Computing the derivatives over a minibatch of Y The second phase of inference: Computing the derivatives over a minibatch of Y
Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL
return a flag showing whether it reached the end of Y (isEnd) return a flag showing whether it reached the end of Y (isEnd)
""" """
@ -370,10 +369,10 @@ class VarDTC_GPU(LatentFunctionInference):
uncertain_inputs = True uncertain_inputs = True
else: else:
uncertain_inputs = False uncertain_inputs = False
beta = 1./np.fmax(likelihood.variance, 1e-6) beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1 het_noise = beta.size > 1
n_start = self.batch_pos n_start = self.batch_pos
n_end = min(self.batchsize+n_start, num_data) n_end = min(self.batchsize+n_start, num_data)
if n_end==num_data: if n_end==num_data:
@ -382,12 +381,12 @@ class VarDTC_GPU(LatentFunctionInference):
else: else:
isEnd = False isEnd = False
self.batch_pos = n_end self.batch_pos = n_end
nSlice = n_end-n_start nSlice = n_end-n_start
X_slice = X[n_start:n_end] X_slice = X[n_start:n_end]
if het_noise: if het_noise:
beta = beta[n_start] # nSlice==1 beta = beta[n_start] # nSlice==1
if kern.useGPU: if kern.useGPU:
if not uncertain_inputs: if not uncertain_inputs:
psi0p_gpu = kern.Kdiag(X_slice) psi0p_gpu = kern.Kdiag(X_slice)
@ -416,28 +415,28 @@ class VarDTC_GPU(LatentFunctionInference):
psi1p_gpu.set(np.asfortranarray(psi1)) psi1p_gpu.set(np.asfortranarray(psi1))
if uncertain_inputs: if uncertain_inputs:
psi2p_gpu.set(np.asfortranarray(psi2)) psi2p_gpu.set(np.asfortranarray(psi2))
#====================================================================== #======================================================================
# Compute dL_dpsi # Compute dL_dpsi
#====================================================================== #======================================================================
dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu'] dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu']
v_gpu = self.gpuCache['v_gpu'] v_gpu = self.gpuCache['v_gpu']
dL_dpsi0_gpu = self.gpuCache['dL_dpsi0_gpu'] dL_dpsi0_gpu = self.gpuCache['dL_dpsi0_gpu']
dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu'] dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu']
dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu'] dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu']
betaYT_gpu = self.gpuCache['betaYT_gpu'] betaYT_gpu = self.gpuCache['betaYT_gpu']
betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end] betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end]
# Adjust to the batch size # Adjust to the batch size
if dL_dpsi0_gpu.shape[0] > nSlice: if dL_dpsi0_gpu.shape[0] > nSlice:
dL_dpsi0_gpu = dL_dpsi0_gpu.ravel()[:nSlice] dL_dpsi0_gpu = dL_dpsi0_gpu.ravel()[:nSlice]
dL_dpsi1_gpu = dL_dpsi1_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing) dL_dpsi1_gpu = dL_dpsi1_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing)
dL_dpsi0_gpu.fill(-output_dim *beta/2.) dL_dpsi0_gpu.fill(-output_dim *beta/2.)
cublas.cublasDgemm(self.cublas_handle, 'T', 'T', nSlice, num_inducing, output_dim, 1.0, betaYT_gpu_slice.gpudata, output_dim, v_gpu.gpudata, num_inducing, 0., dL_dpsi1_gpu.gpudata, nSlice) cublas.cublasDgemm(self.cublas_handle, 'T', 'T', nSlice, num_inducing, output_dim, 1.0, betaYT_gpu_slice.gpudata, output_dim, v_gpu.gpudata, num_inducing, 0., dL_dpsi1_gpu.gpudata, nSlice)
if uncertain_inputs: if uncertain_inputs:
cublas.cublasDcopy(self.cublas_handle, dL_dpsi2R_gpu.size, dL_dpsi2R_gpu.gpudata, 1, dL_dpsi2_gpu.gpudata, 1) cublas.cublasDcopy(self.cublas_handle, dL_dpsi2R_gpu.size, dL_dpsi2R_gpu.gpudata, 1, dL_dpsi2_gpu.gpudata, 1)
cublas.cublasDscal(self.cublas_handle, dL_dpsi2_gpu.size, beta, dL_dpsi2_gpu.gpudata, 1) cublas.cublasDscal(self.cublas_handle, dL_dpsi2_gpu.size, beta, dL_dpsi2_gpu.gpudata, 1)
@ -458,7 +457,7 @@ class VarDTC_GPU(LatentFunctionInference):
dL_dpsi1 = dL_dpsi1_gpu dL_dpsi1 = dL_dpsi1_gpu
else: else:
dL_dpsi0 = dL_dpsi0_gpu.get() dL_dpsi0 = dL_dpsi0_gpu.get()
dL_dpsi1 = dL_dpsi1_gpu.get() dL_dpsi1 = dL_dpsi1_gpu.get()
if uncertain_inputs: if uncertain_inputs:
if kern.useGPU: if kern.useGPU:
dL_dpsi2 = dL_dpsi2_gpu dL_dpsi2 = dL_dpsi2_gpu
@ -480,4 +479,4 @@ class VarDTC_GPU(LatentFunctionInference):
'dL_dthetaL':dL_dthetaL} 'dL_dthetaL':dL_dthetaL}
return isEnd, (n_start,n_end), grad_dict return isEnd, (n_start,n_end), grad_dict

View file

@ -6,7 +6,6 @@ from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri,pdi
from ...util import diag from ...util import diag
from ...core.parameterization.variational import VariationalPosterior from ...core.parameterization.variational import VariationalPosterior
import numpy as np import numpy as np
from ...util.misc import param_to_array
from . import LatentFunctionInference from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
@ -27,26 +26,26 @@ class VarDTC_minibatch(LatentFunctionInference):
""" """
const_jitter = 1e-6 const_jitter = 1e-6
def __init__(self, batchsize=None, limit=1, mpi_comm=None): def __init__(self, batchsize=None, limit=1, mpi_comm=None):
self.batchsize = batchsize self.batchsize = batchsize
self.mpi_comm = mpi_comm self.mpi_comm = mpi_comm
self.limit = limit self.limit = limit
# Cache functions # Cache functions
from ...util.caching import Cacher from ...util.caching import Cacher
self.get_trYYT = Cacher(self._get_trYYT, limit) self.get_trYYT = Cacher(self._get_trYYT, limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, limit) self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
self.midRes = {} self.midRes = {}
self.batch_pos = 0 # the starting position of the current mini-batch self.batch_pos = 0 # the starting position of the current mini-batch
self.Y_speedup = False # Replace Y with the cholesky factor of YY.T, but the posterior inference will be wrong self.Y_speedup = False # Replace Y with the cholesky factor of YY.T, but the posterior inference will be wrong
def __getstate__(self): def __getstate__(self):
# has to be overridden, as Cacher objects cannot be pickled. # has to be overridden, as Cacher objects cannot be pickled.
return self.batchsize, self.limit, self.Y_speedup return self.batchsize, self.limit, self.Y_speedup
def __setstate__(self, state): def __setstate__(self, state):
# has to be overridden, as Cacher objects cannot be pickled. # has to be overridden, as Cacher objects cannot be pickled.
self.batchsize, self.limit, self.Y_speedup = state self.batchsize, self.limit, self.Y_speedup = state
self.mpi_comm = None self.mpi_comm = None
self.midRes = {} self.midRes = {}
@ -58,9 +57,9 @@ class VarDTC_minibatch(LatentFunctionInference):
def set_limit(self, limit): def set_limit(self, limit):
self.get_trYYT.limit = limit self.get_trYYT.limit = limit
self.get_YYTfactor.limit = limit self.get_YYTfactor.limit = limit
def _get_trYYT(self, Y): def _get_trYYT(self, Y):
return param_to_array(np.sum(np.square(Y))) return np.sum(np.square(Y))
def _get_YYTfactor(self, Y): def _get_YYTfactor(self, Y):
""" """
@ -70,19 +69,19 @@ class VarDTC_minibatch(LatentFunctionInference):
""" """
N, D = Y.shape N, D = Y.shape
if (N>=D): if (N>=D):
return param_to_array(Y) return Y.view(np.ndarray)
else: else:
return jitchol(tdot(Y)) return jitchol(tdot(Y))
def gatherPsiStat(self, kern, X, Z, Y, beta, uncertain_inputs): def gatherPsiStat(self, kern, X, Z, Y, beta, uncertain_inputs):
het_noise = beta.size > 1 het_noise = beta.size > 1
trYYT = self.get_trYYT(Y) trYYT = self.get_trYYT(Y)
if self.Y_speedup and not het_noise: if self.Y_speedup and not het_noise:
Y = self.get_YYTfactor(Y) Y = self.get_YYTfactor(Y)
num_inducing = Z.shape[0] num_inducing = Z.shape[0]
num_data, output_dim = Y.shape num_data, output_dim = Y.shape
if self.batchsize == None: if self.batchsize == None:
self.batchsize = num_data self.batchsize = num_data
@ -91,8 +90,8 @@ class VarDTC_minibatch(LatentFunctionInference):
psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM
psi0_full = 0. psi0_full = 0.
YRY_full = 0. YRY_full = 0.
for n_start in xrange(0,num_data,self.batchsize): for n_start in xrange(0,num_data,self.batchsize):
n_end = min(self.batchsize+n_start, num_data) n_end = min(self.batchsize+n_start, num_data)
if (n_end-n_start)==num_data: if (n_end-n_start)==num_data:
Y_slice = Y Y_slice = Y
@ -100,13 +99,13 @@ class VarDTC_minibatch(LatentFunctionInference):
else: else:
Y_slice = Y[n_start:n_end] Y_slice = Y[n_start:n_end]
X_slice = X[n_start:n_end] X_slice = X[n_start:n_end]
if het_noise: if het_noise:
b = beta[n_start] b = beta[n_start]
YRY_full += np.inner(Y_slice, Y_slice)*b YRY_full += np.inner(Y_slice, Y_slice)*b
else: else:
b = beta b = beta
if uncertain_inputs: if uncertain_inputs:
psi0 = kern.psi0(Z, X_slice) psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice) psi1 = kern.psi1(Z, X_slice)
@ -115,13 +114,13 @@ class VarDTC_minibatch(LatentFunctionInference):
psi0 = kern.Kdiag(X_slice) psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z) psi1 = kern.K(X_slice, Z)
psi2_full += np.dot(psi1.T,psi1)*b psi2_full += np.dot(psi1.T,psi1)*b
psi0_full += psi0.sum()*b psi0_full += psi0.sum()*b
psi1Y_full += np.dot(Y_slice.T,psi1)*b # DxM psi1Y_full += np.dot(Y_slice.T,psi1)*b # DxM
if not het_noise: if not het_noise:
YRY_full = trYYT*beta YRY_full = trYYT*beta
if self.mpi_comm != None: if self.mpi_comm != None:
psi0_all = np.array(psi0_full) psi0_all = np.array(psi0_full)
psi1Y_all = psi1Y_full.copy() psi1Y_all = psi1Y_full.copy()
@ -132,18 +131,18 @@ class VarDTC_minibatch(LatentFunctionInference):
self.mpi_comm.Allreduce([psi2_full, MPI.DOUBLE], [psi2_all, MPI.DOUBLE]) self.mpi_comm.Allreduce([psi2_full, MPI.DOUBLE], [psi2_all, MPI.DOUBLE])
self.mpi_comm.Allreduce([YRY_full, MPI.DOUBLE], [YRY_all, MPI.DOUBLE]) self.mpi_comm.Allreduce([YRY_full, MPI.DOUBLE], [YRY_all, MPI.DOUBLE])
return psi0_all, psi1Y_all, psi2_all, YRY_all return psi0_all, psi1Y_all, psi2_all, YRY_all
return psi0_full, psi1Y_full, psi2_full, YRY_full return psi0_full, psi1Y_full, psi2_full, YRY_full
def inference_likelihood(self, kern, X, Z, likelihood, Y): def inference_likelihood(self, kern, X, Z, likelihood, Y):
""" """
The first phase of inference: The first phase of inference:
Compute: log-likelihood, dL_dKmm Compute: log-likelihood, dL_dKmm
Cached intermediate results: Kmm, KmmInv, Cached intermediate results: Kmm, KmmInv,
""" """
num_data, output_dim = Y.shape num_data, output_dim = Y.shape
input_dim = Z.shape[0] input_dim = Z.shape[0]
if self.mpi_comm != None: if self.mpi_comm != None:
num_data_all = np.array(num_data,dtype=np.int32) num_data_all = np.array(num_data,dtype=np.int32)
@ -154,7 +153,7 @@ class VarDTC_minibatch(LatentFunctionInference):
uncertain_inputs = True uncertain_inputs = True
else: else:
uncertain_inputs = False uncertain_inputs = False
#see whether we've got a different noise variance for each datum #see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6) beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1 het_noise = beta.size > 1
@ -162,28 +161,28 @@ class VarDTC_minibatch(LatentFunctionInference):
self.batchsize = 1 self.batchsize = 1
psi0_full, psi1Y_full, psi2_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs) psi0_full, psi1Y_full, psi2_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs)
#====================================================================== #======================================================================
# Compute Common Components # Compute Common Components
#====================================================================== #======================================================================
Kmm = kern.K(Z).copy() Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter) diag.add(Kmm, self.const_jitter)
KmmInv,Lm,LmInv,_ = pdinv(Kmm) KmmInv,Lm,LmInv,_ = pdinv(Kmm)
LmInvPsi2LmInvT = LmInv.dot(psi2_full).dot(LmInv.T) LmInvPsi2LmInvT = LmInv.dot(psi2_full).dot(LmInv.T)
Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
LInv,LL,LLInv,logdet_L = pdinv(Lambda) LInv,LL,LLInv,logdet_L = pdinv(Lambda)
b = LLInv.dot(LmInv.dot(psi1Y_full.T)) b = LLInv.dot(LmInv.dot(psi1Y_full.T))
bbt = np.square(b).sum() bbt = np.square(b).sum()
v = LmInv.T.dot(LLInv.T.dot(b)) v = LmInv.T.dot(LLInv.T.dot(b))
dL_dpsi2R = LmInv.T.dot(-LLInv.T.dot(tdot(b)+output_dim*np.eye(input_dim)).dot(LLInv)+output_dim*np.eye(input_dim)).dot(LmInv)/2. dL_dpsi2R = LmInv.T.dot(-LLInv.T.dot(tdot(b)+output_dim*np.eye(input_dim)).dot(LLInv)+output_dim*np.eye(input_dim)).dot(LmInv)/2.
# Cache intermediate results # Cache intermediate results
self.midRes['dL_dpsi2R'] = dL_dpsi2R self.midRes['dL_dpsi2R'] = dL_dpsi2R
self.midRes['v'] = v self.midRes['v'] = v
#====================================================================== #======================================================================
# Compute log-likelihood # Compute log-likelihood
#====================================================================== #======================================================================
@ -196,22 +195,22 @@ class VarDTC_minibatch(LatentFunctionInference):
#====================================================================== #======================================================================
# Compute dL_dKmm # Compute dL_dKmm
#====================================================================== #======================================================================
dL_dKmm = dL_dpsi2R - output_dim*KmmInv.dot(psi2_full).dot(KmmInv)/2. dL_dKmm = dL_dpsi2R - output_dim*KmmInv.dot(psi2_full).dot(KmmInv)/2.
#====================================================================== #======================================================================
# Compute the Posterior distribution of inducing points p(u|Y) # Compute the Posterior distribution of inducing points p(u|Y)
#====================================================================== #======================================================================
if not self.Y_speedup or het_noise: if not self.Y_speedup or het_noise:
post = Posterior(woodbury_inv=LmInv.T.dot(np.eye(input_dim)-LInv).dot(LmInv), woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm) post = Posterior(woodbury_inv=LmInv.T.dot(np.eye(input_dim)-LInv).dot(LmInv), woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm)
else: else:
post = None post = None
#====================================================================== #======================================================================
# Compute dL_dthetaL for uncertain input and non-heteroscedastic noise # Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
#====================================================================== #======================================================================
if not het_noise: if not het_noise:
dL_dthetaL = (YRY_full*beta + beta*output_dim*psi0_full - num_data*output_dim*beta)/2. - beta*(dL_dpsi2R*psi2_full).sum() - beta*(v.T*psi1Y_full).sum() dL_dthetaL = (YRY_full*beta + beta*output_dim*psi0_full - num_data*output_dim*beta)/2. - beta*(dL_dpsi2R*psi2_full).sum() - beta*(v.T*psi1Y_full).sum()
self.midRes['dL_dthetaL'] = dL_dthetaL self.midRes['dL_dthetaL'] = dL_dthetaL
@ -220,7 +219,7 @@ class VarDTC_minibatch(LatentFunctionInference):
def inference_minibatch(self, kern, X, Z, likelihood, Y): def inference_minibatch(self, kern, X, Z, likelihood, Y):
""" """
The second phase of inference: Computing the derivatives over a minibatch of Y The second phase of inference: Computing the derivatives over a minibatch of Y
Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL
return a flag showing whether it reached the end of Y (isEnd) return a flag showing whether it reached the end of Y (isEnd)
""" """
@ -231,7 +230,7 @@ class VarDTC_minibatch(LatentFunctionInference):
uncertain_inputs = True uncertain_inputs = True
else: else:
uncertain_inputs = False uncertain_inputs = False
#see whether we've got a different noise variance for each datum #see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6) beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1 het_noise = beta.size > 1
@ -241,7 +240,7 @@ class VarDTC_minibatch(LatentFunctionInference):
YYT_factor = self.get_YYTfactor(Y) YYT_factor = self.get_YYTfactor(Y)
else: else:
YYT_factor = Y YYT_factor = Y
n_start = self.batch_pos n_start = self.batch_pos
n_end = min(self.batchsize+n_start, num_data) n_end = min(self.batchsize+n_start, num_data)
if n_end==num_data: if n_end==num_data:
@ -250,10 +249,10 @@ class VarDTC_minibatch(LatentFunctionInference):
else: else:
isEnd = False isEnd = False
self.batch_pos = n_end self.batch_pos = n_end
Y_slice = YYT_factor[n_start:n_end] Y_slice = YYT_factor[n_start:n_end]
X_slice = X[n_start:n_end] X_slice = X[n_start:n_end]
if not uncertain_inputs: if not uncertain_inputs:
psi0 = kern.Kdiag(X_slice) psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z) psi1 = kern.K(X_slice, Z)
@ -264,33 +263,33 @@ class VarDTC_minibatch(LatentFunctionInference):
psi1 = kern.psi1(Z, X_slice) psi1 = kern.psi1(Z, X_slice)
psi2 = kern.psi2(Z, X_slice) psi2 = kern.psi2(Z, X_slice)
betapsi1 = np.einsum('n,nm->nm',beta,psi1) betapsi1 = np.einsum('n,nm->nm',beta,psi1)
if het_noise: if het_noise:
beta = beta[n_start] # assuming batchsize==1 beta = beta[n_start] # assuming batchsize==1
betaY = beta*Y_slice betaY = beta*Y_slice
#====================================================================== #======================================================================
# Load Intermediate Results # Load Intermediate Results
#====================================================================== #======================================================================
dL_dpsi2R = self.midRes['dL_dpsi2R'] dL_dpsi2R = self.midRes['dL_dpsi2R']
v = self.midRes['v'] v = self.midRes['v']
#====================================================================== #======================================================================
# Compute dL_dpsi # Compute dL_dpsi
#====================================================================== #======================================================================
dL_dpsi0 = -output_dim * (beta * np.ones((n_end-n_start,)))/2. dL_dpsi0 = -output_dim * (beta * np.ones((n_end-n_start,)))/2.
dL_dpsi1 = np.dot(betaY,v.T) dL_dpsi1 = np.dot(betaY,v.T)
if uncertain_inputs: if uncertain_inputs:
dL_dpsi2 = beta* dL_dpsi2R dL_dpsi2 = beta* dL_dpsi2R
else: else:
dL_dpsi1 += np.dot(betapsi1,dL_dpsi2R)*2. dL_dpsi1 += np.dot(betapsi1,dL_dpsi2R)*2.
dL_dpsi2 = None dL_dpsi2 = None
#====================================================================== #======================================================================
# Compute dL_dthetaL # Compute dL_dthetaL
#====================================================================== #======================================================================
@ -300,14 +299,14 @@ class VarDTC_minibatch(LatentFunctionInference):
psiR = np.einsum('mo,mo->',dL_dpsi2R,psi2) psiR = np.einsum('mo,mo->',dL_dpsi2R,psi2)
else: else:
psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R) psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R)
dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1) dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1)
else: else:
if isEnd: if isEnd:
dL_dthetaL = self.midRes['dL_dthetaL'] dL_dthetaL = self.midRes['dL_dthetaL']
else: else:
dL_dthetaL = 0. dL_dthetaL = 0.
if uncertain_inputs: if uncertain_inputs:
grad_dict = {'dL_dpsi0':dL_dpsi0, grad_dict = {'dL_dpsi0':dL_dpsi0,
'dL_dpsi1':dL_dpsi1, 'dL_dpsi1':dL_dpsi1,
@ -317,7 +316,7 @@ class VarDTC_minibatch(LatentFunctionInference):
grad_dict = {'dL_dKdiag':dL_dpsi0, grad_dict = {'dL_dKdiag':dL_dpsi0,
'dL_dKnm':dL_dpsi1, 'dL_dKnm':dL_dpsi1,
'dL_dthetaL':dL_dthetaL} 'dL_dthetaL':dL_dthetaL}
return isEnd, (n_start,n_end), grad_dict return isEnd, (n_start,n_end), grad_dict
@ -330,18 +329,18 @@ def update_gradients(model, mpi_comm=None):
X = model.X[model.N_range[0]:model.N_range[1]] X = model.X[model.N_range[0]:model.N_range[1]]
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, X, model.Z, model.likelihood, Y) model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, X, model.Z, model.likelihood, Y)
het_noise = model.likelihood.variance.size > 1 het_noise = model.likelihood.variance.size > 1
if het_noise: if het_noise:
dL_dthetaL = np.empty((model.Y.shape[0],)) dL_dthetaL = np.empty((model.Y.shape[0],))
else: else:
dL_dthetaL = np.float64(0.) dL_dthetaL = np.float64(0.)
kern_grad = model.kern.gradient.copy() kern_grad = model.kern.gradient.copy()
kern_grad[:] = 0. kern_grad[:] = 0.
model.Z.gradient = 0. model.Z.gradient = 0.
isEnd = False isEnd = False
while not isEnd: while not isEnd:
isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, X, model.Z, model.likelihood, Y) isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, X, model.Z, model.likelihood, Y)
@ -352,24 +351,24 @@ def update_gradients(model, mpi_comm=None):
X_slice = model.X[n_range[0]:n_range[1]] X_slice = model.X[n_range[0]:n_range[1]]
else: else:
X_slice = model.X[model.N_range[0]+n_range[0]:model.N_range[0]+n_range[1]] X_slice = model.X[model.N_range[0]+n_range[0]:model.N_range[0]+n_range[1]]
#gradients w.r.t. kernel #gradients w.r.t. kernel
model.kern.update_gradients_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) model.kern.update_gradients_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
kern_grad += model.kern.gradient kern_grad += model.kern.gradient
#gradients w.r.t. Z #gradients w.r.t. Z
model.Z.gradient += model.kern.gradients_Z_expectations( model.Z.gradient += model.kern.gradients_Z_expectations(
dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice) dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice)
#gradients w.r.t. posterior parameters of X #gradients w.r.t. posterior parameters of X
X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
model.set_X_gradients(X_slice, X_grad) model.set_X_gradients(X_slice, X_grad)
if het_noise: if het_noise:
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL'] dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
else: else:
dL_dthetaL += grad_dict['dL_dthetaL'] dL_dthetaL += grad_dict['dL_dthetaL']
# Gather the gradients from multiple MPI nodes # Gather the gradients from multiple MPI nodes
if mpi_comm != None: if mpi_comm != None:
if het_noise: if het_noise:
@ -380,14 +379,14 @@ def update_gradients(model, mpi_comm=None):
mpi_comm.Allreduce([model.Z.gradient, MPI.DOUBLE], [Z_grad_all, MPI.DOUBLE]) mpi_comm.Allreduce([model.Z.gradient, MPI.DOUBLE], [Z_grad_all, MPI.DOUBLE])
kern_grad = kern_grad_all kern_grad = kern_grad_all
model.Z.gradient = Z_grad_all model.Z.gradient = Z_grad_all
#gradients w.r.t. kernel #gradients w.r.t. kernel
model.kern.update_gradients_full(dL_dKmm, model.Z, None) model.kern.update_gradients_full(dL_dKmm, model.Z, None)
model.kern.gradient += kern_grad model.kern.gradient += kern_grad
#gradients w.r.t. Z #gradients w.r.t. Z
model.Z.gradient += model.kern.gradients_X(dL_dKmm, model.Z) model.Z.gradient += model.kern.gradients_X(dL_dKmm, model.Z)
# Update Log-likelihood # Update Log-likelihood
KL_div = model.variational_prior.KL_divergence(X) KL_div = model.variational_prior.KL_divergence(X)
# update for the KL divergence # update for the KL divergence

View file

@ -0,0 +1,48 @@
'''
Created on 9 Oct 2014
@author: maxz
'''
class StochasticStorage(object):
'''
This is a container for holding the stochastic parameters,
such as subset indices or step length and so on.
'''
def __init__(self, model):
"""
Initialize this stochastic container using the given model
"""
def do_stochastics(self):
"""
Update the internal state to the next batch of the stochastic
descent algorithm.
"""
pass
class SparseGPMissing(StochasticStorage):
def __init__(self, model, batchsize=1):
self.d = xrange(model.Y_normalized.shape[1])
class SparseGPStochastics(StochasticStorage):
"""
For the sparse gp we need to store the dimension we are in,
and the indices corresponding to those
"""
def __init__(self, model, batchsize=1):
import itertools
self.batchsize = batchsize
if self.batchsize == 1:
self.dimensions = itertools.cycle(range(model.Y_normalized.shape[1]))
else:
import numpy as np
self.dimensions = lambda: np.random.choice(model.Y_normalized.shape[1], size=batchsize, replace=False)
self.d = None
self.do_stochastics()
def do_stochastics(self):
if self.batchsize == 1:
self.d = [self.dimensions.next()]
else:
self.d = self.dimensions()
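# For illustration, a minimal standalone sketch of the batching logic in
# SparseGPStochastics above: with batchsize == 1 the output dimensions are
# visited round-robin, otherwise a random subset is drawn each step. The
# helper name and arguments here are hypothetical, not part of the GPy API.
import itertools
import numpy as np

def make_dimension_sampler(num_outputs, batchsize=1, seed=0):
    rng = np.random.RandomState(seed)
    if batchsize == 1:
        cycler = itertools.cycle(range(num_outputs))
        return lambda: [next(cycler)]  # round-robin over output dimensions
    return lambda: rng.choice(num_outputs, size=batchsize, replace=False)

next_batch = make_dimension_sampler(num_outputs=3, batchsize=1)
assert [next_batch() for _ in range(4)] == [[0], [1], [2], [0]]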

View file

@ -17,17 +17,3 @@ from _src.poly import Poly
from _src.trunclinear import TruncLinear,TruncLinear_inf from _src.trunclinear import TruncLinear,TruncLinear_inf
from _src.splitKern import SplitKern,DiffGenomeKern from _src.splitKern import SplitKern,DiffGenomeKern
# TODO: put this in an init file somewhere
#I'm commenting this out because the files were not added. JH. Remember to add the files before committing
try:
import sympy as sym
sympy_available=True
except ImportError:
sympy_available=False
if sympy_available:
from _src.symbolic import Symbolic
from _src.eq import Eq
from _src.heat_eqinit import Heat_eqinit
#from _src.ode1_eq_lfm import Ode1_eq_lfm

View file

@ -6,6 +6,7 @@ import numpy as np
from scipy import weave from scipy import weave
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
from ...util.config import config # for assessing whether to use weave
class Coregionalize(Kern): class Coregionalize(Kern):
""" """
@ -56,6 +57,27 @@ class Coregionalize(Kern):
self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa) self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)
def K(self, X, X2=None): def K(self, X, X2=None):
if config.getboolean('weave', 'working'):
try:
return self._K_weave(X, X2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
return self._K_numpy(X, X2)
else:
return self._K_numpy(X, X2)
def _K_numpy(self, X, X2=None):
index = np.asarray(X, dtype=np.int)
if X2 is None:
return self.B[index,index.T]
else:
index2 = np.asarray(X2, dtype=np.int)
return self.B[index,index2.T]
def _K_weave(self, X, X2=None):
"""compute the kernel function using scipy.weave"""
index = np.asarray(X, dtype=np.int) index = np.asarray(X, dtype=np.int)
if X2 is None: if X2 is None:
@ -91,12 +113,33 @@ class Coregionalize(Kern):
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
index = np.asarray(X, dtype=np.int) index = np.asarray(X, dtype=np.int)
dL_dK_small = np.zeros_like(self.B)
if X2 is None: if X2 is None:
index2 = index index2 = index
else: else:
index2 = np.asarray(X2, dtype=np.int) index2 = np.asarray(X2, dtype=np.int)
#attempt to use weave for a nasty double indexing loop: fall back to numpy
if config.getboolean('weave', 'working'):
try:
dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)
else:
dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)
dkappa = np.diag(dL_dK_small)
dL_dK_small += dL_dK_small.T
dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0)
self.W.gradient = dW
self.kappa.gradient = dkappa
def _gradient_reduce_weave(self, dL_dK, index, index2):
dL_dK_small = np.zeros_like(self.B)
code=""" code="""
for(int i=0; i<num_inducing; i++){ for(int i=0; i<num_inducing; i++){
for(int j=0; j<N; j++){ for(int j=0; j<N; j++){
@ -106,13 +149,16 @@ class Coregionalize(Kern):
""" """
N, num_inducing, output_dim = index.size, index2.size, self.output_dim N, num_inducing, output_dim = index.size, index2.size, self.output_dim
weave.inline(code, ['N', 'num_inducing', 'output_dim', 'dL_dK', 'dL_dK_small', 'index', 'index2']) weave.inline(code, ['N', 'num_inducing', 'output_dim', 'dL_dK', 'dL_dK_small', 'index', 'index2'])
return dL_dK_small
dkappa = np.diag(dL_dK_small) def _gradient_reduce_numpy(self, dL_dK, index, index2):
dL_dK_small += dL_dK_small.T index, index2 = index[:,0], index2[:,0]
dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0) dL_dK_small = np.zeros_like(self.B)
for i in range(self.output_dim):
self.W.gradient = dW tmp1 = dL_dK[index==i]
self.kappa.gradient = dkappa for j in range(self.output_dim):
dL_dK_small[j,i] = tmp1[:,index2==j].sum()
return dL_dK_small
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
index = np.asarray(X, dtype=np.int).flatten() index = np.asarray(X, dtype=np.int).flatten()
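# As a sanity check on the gradient reduction above, a small numpy sketch
# (shapes and names are illustrative only; the real code uses Nx1 index
# arrays) confirming that summing dL_dK over the blocks selected by the
# output indices matches a brute-force double loop.
import numpy as np

def gradient_reduce_numpy(dL_dK, index, index2, output_dim):
    dL_dB = np.zeros((output_dim, output_dim))
    for i in range(output_dim):
        tmp = dL_dK[index == i]
        for j in range(output_dim):
            dL_dB[j, i] = tmp[:, index2 == j].sum()
    return dL_dB

rng = np.random.RandomState(0)
index, index2 = rng.randint(0, 3, size=8), rng.randint(0, 3, size=5)
dL_dK = rng.randn(8, 5)
brute = np.zeros((3, 3))
for n in range(8):
    for m in range(5):
        brute[index2[m], index[n]] += dL_dK[n, m]
assert np.allclose(gradient_reduce_numpy(dL_dK, index, index2, 3), brute)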

View file

@ -1,30 +0,0 @@
try:
import sympy as sym
sympy_available=True
except ImportError:
sympy_available=False
import numpy as np
from symbolic import Symbolic
class Eq(Symbolic):
"""
The exponentiated quadratic covariance as a symbolic function.
"""
def __init__(self, input_dim, output_dim=1, variance=1.0, lengthscale=1.0, name='Eq'):
parameters = {'variance' : variance, 'lengthscale' : lengthscale}
x = sym.symbols('x_:' + str(input_dim))
z = sym.symbols('z_:' + str(input_dim))
variance = sym.var('variance',positive=True)
lengthscale = sym.var('lengthscale', positive=True)
dist_string = ' + '.join(['(x_%i - z_%i)**2' %(i, i) for i in range(input_dim)])
from sympy.parsing.sympy_parser import parse_expr
dist = parse_expr(dist_string)
# this is the covariance function
f = variance*sym.exp(-dist/(2*lengthscale**2))
# extra input dim is to signify the output dimension.
super(Eq, self).__init__(input_dim=input_dim, k=f, output_dim=output_dim, parameters=parameters, name=name)

View file

@ -1,30 +0,0 @@
try:
import sympy as sym
sympy_available=True
except ImportError:
sympy_available=False
import numpy as np
from symbolic import Symbolic
class Heat_eqinit(Symbolic):
"""
A symbolic covariance based on laying down an initial condition of the heat equation with an exponentiated quadratic covariance. The covariance then has multiple outputs which are interpreted as observations of the diffused process with different diffusion coefficients (or at different times).
"""
def __init__(self, input_dim, output_dim=1, param=None, name='Heat_eqinit'):
x = sym.symbols('x_:' + str(input_dim))
z = sym.symbols('z_:' + str(input_dim))
scale = sym.var('scale_i scale_j',positive=True)
lengthscale = sym.var('lengthscale_i lengthscale_j', positive=True)
shared_lengthscale = sym.var('shared_lengthscale', positive=True)
dist_string = ' + '.join(['(x_%i - z_%i)**2' %(i, i) for i in range(input_dim)])
from sympy.parsing.sympy_parser import parse_expr
dist = parse_expr(dist_string)
# this is the covariance function
f = scale_i*scale_j*sym.exp(-dist/(2*(shared_lengthscale**2 + lengthscale_i*lengthscale_j)))
# extra input dim is to signify the output dimension.
super(Heat_eqinit, self).__init__(input_dim=input_dim+1, k=f, output_dim=output_dim, name=name)

View file

@ -6,6 +6,7 @@ import numpy as np
from ...core.parameterization.parameterized import Parameterized from ...core.parameterization.parameterized import Parameterized
from kernel_slice_operations import KernCallsViaSlicerMeta from kernel_slice_operations import KernCallsViaSlicerMeta
from ...util.caching import Cache_this from ...util.caching import Cache_this
from GPy.core.parameterization.observable_array import ObsAr
@ -54,6 +55,20 @@ class Kern(Parameterized):
self._sliced_X = 0 self._sliced_X = 0
self.useGPU = self._support_GPU and useGPU self.useGPU = self._support_GPU and useGPU
self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool)
@property
def return_psi2_n(self):
"""
Flag whether to pass back psi2 as NxMxM or MxM, by summing out N.
"""
return self._return_psi2_n_flag[0]
@return_psi2_n.setter
def return_psi2_n(self, val):
def visit(self):
if isinstance(self, Kern):
self._return_psi2_n_flag[0]=val
self.traverse(visit)
@Cache_this(limit=20) @Cache_this(limit=20)
def _slice_X(self, X): def _slice_X(self, X):
@ -163,6 +178,10 @@ class Kern(Parameterized):
""" Here we overload the '*' operator. See self.prod for more information""" """ Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other) return self.prod(other)
def __imul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other)
def __pow__(self, other): def __pow__(self, other):
""" """
Shortcut for tensor `prod`. Shortcut for tensor `prod`.
@ -183,7 +202,7 @@ class Kern(Parameterized):
:type tensor: bool :type tensor: bool
""" """
assert isinstance(other, Kern), "only kernels can be added to kernels..." assert isinstance(other, Kern), "only kernels can be multiplied to kernels..."
from prod import Prod from prod import Prod
#kernels = [] #kernels = []
#if isinstance(self, Prod): kernels.extend(self.parameters) #if isinstance(self, Prod): kernels.extend(self.parameters)

View file

@ -3,7 +3,6 @@
import numpy as np import numpy as np
from kern import Kern from kern import Kern
from ...util.misc import param_to_array
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
class Poly(Kern): class Poly(Kern):

View file

@ -18,7 +18,12 @@ class Prod(CombinationKernel):
""" """
def __init__(self, kernels, name='mul'): def __init__(self, kernels, name='mul'):
assert len(kernels) == 2, 'only implemented for two kernels as of yet' for i, kern in enumerate(kernels[:]):
if isinstance(kern, Prod):
del kernels[i]
for part in kern.parts[::-1]:
kern.unlink_parameter(part)
kernels.insert(i, part)
super(Prod, self).__init__(kernels, name) super(Prod, self).__init__(kernels, name)
@Cache_this(limit=2, force_kwargs=['which_parts']) @Cache_this(limit=2, force_kwargs=['which_parts'])
@ -37,25 +42,25 @@ class Prod(CombinationKernel):
return reduce(np.multiply, (p.Kdiag(X) for p in which_parts)) return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
for k1,k2 in itertools.combinations(self.parts, 2): k = self.K(X,X2)*dL_dK
k1.update_gradients_full(dL_dK*k2.K(X, X2), X, X2) for p in self.parts:
k2.update_gradients_full(dL_dK*k1.K(X, X2), X, X2) p.update_gradients_full(k/p.K(X,X2),X,X2)
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
for k1,k2 in itertools.combinations(self.parts, 2): k = self.Kdiag(X)*dL_dKdiag
k1.update_gradients_diag(dL_dKdiag*k2.Kdiag(X), X) for p in self.parts:
k2.update_gradients_diag(dL_dKdiag*k1.Kdiag(X), X) p.update_gradients_diag(k/p.Kdiag(X),X)
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
target = np.zeros(X.shape) target = np.zeros(X.shape)
for k1,k2 in itertools.combinations(self.parts, 2): k = self.K(X,X2)*dL_dK
target += k1.gradients_X(dL_dK*k2.K(X, X2), X, X2) for p in self.parts:
target += k2.gradients_X(dL_dK*k1.K(X, X2), X, X2) target += p.gradients_X(k/p.K(X,X2),X,X2)
return target return target
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape) target = np.zeros(X.shape)
for k1,k2 in itertools.combinations(self.parts, 2): k = self.Kdiag(X)*dL_dKdiag
target += k1.gradients_X_diag(dL_dKdiag*k2.Kdiag(X), X) for p in self.parts:
target += k2.gradients_X_diag(dL_dKdiag*k1.Kdiag(X), X) target += p.gradients_X_diag(k/p.Kdiag(X),X)
return target return target
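# The rewritten Prod gradients rely on the identity that, for K = prod_p K_p,
# the factor multiplying dL_dK for part p is K / K_p, i.e. the product of all
# the other parts. A small illustrative numpy check of that identity (it
# assumes no part evaluates to exactly zero, which is what the division above
# also requires).
import numpy as np

rng = np.random.RandomState(1)
parts = [np.exp(-rng.rand(4, 4)) for _ in range(3)]  # three positive "part" kernels
K = np.prod(parts, axis=0)                           # full product kernel

for i, Kp in enumerate(parts):
    others = np.prod([p for j, p in enumerate(parts) if j != i], axis=0)
    assert np.allclose(K / Kp, others)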

View file

@ -10,7 +10,6 @@ import sslinear_psi_comp
import linear_psi_comp import linear_psi_comp
class PSICOMP_RBF(Pickleable): class PSICOMP_RBF(Pickleable):
@Cache_this(limit=2, ignore_args=(0,)) @Cache_this(limit=2, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, variational_posterior): def psicomputations(self, variance, lengthscale, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior): if isinstance(variational_posterior, variational.NormalPosterior):
@ -19,7 +18,7 @@ class PSICOMP_RBF(Pickleable):
return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior) return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
else: else:
raise ValueError, "unknown distriubtion received for psi-statistics" raise ValueError, "unknown distriubtion received for psi-statistics"
@Cache_this(limit=2, ignore_args=(0,1,2,3)) @Cache_this(limit=2, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior): if isinstance(variational_posterior, variational.NormalPosterior):
@ -28,10 +27,10 @@ class PSICOMP_RBF(Pickleable):
return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior) return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
else: else:
raise ValueError, "unknown distriubtion received for psi-statistics" raise ValueError, "unknown distriubtion received for psi-statistics"
def _setup_observers(self): def _setup_observers(self):
pass pass
class PSICOMP_Linear(Pickleable): class PSICOMP_Linear(Pickleable):
@Cache_this(limit=2, ignore_args=(0,)) @Cache_this(limit=2, ignore_args=(0,))
@ -42,7 +41,7 @@ class PSICOMP_Linear(Pickleable):
return sslinear_psi_comp.psicomputations(variance, Z, variational_posterior) return sslinear_psi_comp.psicomputations(variance, Z, variational_posterior)
else: else:
raise ValueError, "unknown distriubtion received for psi-statistics" raise ValueError, "unknown distriubtion received for psi-statistics"
@Cache_this(limit=2, ignore_args=(0,1,2,3)) @Cache_this(limit=2, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior): def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior): if isinstance(variational_posterior, variational.NormalPosterior):
@ -51,6 +50,6 @@ class PSICOMP_Linear(Pickleable):
return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior) return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
else: else:
raise ValueError, "unknown distriubtion received for psi-statistics" raise ValueError, "unknown distriubtion received for psi-statistics"
def _setup_observers(self): def _setup_observers(self):
pass pass

View file

@ -17,7 +17,7 @@ def psicomputations(variance, lengthscale, Z, variational_posterior):
# _psi1 NxM # _psi1 NxM
mu = variational_posterior.mean mu = variational_posterior.mean
S = variational_posterior.variance S = variational_posterior.variance
psi0 = np.empty(mu.shape[0]) psi0 = np.empty(mu.shape[0])
psi0[:] = variance psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S) psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
@ -41,7 +41,7 @@ def __psi1computations(variance, lengthscale, Z, mu, S):
_psi1_logdenom = np.log(S/lengthscale2+1.).sum(axis=-1) # N _psi1_logdenom = np.log(S/lengthscale2+1.).sum(axis=-1) # N
_psi1_log = (_psi1_logdenom[:,None]+np.einsum('nmq,nq->nm',np.square(mu[:,None,:]-Z[None,:,:]),1./(S+lengthscale2)))/(-2.) _psi1_log = (_psi1_logdenom[:,None]+np.einsum('nmq,nq->nm',np.square(mu[:,None,:]-Z[None,:,:]),1./(S+lengthscale2)))/(-2.)
_psi1 = variance*np.exp(_psi1_log) _psi1 = variance*np.exp(_psi1_log)
return _psi1 return _psi1
def __psi2computations(variance, lengthscale, Z, mu, S): def __psi2computations(variance, lengthscale, Z, mu, S):
@ -54,27 +54,27 @@ def __psi2computations(variance, lengthscale, Z, mu, S):
# here are the "statistics" for psi2 # here are the "statistics" for psi2
# Produced intermediate results: # Produced intermediate results:
# _psi2 MxM # _psi2 MxM
lengthscale2 = np.square(lengthscale) lengthscale2 = np.square(lengthscale)
_psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N _psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N
_psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM _psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM
Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ
denom = 1./(2.*S+lengthscale2) denom = 1./(2.*S+lengthscale2)
_psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom) _psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom)
_psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2) _psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
return _psi2 return _psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1) ARD = (len(lengthscale)!=1)
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance) dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance) dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
dL_dlengscale = dl_psi1 + dl_psi2 dL_dlengscale = dl_psi1 + dl_psi2
if not ARD: if not ARD:
dL_dlengscale = dL_dlengscale.sum() dL_dlengscale = dL_dlengscale.sum()
@ -82,7 +82,7 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscal
dL_dmu = dmu_psi1 + dmu_psi2 dL_dmu = dmu_psi1 + dmu_psi2
dL_dS = dS_psi1 + dS_psi2 dL_dS = dS_psi1 + dS_psi2
dL_dZ = dZ_psi1 + dZ_psi2 dL_dZ = dZ_psi1 + dZ_psi2
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S): def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
@ -101,9 +101,9 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
# _dL_dgamma NxQ # _dL_dgamma NxQ
# _dL_dmu NxQ # _dL_dmu NxQ
# _dL_dS NxQ # _dL_dS NxQ
lengthscale2 = np.square(lengthscale) lengthscale2 = np.square(lengthscale)
_psi1 = _psi1computations(variance, lengthscale, Z, mu, S) _psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
Lpsi1 = dL_dpsi1*_psi1 Lpsi1 = dL_dpsi1*_psi1
Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ
@ -114,7 +114,7 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
_dL_dS = np.einsum('nm,nmq,nq->nq',Lpsi1,(Zmu2_denom-1.),denom)/2. _dL_dS = np.einsum('nm,nmq,nq->nq',Lpsi1,(Zmu2_denom-1.),denom)/2.
_dL_dZ = -np.einsum('nm,nmq,nq->mq',Lpsi1,Zmu,denom) _dL_dZ = -np.einsum('nm,nmq,nq->mq',Lpsi1,Zmu,denom)
_dL_dl = np.einsum('nm,nmq,nq->q',Lpsi1,(Zmu2_denom+(S/lengthscale2)[:,None,:]),denom*lengthscale) _dL_dl = np.einsum('nm,nmq,nq->q',Lpsi1,(Zmu2_denom+(S/lengthscale2)[:,None,:]),denom*lengthscale)
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S): def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
@ -133,21 +133,20 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
# _dL_dgamma NxQ # _dL_dgamma NxQ
# _dL_dmu NxQ # _dL_dmu NxQ
# _dL_dS NxQ # _dL_dS NxQ
lengthscale2 = np.square(lengthscale) lengthscale2 = np.square(lengthscale)
denom = 1./(2*S+lengthscale2) denom = 1./(2*S+lengthscale2)
denom2 = np.square(denom) denom2 = np.square(denom)
_psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM _psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM
Lpsi2 = dL_dpsi2*_psi2 # dL_dpsi2 is MxM, using broadcast to multiply N out
Lpsi2 = dL_dpsi2[None,:,:]*_psi2
Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N
Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ
Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ
Lpsi2Z2p = np.einsum('nmo,mq,oq->nq',Lpsi2,Z,Z) #NxQ Lpsi2Z2p = np.einsum('nmo,mq,oq->nq',Lpsi2,Z,Z) #NxQ
Lpsi2Zhat = Lpsi2Z Lpsi2Zhat = Lpsi2Z
Lpsi2Zhat2 = (Lpsi2Z2+Lpsi2Z2p)/2 Lpsi2Zhat2 = (Lpsi2Z2+Lpsi2Z2p)/2
_dL_dvar = Lpsi2sum.sum()*2/variance _dL_dvar = Lpsi2sum.sum()*2/variance
_dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat) _dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat)
_dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None] _dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None]
@ -157,6 +156,6 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
(2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0) (2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0)
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
_psi1computations = Cacher(__psi1computations, limit=1) _psi1computations = Cacher(__psi1computations, limit=1)
_psi2computations = Cacher(__psi2computations, limit=1) _psi2computations = Cacher(__psi2computations, limit=1)

View file

@ -37,7 +37,7 @@ class RBF(Stationary):
if self.useGPU: if self.useGPU:
dc['psicomp'] = PSICOMP_RBF() dc['psicomp'] = PSICOMP_RBF()
return dc return dc
def __setstate__(self, state): def __setstate__(self, state):
return super(RBF, self).__setstate__(state) return super(RBF, self).__setstate__(state)

View file

@ -8,7 +8,8 @@ from ...core.parameterization.transformations import Logexp
from ...util.linalg import tdot from ...util.linalg import tdot
from ... import util from ... import util
import numpy as np import numpy as np
from scipy import integrate from scipy import integrate, weave
from ...util.config import config # for assessing whether to use weave
from ...util.caching import Cache_this from ...util.caching import Cache_this
class Stationary(Kern): class Stationary(Kern):
@ -71,6 +72,13 @@ class Stationary(Kern):
@Cache_this(limit=5, ignore_args=()) @Cache_this(limit=5, ignore_args=())
def K(self, X, X2=None): def K(self, X, X2=None):
"""
Kernel function applied on inputs X and X2.
In the stationary case the kernel only depends on the (lengthscale-)scaled
distance r between the inputs:
K(X, X2) = K_of_r(r), with r = sqrt( sum_q (X_q - X2_q)**2 / lengthscale_q**2 )
"""
r = self._scaled_dist(X, X2) r = self._scaled_dist(X, X2)
return self.K_of_r(r) return self.K_of_r(r)
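# A short standalone illustration of the structure described in the docstring
# above, using the exponentiated quadratic form k(r) = variance * exp(-r**2/2)
# as K_of_r. The helper names below are illustrative, not the GPy API.
import numpy as np

def scaled_dist(X, X2, lengthscale):
    # r_nm = sqrt( sum_q (X_nq - X2_mq)**2 / lengthscale_q**2 )
    diff = (X[:, None, :] - X2[None, :, :]) / lengthscale
    return np.sqrt(np.sum(np.square(diff), axis=-1))

def K_of_r(r, variance):
    return variance * np.exp(-0.5 * np.square(r))

X = np.random.randn(5, 2)
K = K_of_r(scaled_dist(X, X, np.array([1.0, 2.0])), variance=0.7)
assert np.allclose(np.diag(K), 0.7)  # zero distance on the diagonal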
@ -124,10 +132,22 @@ class Stationary(Kern):
return ret return ret
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
"""
Given the derivative of the objective with respect to the diagonal of
the covariance matrix, compute the derivative wrt the parameters of
this kernel and store it in the <parameter>.gradient field.
See also update_gradients_full
"""
self.variance.gradient = np.sum(dL_dKdiag) self.variance.gradient = np.sum(dL_dKdiag)
self.lengthscale.gradient = 0. self.lengthscale.gradient = 0.
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
"""
Given the derivative of the objective wrt the covariance matrix
(dL_dK), compute the gradient wrt the parameters of this kernel,
and store in the parameters object as e.g. self.variance.gradient
"""
self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance) self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance)
#now the lengthscale gradient(s) #now the lengthscale gradient(s)
@ -139,7 +159,17 @@ class Stationary(Kern):
#self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3 #self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
tmp = dL_dr*self._inv_dist(X, X2) tmp = dL_dr*self._inv_dist(X, X2)
if X2 is None: X2 = X if X2 is None: X2 = X
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
if config.getboolean('weave', 'working'):
try:
self.lengthscale.gradient = self.weave_lengthscale_grads(tmp, X, X2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
else:
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
else: else:
r = self._scaled_dist(X, X2) r = self._scaled_dist(X, X2)
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
@ -154,10 +184,43 @@ class Stationary(Kern):
dist = self._scaled_dist(X, X2).copy() dist = self._scaled_dist(X, X2).copy()
return 1./np.where(dist != 0., dist, np.inf) return 1./np.where(dist != 0., dist, np.inf)
def weave_lengthscale_grads(self, tmp, X, X2):
"""Use scipy.weave to compute derivatives wrt the lengthscales"""
N,M = tmp.shape
Q = X.shape[1]
if hasattr(X, 'values'):X = X.values
if hasattr(X2, 'values'):X2 = X2.values
grads = np.zeros(self.input_dim)
code = """
double gradq;
for(int q=0; q<Q; q++){
gradq = 0;
for(int n=0; n<N; n++){
for(int m=0; m<M; m++){
gradq += tmp(n,m)*(X(n,q)-X2(m,q))*(X(n,q)-X2(m,q));
}
}
grads[q] = gradq;
}
"""
weave.inline(code, ['tmp', 'X', 'X2', 'grads', 'N', 'M', 'Q'], type_converters=weave.converters.blitz, support_code="#include <math.h>")
return -grads/self.lengthscale**3
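# The weave kernel above accumulates sum_{n,m} tmp_nm * (X_nq - X2_mq)**2 for
# each input dimension q and then scales by -1/lengthscale**3. As a sanity
# check, a numpy sketch comparing an explicit loop against the einsum
# expression used in the fallback branch (all values here are made up).
import numpy as np

rng = np.random.RandomState(2)
N, M, Q = 6, 4, 3
tmp, X, X2 = rng.randn(N, M), rng.randn(N, Q), rng.randn(M, Q)
lengthscale = np.array([0.5, 1.0, 2.0])

loop = np.zeros(Q)  # explicit triple loop, mirroring the weave code
for q in range(Q):
    for n in range(N):
        for m in range(M):
            loop[q] += tmp[n, m] * (X[n, q] - X2[m, q]) ** 2
loop *= -1.0 / lengthscale ** 3

vec = np.array([np.einsum('ij,ij,...', tmp,
                          np.square(X[:, q:q + 1] - X2[:, q:q + 1].T),
                          -1. / lengthscale[q] ** 3) for q in range(Q)])
assert np.allclose(loop, vec)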
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
""" """
Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X
""" """
if config.getboolean('weave', 'working'):
try:
return self.gradients_X_weave(dL_dK, X, X2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
return self.gradients_X_(dL_dK, X, X2)
else:
return self.gradients_X_(dL_dK, X, X2)
def gradients_X_(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2) invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp = invdist*dL_dr tmp = invdist*dL_dr
@ -171,11 +234,40 @@ class Stationary(Kern):
#the lower memory way with a loop #the lower memory way with a loop
ret = np.empty(X.shape, dtype=np.float64) ret = np.empty(X.shape, dtype=np.float64)
[np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=ret[:,q]) for q in xrange(self.input_dim)] for q in xrange(self.input_dim):
np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=ret[:,q])
ret /= self.lengthscale**2 ret /= self.lengthscale**2
return ret return ret
def gradients_X_weave(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp = invdist*dL_dr
if X2 is None:
tmp = tmp + tmp.T
X2 = X
ret = np.zeros(X.shape)
code = """
int n,q,d;
double retnd;
for(n=0;n<N;n++){
for(d=0;d<D;d++){
retnd = 0;
for(q=0;q<Q;q++){
retnd += tmp(n,q)*(X(n,d)-X2(q,d));
}
ret(n,d) = retnd;
}
}
"""
from scipy import weave
N,D = X.shape
Q = tmp.shape[1]
weave.inline(code, ['ret', 'N', 'D', 'Q', 'tmp', 'X', 'X2'], type_converters=weave.converters.blitz)
return ret/self.lengthscale**2
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape) return np.zeros(X.shape)
@ -309,6 +401,19 @@ class Matern52(Stationary):
class ExpQuad(Stationary): class ExpQuad(Stationary):
"""
The Exponentiated quadratic covariance function.
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
.. note::
- Yes, this is exactly the same as the RBF covariance function, but the
RBF implementation also has some features for doing variational kernels
(the psi-statistics).
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='ExpQuad'): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='ExpQuad'):
super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

View file

@ -6,18 +6,3 @@ from poisson import Poisson
from student_t import StudentT from student_t import StudentT
from likelihood import Likelihood from likelihood import Likelihood
from mixed_noise import MixedNoise from mixed_noise import MixedNoise
#TODO need to fix this in a config file.
#TODO need to add the files to the git repo!
try:
import sympy as sym
sympy_available=True
except ImportError:
sympy_available=False
if sympy_available:
#These are likelihoods that rely on symbolic.
from symbolic import Symbolic
from sstudent_t import SstudentT
from negative_binomial import Negative_binomial
from skew_normal import Skew_normal
from skew_exponential import Skew_exponential
# from null_category import Null_category

View file

@ -113,10 +113,8 @@ class Bernoulli(Likelihood):
.. Note: .. Note:
Each y_i must be in {0, 1} Each y_i must be in {0, 1}
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#objective = (inv_link_f**y) * ((1.-inv_link_f)**(1.-y)) #objective = (inv_link_f**y) * ((1.-inv_link_f)**(1.-y))
objective = np.where(y, inv_link_f, 1.-inv_link_f) return np.where(y, inv_link_f, 1.-inv_link_f)
return np.exp(np.sum(np.log(objective)))
def logpdf_link(self, inv_link_f, y, Y_metadata=None): def logpdf_link(self, inv_link_f, y, Y_metadata=None):
""" """
@ -133,13 +131,9 @@ class Bernoulli(Likelihood):
:returns: log likelihood evaluated at points inverse link of f. :returns: log likelihood evaluated at points inverse link of f.
:rtype: float :rtype: float
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#objective = y*np.log(inv_link_f) + (1.-y)*np.log(inv_link_f) #objective = y*np.log(inv_link_f) + (1.-y)*np.log(inv_link_f)
state = np.seterr(divide='ignore') p = np.where(y==1, inv_link_f, 1.-inv_link_f)
# TODO check y \in {0, 1} or {-1, 1} return np.log(p)
objective = np.where(y==1, np.log(inv_link_f), np.log(1-inv_link_f))
np.seterr(**state)
return np.sum(objective)
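The where-based form above is equivalent to the usual Bernoulli log-density y*log(p) + (1-y)*log(1-p) for y in {0, 1}; a quick standalone check:

import numpy as np

p = np.array([0.3, 0.8])
y = np.array([1, 0])
assert np.allclose(y * np.log(p) + (1 - y) * np.log(1 - p),
                   np.log(np.where(y == 1, p, 1. - p)))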
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
""" """
@ -156,13 +150,10 @@ class Bernoulli(Likelihood):
:returns: gradient of log likelihood evaluated at points inverse link of f. :returns: gradient of log likelihood evaluated at points inverse link of f.
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#grad = (y/inv_link_f) - (1.-y)/(1-inv_link_f) #grad = (y/inv_link_f) - (1.-y)/(1-inv_link_f)
state = np.seterr(divide='ignore') #grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f))
# TODO check y \in {0, 1} or {-1, 1} denom = np.where(y, inv_link_f, -(1-inv_link_f))
grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f)) return 1./denom
np.seterr(**state)
return grad
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
""" """
@ -185,13 +176,12 @@ class Bernoulli(Likelihood):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i) (the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#d2logpdf_dlink2 = -y/(inv_link_f**2) - (1-y)/((1-inv_link_f)**2) #d2logpdf_dlink2 = -y/(inv_link_f**2) - (1-y)/((1-inv_link_f)**2)
state = np.seterr(divide='ignore') state = np.seterr(divide='ignore')
# TODO check y \in {0, 1} or {-1, 1} # TODO check y \in {0, 1} or {-1, 1}
d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f)) #d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f))
np.seterr(**state) arg = np.where(y, inv_link_f, 1.-inv_link_f)
return d2logpdf_dlink2 return -1./np.square(arg)
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None): def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
""" """

View file

@ -5,7 +5,6 @@
import numpy as np import numpy as np
from scipy import stats,special from scipy import stats,special
import scipy as sp import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import link_functions import link_functions
from likelihood import Likelihood from likelihood import Likelihood

View file

@ -5,7 +5,6 @@
import numpy as np import numpy as np
from scipy import stats,special from scipy import stats,special
import scipy as sp import scipy as sp
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
from ..core.parameterization import Param from ..core.parameterization import Param
import link_functions import link_functions
from likelihood import Likelihood from likelihood import Likelihood

View file

@ -13,7 +13,6 @@ James 11/12/13
import numpy as np import numpy as np
from scipy import stats, special from scipy import stats, special
from GPy.util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions import link_functions
from likelihood import Likelihood from likelihood import Likelihood
from ..core.parameterization import Param from ..core.parameterization import Param

View file

@ -4,7 +4,6 @@
import numpy as np import numpy as np
from scipy import stats,special from scipy import stats,special
import scipy as sp import scipy as sp
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import link_functions import link_functions
from ..util.misc import chain_1, chain_2, chain_3 from ..util.misc import chain_1, chain_2, chain_3
from scipy.integrate import quad from scipy.integrate import quad
@ -132,6 +131,56 @@ class Likelihood(Parameterized):
return z, mean, variance return z, mean, variance
def variational_expectations(self, Y, m, v, gh_points=None):
"""
Use Gauss-Hermite Quadrature to compute
E_p(f) [ log p(y|f) ]
d/dm E_p(f) [ log p(y|f) ]
d/dv E_p(f) [ log p(y|f) ]
where p(f) is a Gaussian with mean m and variance v. The shapes of Y, m and v should match.
If no gh_points are passed, we construct them using default options.
"""
if gh_points is None:
gh_x, gh_w = np.polynomial.hermite.hermgauss(12)
else:
gh_x, gh_w = gh_points
shape = m.shape
m,v,Y = m.flatten(), v.flatten(), Y.flatten()
#make a grid of points
X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + m[:,None]
#evaluate the likelihood for the grid. The first axis indexes the data (and mu, var) and the second indexes the grid.
# broadcast needs to be handled carefully.
logp = self.logpdf(X,Y[:,None])
dlogp_dx = self.dlogpdf_df(X, Y[:,None])
d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None])
#clipping for numerical stability
logp = np.clip(logp,-1e6,1e6)
dlogp_dx = np.clip(dlogp_dx,-1e6,1e6)
d2logp_dx2 = np.clip(d2logp_dx2,-1e6,1e6)
#average over the grid to get derivatives of the Gaussian's parameters
F = np.dot(logp, gh_w)
dF_dm = np.dot(dlogp_dx, gh_w)
dF_dv = np.dot(d2logp_dx2, gh_w)/2.
if np.any(np.isnan(dF_dv)) or np.any(np.isinf(dF_dv)):
raise ValueError("variational_expectations: dF_dv contains NaN or inf values")
if np.any(np.isnan(dF_dm)) or np.any(np.isinf(dF_dm)):
raise ValueError("variational_expectations: dF_dm contains NaN or inf values")
return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape)
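A standalone sketch of the Gauss-Hermite rule used above, for a single Gaussian N(m, v) and an arbitrary integrand g; note the 1/sqrt(pi) normalisation of the raw Hermite weights when forming an expectation:

import numpy as np

def gauss_hermite_expectation(g, m, v, degree=12):
    # E_{f ~ N(m, v)}[g(f)] ~ (1/sqrt(pi)) * sum_i w_i * g(sqrt(2 v) t_i + m)
    t, w = np.polynomial.hermite.hermgauss(degree)
    return np.dot(g(np.sqrt(2. * v) * t + m), w) / np.sqrt(np.pi)

# e.g. the mean of N(1.5, 0.25) is recovered
assert np.isclose(gauss_hermite_expectation(lambda f: f, 1.5, 0.25), 1.5)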
def predictive_mean(self, mu, variance, Y_metadata=None): def predictive_mean(self, mu, variance, Y_metadata=None):
""" """
Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) ) Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) )

View file

@ -70,7 +70,6 @@ class Probit(GPTransformation):
.. math:: .. math::
g(f) = \\Phi^{-1} (mu) g(f) = \\Phi^{-1} (mu)
""" """
def transf(self,f): def transf(self,f):
@ -88,6 +87,34 @@ class Probit(GPTransformation):
f2 = f**2 f2 = f**2
return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2) return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2)
class Cloglog(GPTransformation):
"""
Complementary log-log link
.. math::
p(f) = 1 - e^{-e^f}
or
f = \log (-\log(1-p))
"""
def transf(self,f):
return 1-np.exp(-np.exp(f))
def dtransf_df(self,f):
return np.exp(f-np.exp(f))
def d2transf_df2(self,f):
ef = np.exp(f)
return -np.exp(f-ef)*(ef-1.)
def d3transf_df3(self,f):
ef = np.exp(f)
return np.exp(f-ef)*(1.-3*ef + ef**2)
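A quick finite-difference check of the first cloglog derivative (a standalone sketch, not part of the test suite):

import numpy as np

def cloglog(f):
    return 1. - np.exp(-np.exp(f))

f = np.linspace(-2., 2., 5)
eps = 1e-6
numeric = (cloglog(f + eps) - cloglog(f - eps)) / (2. * eps)
analytic = np.exp(f - np.exp(f))   # dtransf_df above
assert np.allclose(numeric, analytic, atol=1e-6)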
class Log(GPTransformation): class Log(GPTransformation):
""" """
.. math:: .. math::

View file

@ -1,6 +1,5 @@
import numpy as np import numpy as np
from scipy import stats, special from scipy import stats, special
from GPy.util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions import link_functions
from likelihood import Likelihood from likelihood import Likelihood
from gaussian import Gaussian from gaussian import Gaussian

View file

@ -1,48 +0,0 @@
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
try:
import sympy as sym
sympy_available=True
from sympy.utilities.lambdify import lambdify
from GPy.util.symbolic import gammaln, logisticln
except ImportError:
sympy_available=False
import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions
from symbolic import Symbolic
from scipy import stats
class Negative_binomial(Symbolic):
"""
Negative binomial
.. math::
p(y_{i}|\pi(f_{i})) = \left(\frac{r}{r+f_i}\right)^r \frac{\Gamma(r+y_i)}{y!\Gamma(r)}\left(\frac{f_i}{r+f_i}\right)^{y_i}
.. Note::
Y takes non zero integer values..
link function should have a positive domain, e.g. log (default).
.. See also::
symbolic.py, for the parent class
"""
def __init__(self, gp_link=None, dispersion=1.0):
parameters = {'dispersion':dispersion}
if gp_link is None:
gp_link = link_functions.Identity()
dispersion = sym.Symbol('dispersion', positive=True, real=True)
y_0 = sym.Symbol('y_0', nonnegative=True, integer=True)
f_0 = sym.Symbol('f_0', positive=True, real=True)
gp_link = link_functions.Log()
log_pdf=dispersion*sym.log(dispersion) - (dispersion+y_0)*sym.log(dispersion+f_0) + gammaln(y_0+dispersion) - gammaln(y_0+1) - gammaln(dispersion) + y_0*sym.log(f_0)
#log_pdf= -(dispersion+y)*logisticln(f-sym.log(dispersion)) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*(f-sym.log(dispersion))
super(Negative_binomial, self).__init__(log_pdf=log_pdf, parameters=parameters, gp_link=gp_link, name='Negative_binomial')
# TODO: Check this.
self.log_concave = False

View file

@ -5,7 +5,6 @@
import sympy as sym import sympy as sym
from GPy.util.symbolic import gammaln, normcdfln, normcdf, IndMatrix, create_matrix from GPy.util.symbolic import gammaln, normcdfln, normcdf, IndMatrix, create_matrix
import numpy as np import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions import link_functions
from symbolic import Symbolic from symbolic import Symbolic
from scipy import stats from scipy import stats

View file

@ -5,7 +5,6 @@ from __future__ import division
import numpy as np import numpy as np
from scipy import stats,special from scipy import stats,special
import scipy as sp import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import link_functions import link_functions
from likelihood import Likelihood from likelihood import Likelihood

View file

@ -1,45 +0,0 @@
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sympy as sym
from GPy.util.symbolic import normcdfln
import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
#from GPy.util.functions import clip_exp
import link_functions
from symbolic import Symbolic
from scipy import stats
class Skew_exponential(Symbolic):
"""
Skew exponential distribution.
.. math::
.. Note::
Y takes real values.
link function is identity
.. See also::
symbolic.py, for the parent class
"""
def __init__(self, gp_link=None, shape=1.0, scale=1.0):
parameters={'scale':scale, 'shape':shape}
if gp_link is None:
gp_link = link_functions.Identity()
#func_modules = [{'exp':clip_exp}]
scale = sym.Symbol('scale', positive=True, real=True)
shape = sym.Symbol('shape', real=True)
y_0 = sym.Symbol('y_0', real=True)
f_0 = sym.Symbol('f_0', real=True)
log_pdf=sym.log(shape)-sym.log(scale)-((y_0-f_0)/scale) + normcdfln(shape*(y_0-f_0)/scale)
super(Skew_exponential, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Skew_exponential', parameters=parameters)
# TODO: Check this.
self.log_concave = True

View file

@ -1,53 +0,0 @@
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
try:
import sympy as sym
sympy_available=True
from sympy.utilities.lambdify import lambdify
from GPy.util.symbolic import normcdfln, normcdf
except ImportError:
sympy_available=False
import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
from GPy.util.functions import clip_exp
import link_functions
from symbolic import Symbolic
from scipy import stats
class Skew_normal(Symbolic):
"""
Skew Normal distribution.
.. math::
.. Note::
Y takes real values.
link function is identity
.. See also::
symbolic.py, for the parent class
"""
def __init__(self, gp_link=None, shape=1.0, scale=1.0):
parameters = {'scale': scale, 'shape':shape}
if gp_link is None:
gp_link = link_functions.Identity()
# # this likelihood has severe problems with likelihoods saturating exponentials, so clip_exp is used in place of the true exp as a solution for dealing with the numerics.
# func_modules = [{'exp':clip_exp}]
func_modules = []
scale = sym.Symbol('scale', positive=True, real=True)
shape = sym.Symbol('shape', real=True)
y_0 = sym.Symbol('y_0', real=True)
f_0 = sym.Symbol('f_0', real=True)
log_pdf=-sym.log(scale)-1./2*sym.log(2*sym.pi)-1./2*((y_0-f_0)/scale)**2 + sym.log(2) + normcdfln(shape*(y_0-f_0)/scale)
super(Skew_normal, self).__init__(log_pdf=log_pdf, parameters=parameters, gp_link=gp_link, name='Skew_normal', func_modules=func_modules)
self.log_concave = True

View file

@ -1,45 +0,0 @@
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sympy as sym
from sympy.utilities.lambdify import lambdify
from GPy.util.symbolic import gammaln
import numpy as np
import link_functions
from symbolic import Symbolic
from scipy import stats
class SstudentT(Symbolic):
"""
Symbolic variant of the Student-t distribution.
.. math::
.. Note::
Y takes real values.
link function is identity
.. See also::
symbolic.py, for the parent class
"""
def __init__(self, gp_link=None, deg_free=5.0, t_scale2=1.0):
parameters = {'deg_free':5.0, 't_scale2':1.0}
if gp_link is None:
gp_link = link_functions.Identity()
# this likelihood has severe problems with likelihoods saturating ...
y_0 = sym.Symbol('y_0', real=True)
f_0 = sym.Symbol('f_0', real=True)
nu = sym.Symbol('nu', positive=True, real=True)
t_scale2 = sym.Symbol('t_scale2', positive=True, real=True)
log_pdf = (gammaln((nu + 1) * 0.5)
- gammaln(nu * 0.5)
- 0.5*sym.log(t_scale2 * nu * sym.pi)
- 0.5*(nu + 1)*sym.log(1 + (1/nu)*(((y_0-f_0)**2)/t_scale2)))
super(SstudentT, self).__init__(log_pdf=log_pdf, parameters=parameters, gp_link=gp_link, name='SstudentT')
self.log_concave = False

View file

@ -1,279 +0,0 @@
# Copyright (c) 2014 GPy Authors
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sympy as sym
import numpy as np
from likelihood import Likelihood
from ..core.symbolic import Symbolic_core
class Symbolic(Likelihood, Symbolic_core):
"""
Symbolic likelihood.
Likelihood where the form of the likelihood is provided by a sympy expression.
"""
def __init__(self, log_pdf=None, logZ=None, missing_log_pdf=None, gp_link=None, name='symbolic', log_concave=False, parameters=None, func_modules=[]):
if gp_link is None:
gp_link = link_functions.Identity()
if log_pdf is None:
raise ValueError, "You must provide an argument for the log pdf."
Likelihood.__init__(self, gp_link, name=name)
functions = {'log_pdf':log_pdf}
self.cacheable = ['F', 'Y']
self.missing_data = False
if missing_log_pdf:
self.missing_data = True
functions['missing_log_pdf']=missing_log_pdf
self.ep_analytic = False
if logZ:
self.ep_analytic = True
functions['logZ'] = logZ
self.cacheable += ['M', 'V']
Symbolic_core.__init__(self, functions, cacheable=self.cacheable, derivatives = ['F', 'theta'], parameters=parameters, func_modules=func_modules)
# TODO: Is there an easy way to check whether the pdf is log
self.log_concave = log_concave
def _set_derivatives(self, derivatives):
# these are arguments for computing derivatives.
print "Whoop"
Symbolic_core._set_derivatives(self, derivatives)
# add second and third derivatives for Laplace approximation.
derivative_arguments = []
if derivatives is not None:
for derivative in derivatives:
derivative_arguments += self.variables[derivative]
exprs = ['log_pdf']
if self.missing_data:
exprs.append('missing_log_pdf')
for expr in exprs:
self.expressions[expr]['second_derivative'] = {theta.name : self.stabilize(sym.diff(self.expressions[expr]['derivative']['f_0'], theta)) for theta in derivative_arguments}
self.expressions[expr]['third_derivative'] = {theta.name : self.stabilize(sym.diff(self.expressions[expr]['second_derivative']['f_0'], theta)) for theta in derivative_arguments}
if self.ep_analytic:
derivative_arguments = [M]
# add second derivative for EP
exprs = ['logZ']
if self.missing_data:
exprs.append('missing_logZ')
for expr in exprs:
self.expressions[expr]['second_derivative'] = {theta.name : self.stabilize(sym.diff(self.expressions[expr]['derivative'], theta)) for theta in derivative_arguments}
def eval_update_cache(self, Y, **kwargs):
# TODO: place checks for inf/nan in here
# for all provided keywords
Symbolic_core.eval_update_cache(self, Y=Y, **kwargs)
# Y = np.atleast_2d(Y)
# for variable, code in sorted(self.code['parameters_changed'].iteritems()):
# self._set_attribute(variable, eval(code, self.namespace))
# for i, theta in enumerate(self.variables['Y']):
# missing = np.isnan(Y[:, i])
# self._set_attribute('missing_' + str(i), missing)
# self._set_attribute(theta.name, value[missing, i][:, None])
# for variable, value in kwargs.items():
# # update their cached values
# if value is not None:
# if variable == 'F' or variable == 'M' or variable == 'V' or variable == 'Y_metadata':
# for i, theta in enumerate(self.variables[variable]):
# self._set_attribute(theta.name, value[:, i][:, None])
# else:
# self._set_attribute(theta.name, value[:, i])
# for variable, code in sorted(self.code['update_cache'].iteritems()):
# self._set_attribute(variable, eval(code, self.namespace))
def parameters_changed(self):
pass
def update_gradients(self, grads):
"""
Pull out the gradients, be careful as the order must match the order
in which the parameters are added
"""
# The way the Laplace approximation is run requires the
# covariance function to compute the true gradient (because it
# is dependent on the mode). This means we actually compute
# the gradient outside this object. This function would
# normally ask the object to update its gradients internally,
# but here it provides them externally, because they are
# computed in the inference code. TODO: Thought: How does this
# effect EP? Shouldn't this be done by a separate
# Laplace-approximation specific call?
for theta, grad in zip(self.variables['theta'], grads):
parameter = getattr(self, theta.name)
setattr(parameter, 'gradient', grad)
def pdf_link(self, f, y, Y_metadata=None):
"""
Likelihood function given inverse link of f.
:param f: inverse link of latent variables.
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
return np.exp(self.logpdf_link(f, y, Y_metadata=None))
def logpdf_link(self, f, y, Y_metadata=None):
"""
Log Likelihood Function given inverse link of latent variables.
:param f: latent variables (inverse link of f)
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
if self.missing_data:
missing_flag = np.isnan(y)
not_missing_flag = np.logical_not(missing_flag)
ll = self.eval_function('missing_log_pdf', F=f[missing_flag]).sum()
ll += self.eval_function('log_pdf', F=f[not_missing_flag], Y=y[not_missing_flag], Y_metadata=Y_metadata[not_missing_flag]).sum()
else:
ll = self.eval_function('log_pdf', F=f, Y=y, Y_metadata=Y_metadata).sum()
return ll
def dlogpdf_dlink(self, f, y, Y_metadata=None):
"""
Gradient of log likelihood with respect to the inverse link function.
:param f: latent variables (inverse link of f)
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata
:returns: gradient of likelihood with respect to each point.
:rtype: Nx1 array
"""
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
if self.missing_data:
return np.where(np.isnan(y),
eval(self.code['missing_log_pdf']['derivative']['f_0'], self.namespace),
eval(self.code['log_pdf']['derivative']['f_0'], self.namespace))
else:
return np.where(np.isnan(y),
0.,
eval(self.code['log_pdf']['derivative']['f_0'], self.namespace))
def d2logpdf_dlink2(self, f, y, Y_metadata=None):
"""
Hessian of log likelihood given inverse link of latent variables with respect to that inverse link.
i.e. second derivative logpdf at y given inv_link(f_i) and inv_link(f_j) w.r.t inv_link(f_i) and inv_link(f_j).
:param f: inverse link of the latent variables.
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: Diagonal of Hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Returns diagonal of Hessian, since every where else it is
0, as the likelihood factorizes over cases (the
distribution for y_i depends only on link(f_i) not on
link(f_(j!=i))
"""
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
if self.missing_data:
return np.where(np.isnan(y),
eval(self.code['missing_log_pdf']['second_derivative']['f_0'], self.namespace),
eval(self.code['log_pdf']['second_derivative']['f_0'], self.namespace))
else:
return np.where(np.isnan(y),
0.,
eval(self.code['log_pdf']['second_derivative']['f_0'], self.namespace))
def d3logpdf_dlink3(self, f, y, Y_metadata=None):
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
if self.missing_data:
return np.where(np.isnan(y),
eval(self.code['missing_log_pdf']['third_derivative']['f_0'], self.namespace),
eval(self.code['log_pdf']['third_derivative']['f_0'], self.namespace))
else:
return np.where(np.isnan(y),
0.,
eval(self.code['log_pdf']['third_derivative']['f_0'], self.namespace))
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
g = np.zeros((np.atleast_1d(y).shape[0], len(self.variables['theta'])))
for i, theta in enumerate(self.variables['theta']):
if self.missing_data:
g[:, i:i+1] = np.where(np.isnan(y),
eval(self.code['missing_log_pdf']['derivative'][theta.name], self.namespace),
eval(self.code['log_pdf']['derivative'][theta.name], self.namespace))
else:
g[:, i:i+1] = np.where(np.isnan(y),
0.,
eval(self.code['log_pdf']['derivative'][theta.name], self.namespace))
return g.sum(0)
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
g = np.zeros((np.atleast_1d(y).shape[0], len(self.variables['theta'])))
for i, theta in enumerate(self.variables['theta']):
if self.missing_data:
g[:, i:i+1] = np.where(np.isnan(y),
eval(self.code['missing_log_pdf']['second_derivative'][theta.name], self.namespace),
eval(self.code['log_pdf']['second_derivative'][theta.name], self.namespace))
else:
g[:, i:i+1] = np.where(np.isnan(y),
0.,
eval(self.code['log_pdf']['second_derivative'][theta.name], self.namespace))
return g
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
g = np.zeros((np.atleast_1d(y).shape[0], len(self.variables['theta'])))
for i, theta in enumerate(self.variables['theta']):
if self.missing_data:
g[:, i:i+1] = np.where(np.isnan(y),
eval(self.code['missing_log_pdf']['third_derivative'][theta.name], self.namespace),
eval(self.code['log_pdf']['third_derivative'][theta.name], self.namespace))
else:
g[:, i:i+1] = np.where(np.isnan(y),
0.,
eval(self.code['log_pdf']['third_derivative'][theta.name], self.namespace))
return g
def predictive_mean(self, mu, sigma, Y_metadata=None):
raise NotImplementedError
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
raise NotImplementedError
def conditional_mean(self, gp):
raise NotImplementedError
def conditional_variance(self, gp):
raise NotImplementedError
def samples(self, gp, Y_metadata=None):
raise NotImplementedError

View file

@ -5,13 +5,3 @@ from kernel import Kernel
from linear import Linear from linear import Linear
from mlp import MLP from mlp import MLP
#from rbf import RBF #from rbf import RBF
# TODO need to fix this in a config file.
try:
import sympy as sym
sympy_available=True
except ImportError:
sympy_available=False
if sympy_available:
# These are likelihoods that rely on symbolic.
from symbolic import Symbolic

View file

@ -1,57 +0,0 @@
# Copyright (c) 2014 GPy Authors
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sympy as sym
from ..core.mapping import Mapping, Bijective_mapping
from ..core.symbolic import Symbolic_core
import numpy as np
class Symbolic(Mapping, Symbolic_core):
"""
Symbolic mapping
Mapping where the form of the mapping is provided by a sympy expression.
"""
def __init__(self, input_dim, output_dim, f=None, name='symbolic', parameters=None, func_modules=[]):
if f is None:
raise ValueError, "You must provide an argument for the function."
Mapping.__init__(self, input_dim, output_dim, name=name)
Symbolic_core.__init__(self, {'f': f}, ['X'], derivatives = ['X', 'theta'], parameters=parameters, func_modules=func_modules)
self._initialize_cache()
self.parameters_changed()
def _initialize_cache(self):
self._set_attribute('x_0', np.random.normal(size=(3, self.input_dim)))
def parameters_changed(self):
self.eval_parameters_changed()
def update_cache(self, X=None):
self.eval_update_cache(X=X)
def update_gradients(self, partial, X=None):
for name, val in self.eval_update_gradients('f', partial, X=X).iteritems():
setattr(getattr(self, name), 'gradient', val)
def gradients_X(self, partial, X=None):
return self.eval_gradients_X('f', partial, X=X)
def f(self, X=None):
"""
"""
return self.eval_function('f', X=X)
def df_dX(self, X):
"""
"""
pass
def df_dtheta(self, X):
pass

View file

@ -3,16 +3,14 @@
import numpy as np import numpy as np
from .. import kern from .. import kern
from ..core import SparseGP from ..core.sparse_gp_mpi import SparseGP_MPI
from ..likelihoods import Gaussian from ..likelihoods import Gaussian
from ..inference.optimization import SCG from ..core.parameterization.variational import NormalPosterior, NormalPrior
from ..util import linalg from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch
from ..core.parameterization.variational import NormalPosterior, NormalPrior, VariationalPosterior
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
import logging import logging
class BayesianGPLVM(SparseGP): class BayesianGPLVM(SparseGP_MPI):
""" """
Bayesian Gaussian Process Latent Variable Model Bayesian Gaussian Process Latent Variable Model
@ -25,12 +23,14 @@ class BayesianGPLVM(SparseGP):
""" """
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', mpi_comm=None, normalizer=None): Z=None, kernel=None, inference_method=None, likelihood=None,
name='bayesian gplvm', mpi_comm=None, normalizer=None,
missing_data=False, stochastic=False, batchsize=1):
self.mpi_comm = mpi_comm self.mpi_comm = mpi_comm
self.__IN_OPTIMIZATION__ = False self.__IN_OPTIMIZATION__ = False
self.logger = logging.getLogger(self.__class__.__name__) self.logger = logging.getLogger(self.__class__.__name__)
if X == None: if X is None:
from ..util.initialization import initialize_latent from ..util.initialization import initialize_latent
self.logger.info("initializing latent space X with method {}".format(init)) self.logger.info("initializing latent space X with method {}".format(init))
X, fracs = initialize_latent(init, input_dim, Y) X, fracs = initialize_latent(init, input_dim, Y)
@ -59,35 +59,24 @@ class BayesianGPLVM(SparseGP):
X = NormalPosterior(X, X_variance) X = NormalPosterior(X, X_variance)
if inference_method is None: if inference_method is None:
inan = np.isnan(Y) if mpi_comm is not None:
if np.any(inan):
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData
self.logger.debug("creating inference_method with var_dtc missing data")
inference_method = VarDTCMissingData(inan=inan)
elif mpi_comm is not None:
inference_method = VarDTC_minibatch(mpi_comm=mpi_comm) inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
else: else:
from ..inference.latent_function_inference.var_dtc import VarDTC from ..inference.latent_function_inference.var_dtc import VarDTC
self.logger.debug("creating inference_method var_dtc") self.logger.debug("creating inference_method var_dtc")
inference_method = VarDTC() inference_method = VarDTC(limit=1 if not missing_data else Y.shape[1])
if isinstance(inference_method,VarDTC_minibatch): if isinstance(inference_method,VarDTC_minibatch):
inference_method.mpi_comm = mpi_comm inference_method.mpi_comm = mpi_comm
if kernel.useGPU and isinstance(inference_method, VarDTC_GPU): if kernel.useGPU and isinstance(inference_method, VarDTC_GPU):
kernel.psicomp.GPU_direct = True kernel.psicomp.GPU_direct = True
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, normalizer=normalizer) super(BayesianGPLVM,self).__init__(X, Y, Z, kernel, likelihood=likelihood,
self.logger.info("Adding X as parameter") name=name, inference_method=inference_method,
self.link_parameter(self.X, index=0) normalizer=normalizer, mpi_comm=mpi_comm,
variational_prior=self.variational_prior,
if mpi_comm != None: missing_data=missing_data, stochastic=stochastic,
from ..util.mpi import divide_data batchsize=batchsize)
N_start, N_end, N_list = divide_data(Y.shape[0], mpi_comm)
self.N_range = (N_start, N_end)
self.N_list = np.array(N_list)
self.Y_local = self.Y[N_start:N_end]
print 'MPI RANK: '+str(self.mpi_comm.rank)+' with datasize: '+str(self.N_range)
mpi_comm.Bcast(self.param_array, root=0)
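A minimal construction call using the new keyword arguments (a sketch mirroring test_missing_data further down; the data and kernel here are placeholders):

import numpy as np
import GPy
from GPy.models import BayesianGPLVM

Q = 3
Y = np.random.randn(100, 5)            # placeholder data
k = GPy.kern.RBF(Q, ARD=True)          # kernel as used in the tests
m = BayesianGPLVM(Y, Q, kernel=k, num_inducing=10,
                  missing_data=False, stochastic=False, batchsize=1)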
def set_X_gradients(self, X, X_grad): def set_X_gradients(self, X, X_grad):
"""Set the gradients of the posterior distribution of X in its specific form.""" """Set the gradients of the posterior distribution of X in its specific form."""
@ -97,15 +86,62 @@ class BayesianGPLVM(SparseGP):
"""Get the gradients of the posterior distribution of X in its specific form.""" """Get the gradients of the posterior distribution of X in its specific form."""
return X.mean.gradient, X.variance.gradient return X.mean.gradient, X.variance.gradient
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None):
posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVM, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices)
kl_fctr = 1.
if self.missing_data:
d = self.output_dim
log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d
else:
log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)
current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
variational_posterior=X,
Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=grad_dict['dL_dpsi1'],
dL_dpsi2=grad_dict['dL_dpsi2'])
# Subsetting Variational Posterior objects makes the gradients
# empty. We need them to be 0 though:
X.mean.gradient[:] = 0
X.variance.gradient[:] = 0
self.variational_prior.update_gradients_KL(X)
if self.missing_data:
current_values['meangrad'] += kl_fctr*X.mean.gradient/d
current_values['vargrad'] += kl_fctr*X.variance.gradient/d
else:
current_values['meangrad'] += kl_fctr*X.mean.gradient
current_values['vargrad'] += kl_fctr*X.variance.gradient
if subset_indices is not None:
value_indices['meangrad'] = subset_indices['samples']
value_indices['vargrad'] = subset_indices['samples']
return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices
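A note on the kl_fctr/d scaling above (an assumption from reading this method, not stated elsewhere in the commit): when missing_data is set, the inner call runs once per output dimension, so dividing the KL term by d = output_dim makes the accumulated objective equal to the usual bound,

.. math::
    \sum_{j=1}^{d}\Big(\mathcal{L}_j - \frac{1}{d}\mathrm{KL}(q(X)\|p(X))\Big) = \sum_{j=1}^{d}\mathcal{L}_j - \mathrm{KL}(q(X)\|p(X))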
def _outer_values_update(self, full_values):
"""
Put the values collected by the inner loop back into the right places,
e.g. setting the gradients of the parameters.
"""
super(BayesianGPLVM, self)._outer_values_update(full_values)
self.X.mean.gradient = full_values['meangrad']
self.X.variance.gradient = full_values['vargrad']
def _outer_init_full_values(self):
return dict(meangrad=np.zeros(self.X.mean.shape),
vargrad=np.zeros(self.X.variance.shape))
def parameters_changed(self): def parameters_changed(self):
if isinstance(self.inference_method, VarDTC_GPU) or isinstance(self.inference_method, VarDTC_minibatch): super(BayesianGPLVM,self).parameters_changed()
update_gradients(self, mpi_comm=self.mpi_comm) if isinstance(self.inference_method, VarDTC_minibatch):
return return
super(BayesianGPLVM, self).parameters_changed() #super(BayesianGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) #self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2']) #self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
# This is testing code ------------------------- # This is testing code -------------------------
# i = np.random.randint(self.X.shape[0]) # i = np.random.randint(self.X.shape[0])
@ -124,7 +160,7 @@ class BayesianGPLVM(SparseGP):
# ----------------------------------------------- # -----------------------------------------------
# update for the KL divergence # update for the KL divergence
self.variational_prior.update_gradients_KL(self.X) #self.variational_prior.update_gradients_KL(self.X)
def plot_latent(self, labels=None, which_indices=None, def plot_latent(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40, resolution=50, ax=None, marker='o', s=40,
@ -189,7 +225,7 @@ class BayesianGPLVM(SparseGP):
""" """
dmu_dX = np.zeros_like(Xnew) dmu_dX = np.zeros_like(Xnew)
for i in range(self.Z.shape[0]): for i in range(self.Z.shape[0]):
dmu_dX += self.kern.gradients_X(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :]) dmu_dX += self.kern.gradients_X(self.grad_dict['dL_dpsi1'][i:i + 1, :], Xnew, self.Z[i:i + 1, :])
return dmu_dX return dmu_dX
def dmu_dXnew(self, Xnew): def dmu_dXnew(self, Xnew):
@ -200,7 +236,7 @@ class BayesianGPLVM(SparseGP):
ones = np.ones((1, 1)) ones = np.ones((1, 1))
for i in range(self.Z.shape[0]): for i in range(self.Z.shape[0]):
gradients_X[:, i] = self.kern.gradients_X(ones, Xnew, self.Z[i:i + 1, :]).sum(-1) gradients_X[:, i] = self.kern.gradients_X(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
return np.dot(gradients_X, self.Cpsi1Vf) return np.dot(gradients_X, self.grad_dict['dL_dpsi1'])
def plot_steepest_gradient_map(self, *args, ** kwargs): def plot_steepest_gradient_map(self, *args, ** kwargs):
""" """
@ -211,50 +247,6 @@ class BayesianGPLVM(SparseGP):
from ..plotting.matplot_dep import dim_reduction_plots from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs) return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
def __getstate__(self):
dc = super(BayesianGPLVM, self).__getstate__()
dc['mpi_comm'] = None
if self.mpi_comm != None:
del dc['N_range']
del dc['N_list']
del dc['Y_local']
return dc
def __setstate__(self, state):
return super(BayesianGPLVM, self).__setstate__(state)
#=====================================================
# The MPI parallelization
# - can move to model at some point
#=====================================================
def _set_params_transformed(self, p):
if self.mpi_comm != None:
if self.__IN_OPTIMIZATION__ and self.mpi_comm.rank==0:
self.mpi_comm.Bcast(np.int32(1),root=0)
self.mpi_comm.Bcast(p, root=0)
super(BayesianGPLVM, self)._set_params_transformed(p)
def optimize(self, optimizer=None, start=None, **kwargs):
self.__IN_OPTIMIZATION__ = True
if self.mpi_comm==None:
super(BayesianGPLVM, self).optimize(optimizer,start,**kwargs)
elif self.mpi_comm.rank==0:
super(BayesianGPLVM, self).optimize(optimizer,start,**kwargs)
self.mpi_comm.Bcast(np.int32(-1),root=0)
elif self.mpi_comm.rank>0:
x = self._get_params_transformed().copy()
flag = np.empty(1,dtype=np.int32)
while True:
self.mpi_comm.Bcast(flag,root=0)
if flag==1:
self._set_params_transformed(x)
elif flag==-1:
break
else:
self.__IN_OPTIMIZATION__ = False
raise Exception("Unrecognizable flag for synchronization!")
self.__IN_OPTIMIZATION__ = False
def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2): def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):

View file

@ -7,7 +7,6 @@ from ..core import SparseGP
from .. import likelihoods from .. import likelihoods
from .. import kern from .. import kern
from ..inference.latent_function_inference import VarDTC from ..inference.latent_function_inference import VarDTC
from ..util.misc import param_to_array
from ..core.parameterization.variational import NormalPosterior from ..core.parameterization.variational import NormalPosterior
class SparseGPRegression(SparseGP): class SparseGPRegression(SparseGP):
@ -40,7 +39,7 @@ class SparseGPRegression(SparseGP):
# Z defaults to a subset of the data # Z defaults to a subset of the data
if Z is None: if Z is None:
i = np.random.permutation(num_data)[:min(num_inducing, num_data)] i = np.random.permutation(num_data)[:min(num_inducing, num_data)]
Z = param_to_array(X)[i].copy() Z = X.view(np.ndarray)[i].copy()
else: else:
assert Z.shape[1] == input_dim assert Z.shape[1] == input_dim

View file

@ -1,7 +1,6 @@
import numpy as np import numpy as np
from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController
from ...util.misc import param_to_array
from ...core.parameterization.variational import VariationalPosterior from ...core.parameterization.variational import VariationalPosterior
from .base_plots import x_frame2D from .base_plots import x_frame2D
import itertools import itertools
@ -55,9 +54,9 @@ def plot_latent(model, labels=None, which_indices=None,
#fetch the data points X that we'd like to plot #fetch the data points X that we'd like to plot
X = model.X X = model.X
if isinstance(X, VariationalPosterior): if isinstance(X, VariationalPosterior):
X = param_to_array(X.mean) X = X.mean
else: else:
X = param_to_array(X) X = X
if X.shape[0] > 1000: if X.shape[0] > 1000:
@ -175,7 +174,7 @@ def plot_latent(model, labels=None, which_indices=None,
ax.set_aspect('auto') # set a nice aspect ratio ax.set_aspect('auto') # set a nice aspect ratio
if plot_inducing: if plot_inducing:
Z = param_to_array(model.Z) Z = model.Z
ax.plot(Z[:, input_1], Z[:, input_2], '^w') ax.plot(Z[:, input_1], Z[:, input_2], '^w')
ax.set_xlim((xmin, xmax)) ax.set_xlim((xmin, xmax))

View file

@ -35,8 +35,7 @@ def add_bar_labels(fig, ax, bars, bottom=0):
def plot_bars(fig, ax, x, ard_params, color, name, bottom=0): def plot_bars(fig, ax, x, ard_params, color, name, bottom=0):
from ...util.misc import param_to_array return ax.bar(left=x, height=ard_params.view(np.ndarray), width=.8,
return ax.bar(left=x, height=param_to_array(ard_params), width=.8,
bottom=bottom, align='center', bottom=bottom, align='center',
color=color, edgecolor='k', linewidth=1.2, color=color, edgecolor='k', linewidth=1.2,
label=name.replace("_"," ")) label=name.replace("_"," "))
@ -100,9 +99,7 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False, filtering=Non
return ax return ax
def plot(kernel, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs): def plot(kernel, x=None, plot_limits=None, resolution=None, *args, **kwargs):
if which_parts == 'all':
which_parts = [True] * kernel.size
if kernel.input_dim == 1: if kernel.input_dim == 1:
if x is None: if x is None:
x = np.zeros((1, 1)) x = np.zeros((1, 1))
@ -133,7 +130,7 @@ def plot(kernel, x=None, plot_limits=None, which_parts='all', resolution=None, *
assert x.size == 2, "The size of the fixed variable x is not 2" assert x.size == 2, "The size of the fixed variable x is not 2"
x = x.reshape((1, 2)) x = x.reshape((1, 2))
if plot_limits == None: if plot_limits is None:
xmin, xmax = (x - 5).flatten(), (x + 5).flatten() xmin, xmax = (x - 5).flatten(), (x + 5).flatten()
elif len(plot_limits) == 2: elif len(plot_limits) == 2:
xmin, xmax = plot_limits xmin, xmax = plot_limits
@ -142,12 +139,10 @@ def plot(kernel, x=None, plot_limits=None, which_parts='all', resolution=None, *
resolution = resolution or 51 resolution = resolution or 51
xx, yy = np.mgrid[xmin[0]:xmax[0]:1j * resolution, xmin[1]:xmax[1]:1j * resolution] xx, yy = np.mgrid[xmin[0]:xmax[0]:1j * resolution, xmin[1]:xmax[1]:1j * resolution]
xg = np.linspace(xmin[0], xmax[0], resolution)
yg = np.linspace(xmin[1], xmax[1], resolution)
Xnew = np.vstack((xx.flatten(), yy.flatten())).T Xnew = np.vstack((xx.flatten(), yy.flatten())).T
Kx = kernel.K(Xnew, x, which_parts) Kx = kernel.K(Xnew, x)
Kx = Kx.reshape(resolution, resolution).T Kx = Kx.reshape(resolution, resolution).T
pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs) # @UndefinedVariable pb.contour(xx, yy, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs) # @UndefinedVariable
pb.xlim(xmin[0], xmax[0]) pb.xlim(xmin[0], xmax[0])
pb.ylim(xmin[1], xmax[1]) pb.ylim(xmin[1], xmax[1])
pb.xlabel("x1") pb.xlabel("x1")

View file

@ -8,7 +8,6 @@ except:
pass pass
import numpy as np import numpy as np
from base_plots import gpplot, x_frame1D, x_frame2D from base_plots import gpplot, x_frame1D, x_frame2D
from ...util.misc import param_to_array
from ...models.gp_coregionalized_regression import GPCoregionalizedRegression from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
from scipy import sparse from scipy import sparse
@ -67,7 +66,6 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
X_variance = model.X.variance X_variance = model.X.variance
else: else:
X = model.X X = model.X
#X, Y = param_to_array(X, model.Y)
Y = model.Y Y = model.Y
if sparse.issparse(Y): Y = Y.todense().view(np.ndarray) if sparse.issparse(Y): Y = Y.todense().view(np.ndarray)

View file

@ -1,5 +1,4 @@
import pylab as pb, numpy as np import pylab as pb, numpy as np
from ...util.misc import param_to_array
def plot(parameterized, fignum=None, ax=None, colors=None): def plot(parameterized, fignum=None, ax=None, colors=None):
""" """
@ -21,7 +20,7 @@ def plot(parameterized, fignum=None, ax=None, colors=None):
else: else:
colors = iter(colors) colors = iter(colors)
plots = [] plots = []
means, variances = param_to_array(parameterized.mean, parameterized.variance) means, variances = parameterized.mean, parameterized.variance
x = np.arange(means.shape[0]) x = np.arange(means.shape[0])
for i in range(means.shape[1]): for i in range(means.shape[1]):
if ax is None: if ax is None:
@ -68,7 +67,7 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_sid
else: else:
colors = iter(colors) colors = iter(colors)
plots = [] plots = []
means, variances, gamma = param_to_array(parameterized.mean, parameterized.variance, parameterized.binary_prob) means, variances, gamma = parameterized.mean, parameterized.variance, parameterized.binary_prob
x = np.arange(means.shape[0]) x = np.arange(means.shape[0])
for i in range(means.shape[1]): for i in range(means.shape[1]):
if side_by_side: if side_by_side:
@ -77,7 +76,7 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_sid
else: else:
sub1 = (means.shape[1]*2,1,2*i+1) sub1 = (means.shape[1]*2,1,2*i+1)
sub2 = (means.shape[1]*2,1,2*i+2) sub2 = (means.shape[1]*2,1,2*i+2)
# mean and variance plot # mean and variance plot
a = fig.add_subplot(*sub1) a = fig.add_subplot(*sub1)
a.plot(means, c='k', alpha=.3) a.plot(means, c='k', alpha=.3)

View file

@ -4,7 +4,6 @@ import GPy
import numpy as np import numpy as np
import matplotlib as mpl import matplotlib as mpl
import time import time
from ...util.misc import param_to_array
from GPy.core.parameterization.variational import VariationalPosterior from GPy.core.parameterization.variational import VariationalPosterior
try: try:
import visual import visual
@ -127,7 +126,7 @@ class lvm(matplotlib_show):
self.latent_index = latent_index self.latent_index = latent_index
self.latent_dim = model.input_dim self.latent_dim = model.input_dim
self.disable_drag = disable_drag self.disable_drag = disable_drag
# The red cross which shows current latent point. # The red cross which shows current latent point.
self.latent_values = vals self.latent_values = vals
self.latent_handle = self.latent_axes.plot([0],[0],'rx',mew=2)[0] self.latent_handle = self.latent_axes.plot([0],[0],'rx',mew=2)[0]
@ -474,7 +473,7 @@ class mocap_data_show(matplotlib_show):
self.axes.set_ylim(self.y_lim) self.axes.set_ylim(self.y_lim)
self.axes.set_zlim(self.z_lim) self.axes.set_zlim(self.z_lim)
self.axes.auto_scale_xyz([-1., 1.], [-1., 1.], [-1., 1.]) self.axes.auto_scale_xyz([-1., 1.], [-1., 1.], [-1., 1.])
# self.axes.set_aspect('equal') # self.axes.set_aspect('equal')
# self.axes.autoscale(enable=False) # self.axes.autoscale(enable=False)
@ -500,7 +499,7 @@ class skeleton_show(mocap_data_show):
:param vals: set of modeled angles to use for printing in the axis when it's first created. :param vals: set of modeled angles to use for printing in the axis when it's first created.
:type vals: np.array :type vals: np.array
:param skel: skeleton object that has the parameters of the motion capture skeleton associated with it. :param skel: skeleton object that has the parameters of the motion capture skeleton associated with it.
:type skel: mocap.skeleton object :type skel: mocap.skeleton object
:param padding: :param padding:
:type int :type int
""" """
@ -512,7 +511,7 @@ class skeleton_show(mocap_data_show):
"""Takes a set of angles and converts them to the x,y,z coordinates in the internal prepresentation of the class, ready for plotting. """Takes a set of angles and converts them to the x,y,z coordinates in the internal prepresentation of the class, ready for plotting.
:param vals: the values that are being modelled.""" :param vals: the values that are being modelled."""
if self.padding>0: if self.padding>0:
channels = np.zeros((self.vals.shape[0], self.vals.shape[1]+self.padding)) channels = np.zeros((self.vals.shape[0], self.vals.shape[1]+self.padding))
channels[:, 0:self.vals.shape[0]] = self.vals channels[:, 0:self.vals.shape[0]] = self.vals
@ -524,7 +523,7 @@ class skeleton_show(mocap_data_show):
self.vals[:, 0] = vals_mat[:, 0].copy() self.vals[:, 0] = vals_mat[:, 0].copy()
self.vals[:, 1] = vals_mat[:, 2].copy() self.vals[:, 1] = vals_mat[:, 2].copy()
self.vals[:, 2] = vals_mat[:, 1].copy() self.vals[:, 2] = vals_mat[:, 1].copy()
def wrap_around(self, lim, connect): def wrap_around(self, lim, connect):
quot = lim[1] - lim[0] quot = lim[1] - lim[0]
self.vals = rem(self.vals, quot)+lim[0] self.vals = rem(self.vals, quot)+lim[0]
@ -546,7 +545,7 @@ def data_play(Y, visualizer, frame_rate=30):
Example usage: Example usage:
This example loads in the CMU mocap database (http://mocap.cs.cmu.edu) subject number 35 motion number 01. It then plays it using the mocap_show visualize object. This example loads in the CMU mocap database (http://mocap.cs.cmu.edu) subject number 35 motion number 01. It then plays it using the mocap_show visualize object.
.. code-block:: python .. code-block:: python
data = GPy.util.datasets.cmu_mocap(subject='35', train_motions=['01']) data = GPy.util.datasets.cmu_mocap(subject='35', train_motions=['01'])
@ -556,7 +555,7 @@ def data_play(Y, visualizer, frame_rate=30):
GPy.util.visualize.data_play(Y, visualize) GPy.util.visualize.data_play(Y, visualize)
""" """
for y in Y: for y in Y:
visualizer.modify(y[None, :]) visualizer.modify(y[None, :])

View file

@ -215,7 +215,10 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
if verbose: if verbose:
print("Checking gradients of Kdiag(X) wrt X.") print("Checking gradients of Kdiag(X) wrt X.")
try: try:
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose) testmodel = Kern_check_dKdiag_dX(kern, X=X)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)
except NotImplementedError: except NotImplementedError:
result=True result=True
if verbose: if verbose:
@ -346,12 +349,58 @@ class KernelTestsNonContinuous(unittest.TestCase):
kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
def test_ODE_UY(self): def test_ODE_UY(self):
kern = GPy.kern.ODE_UY(2, active_dims=[0, self.D]) kern = GPy.kern.ODE_UY(2, active_dims=[0, self.D])
X = self.X[self.X[:,-1]!=2] X = self.X[self.X[:,-1]!=2]
X2 = self.X2[self.X2[:,-1]!=2] X2 = self.X2[self.X2[:,-1]!=2]
self.assertTrue(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) self.assertTrue(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
class Coregionalize_weave_test(unittest.TestCase):
"""
Make sure that the Coregionalize kernel works with and without weave enabled
"""
def setUp(self):
self.k = GPy.kern.Coregionalize(1, output_dim=12)
self.N1, self.N2 = 100, 200
self.X = np.random.randint(0,12,(self.N1,1))
self.X2 = np.random.randint(0,12,(self.N2,1))
def test_sym(self):
dL_dK = np.random.randn(self.N1, self.N1)
GPy.util.config.config.set('weave', 'working', 'True')
K_weave = self.k.K(self.X)
self.k.update_gradients_full(dL_dK, self.X)
grads_weave = self.k.gradient.copy()
GPy.util.config.config.set('weave', 'working', 'False')
K_numpy = self.k.K(self.X)
self.k.update_gradients_full(dL_dK, self.X)
grads_numpy = self.k.gradient.copy()
self.assertTrue(np.allclose(K_numpy, K_weave))
self.assertTrue(np.allclose(grads_numpy, grads_weave))
def test_nonsym(self):
dL_dK = np.random.randn(self.N1, self.N2)
GPy.util.config.config.set('weave', 'working', 'True')
K_weave = self.k.K(self.X, self.X2)
self.k.update_gradients_full(dL_dK, self.X, self.X2)
grads_weave = self.k.gradient.copy()
GPy.util.config.config.set('weave', 'working', 'False')
K_numpy = self.k.K(self.X, self.X2)
self.k.update_gradients_full(dL_dK, self.X, self.X2)
grads_numpy = self.k.gradient.copy()
self.assertTrue(np.allclose(K_numpy, K_weave))
self.assertTrue(np.allclose(grads_numpy, grads_weave))
#reset the weave state for any other tests
GPy.util.config.config.set('weave', 'working', 'False')
if __name__ == "__main__": if __name__ == "__main__":
print "Running unit tests, please be (very) patient..." print "Running unit tests, please be (very) patient..."

View file

@ -352,8 +352,8 @@ class TestNoiseModels(object):
print model print model
#print model._get_params() #print model._get_params()
np.testing.assert_almost_equal( np.testing.assert_almost_equal(
model.pdf(f.copy(), Y.copy()), model.pdf(f.copy(), Y.copy()).prod(),
np.exp(model.logpdf(f.copy(), Y.copy())) np.exp(model.logpdf(f.copy(), Y.copy()).sum())
) )
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)

View file

@ -20,7 +20,7 @@ class MiscTests(unittest.TestCase):
m = GPy.models.GPRegression(self.X, self.Y, kernel=k) m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
m.randomize() m.randomize()
m.likelihood.variance = .5 m.likelihood.variance = .5
Kinv = np.linalg.pinv(k.K(self.X) + np.eye(self.N)*m.likelihood.variance) Kinv = np.linalg.pinv(k.K(self.X) + np.eye(self.N) * m.likelihood.variance)
K_hat = k.K(self.X_new) - k.K(self.X_new, self.X).dot(Kinv).dot(k.K(self.X, self.X_new)) K_hat = k.K(self.X_new) - k.K(self.X_new, self.X).dot(Kinv).dot(k.K(self.X, self.X_new))
mu_hat = k.K(self.X_new, self.X).dot(Kinv).dot(m.Y_normalized) mu_hat = k.K(self.X_new, self.X).dot(Kinv).dot(m.Y_normalized)
@ -43,7 +43,7 @@ class MiscTests(unittest.TestCase):
Z = m.Z[:] Z = m.Z[:]
X = self.X[:] X = self.X[:]
#Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression # Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression
Kinv = m.posterior.woodbury_inv Kinv = m.posterior.woodbury_inv
K_hat = k.K(self.X_new) - k.K(self.X_new, Z).dot(Kinv).dot(k.K(Z, self.X_new)) K_hat = k.K(self.X_new) - k.K(self.X_new, Z).dot(Kinv).dot(k.K(Z, self.X_new))
@ -51,13 +51,13 @@ class MiscTests(unittest.TestCase):
self.assertEquals(mu.shape, (self.N_new, self.D)) self.assertEquals(mu.shape, (self.N_new, self.D))
self.assertEquals(covar.shape, (self.N_new, self.N_new)) self.assertEquals(covar.shape, (self.N_new, self.N_new))
np.testing.assert_almost_equal(K_hat, covar) np.testing.assert_almost_equal(K_hat, covar)
#np.testing.assert_almost_equal(mu_hat, mu) # np.testing.assert_almost_equal(mu_hat, mu)
mu, var = m._raw_predict(self.X_new) mu, var = m._raw_predict(self.X_new)
self.assertEquals(mu.shape, (self.N_new, self.D)) self.assertEquals(mu.shape, (self.N_new, self.D))
self.assertEquals(var.shape, (self.N_new, 1)) self.assertEquals(var.shape, (self.N_new, 1))
np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var) np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var)
#np.testing.assert_almost_equal(mu_hat, mu) # np.testing.assert_almost_equal(mu_hat, mu)
def test_likelihood_replicate(self): def test_likelihood_replicate(self):
m = GPy.models.GPRegression(self.X, self.Y) m = GPy.models.GPRegression(self.X, self.Y)
@ -110,22 +110,45 @@ class MiscTests(unittest.TestCase):
m2.kern.lengthscale = m.kern['.*lengthscale'] m2.kern.lengthscale = m.kern['.*lengthscale']
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
def test_missing_data(self):
from GPy import kern
from GPy.models import BayesianGPLVM
from GPy.examples.dimensionality_reduction import _simulate_matern
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, False)
Y = Ylist[0]
inan = np.random.binomial(1, .9, size=Y.shape).astype(bool) # ~90% of entries missing
Ymissing = Y.copy()
Ymissing[inan] = np.nan
k = kern.Linear(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
kernel=k, missing_data=True)
assert(m.checkgrad())
k = kern.RBF(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
kernel=k, missing_data=True)
assert(m.checkgrad())
def test_likelihood_replicate_kern(self): def test_likelihood_replicate_kern(self):
m = GPy.models.GPRegression(self.X, self.Y) m = GPy.models.GPRegression(self.X, self.Y)
m2 = GPy.models.GPRegression(self.X, self.Y) m2 = GPy.models.GPRegression(self.X, self.Y)
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
m.kern.randomize() m.kern.randomize()
m2.kern[''] = m.kern[:] m2.kern[''] = m.kern[:]
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
m.kern.randomize() m.kern.randomize()
m2.kern[:] = m.kern[:] m2.kern[:] = m.kern[:]
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
m.kern.randomize() m.kern.randomize()
m2.kern[''] = m.kern[''] m2.kern[''] = m.kern['']
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
m.kern.randomize() m.kern.randomize()
m2.kern[:] = m.kern[''].values() m2.kern[:] = m.kern[''].values()
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
def test_big_model(self): def test_big_model(self):
m = GPy.examples.dimensionality_reduction.mrd_simulation(optimize=0, plot=0, plot_sim=0) m = GPy.examples.dimensionality_reduction.mrd_simulation(optimize=0, plot=0, plot_sim=0)
@@ -158,7 +181,7 @@ class MiscTests(unittest.TestCase):
def test_model_optimize(self): def test_model_optimize(self):
X = np.random.uniform(-3., 3., (20, 1)) X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * 0.05 Y = np.sin(X) + np.random.randn(20, 1) * 0.05
m = GPy.models.GPRegression(X,Y) m = GPy.models.GPRegression(X, Y)
m.optimize() m.optimize()
print m print m
@@ -219,8 +242,8 @@ class GradientTests(np.testing.TestCase):
mlp = GPy.kern.MLP(1) mlp = GPy.kern.MLP(1)
self.check_model(mlp, model_type='GPRegression', dimension=1) self.check_model(mlp, model_type='GPRegression', dimension=1)
#TODO: # TODO:
#def test_GPRegression_poly_1d(self): # def test_GPRegression_poly_1d(self):
# ''' Testing the GP regression with polynomial kernel with white kernel on 1d data ''' # ''' Testing the GP regression with polynomial kernel with white kernel on 1d data '''
# mlp = GPy.kern.Poly(1, degree=5) # mlp = GPy.kern.Poly(1, degree=5)
# self.check_model(mlp, model_type='GPRegression', dimension=1) # self.check_model(mlp, model_type='GPRegression', dimension=1)
@@ -417,11 +440,11 @@ class GradientTests(np.testing.TestCase):
def test_gp_heteroscedastic_regression(self): def test_gp_heteroscedastic_regression(self):
num_obs = 25 num_obs = 25
X = np.random.randint(0,140,num_obs) X = np.random.randint(0, 140, num_obs)
X = X[:,None] X = X[:, None]
Y = 25. + np.sin(X/20.) * 2. + np.random.rand(num_obs)[:,None] Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None]
kern = GPy.kern.Bias(1) + GPy.kern.RBF(1) kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
m = GPy.models.GPHeteroscedasticRegression(X,Y,kern) m = GPy.models.GPHeteroscedasticRegression(X, Y, kern)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_gp_kronecker_gaussian(self): def test_gp_kronecker_gaussian(self):
@@ -432,12 +455,12 @@ class GradientTests(np.testing.TestCase):
k1 = GPy.kern.RBF(1) # + GPy.kern.White(1) k1 = GPy.kern.RBF(1) # + GPy.kern.White(1)
k2 = GPy.kern.RBF(1) # + GPy.kern.White(1) k2 = GPy.kern.RBF(1) # + GPy.kern.White(1)
Y = np.random.randn(N1, N2) Y = np.random.randn(N1, N2)
Y = Y-Y.mean(0) Y = Y - Y.mean(0)
Y = Y/Y.std(0) Y = Y / Y.std(0)
m = GPy.models.GPKroneckerGaussianRegression(X1, X2, Y, k1, k2) m = GPy.models.GPKroneckerGaussianRegression(X1, X2, Y, k1, k2)
# build the model the dumb way # build the model the dumb way
assert (N1*N2<1000), "too much data for standard GPs!" assert (N1 * N2 < 1000), "too much data for standard GPs!"
yy, xx = np.meshgrid(X2, X1) yy, xx = np.meshgrid(X2, X1)
Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T
kg = GPy.kern.RBF(1, active_dims=[0]) * GPy.kern.RBF(1, active_dims=[1]) kg = GPy.kern.RBF(1, active_dims=[0]) * GPy.kern.RBF(1, active_dims=[1])
@@ -458,11 +481,11 @@ class GradientTests(np.testing.TestCase):
def test_gp_VGPC(self): def test_gp_VGPC(self):
num_obs = 25 num_obs = 25
X = np.random.randint(0,140,num_obs) X = np.random.randint(0, 140, num_obs)
X = X[:,None] X = X[:, None]
Y = 25. + np.sin(X/20.) * 2. + np.random.rand(num_obs)[:,None] Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None]
kern = GPy.kern.Bias(1) + GPy.kern.RBF(1) kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
m = GPy.models.GPVariationalGaussianApproximation(X,Y,kern) m = GPy.models.GPVariationalGaussianApproximation(X, Y, kern)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
View file
@@ -8,7 +8,9 @@ import GPy
import numpy as np import numpy as np
from GPy.core.parameterization.parameter_core import HierarchyError from GPy.core.parameterization.parameter_core import HierarchyError
from GPy.core.parameterization.observable_array import ObsAr from GPy.core.parameterization.observable_array import ObsAr
from GPy.core.parameterization.transformations import NegativeLogexp from GPy.core.parameterization.transformations import NegativeLogexp, Logistic
from GPy.core.parameterization.parameterized import Parameterized
from GPy.core.parameterization.param import Param
class ArrayCoreTest(unittest.TestCase): class ArrayCoreTest(unittest.TestCase):
def setUp(self): def setUp(self):
@@ -32,7 +34,7 @@ class ParameterizedTest(unittest.TestCase):
self.white = GPy.kern.White(1) self.white = GPy.kern.White(1)
from GPy.core.parameterization import Param from GPy.core.parameterization import Param
from GPy.core.parameterization.transformations import Logistic from GPy.core.parameterization.transformations import Logistic
self.param = Param('param', np.random.uniform(0,1,(25,2)), Logistic(0, 1)) self.param = Param('param', np.random.uniform(0,1,(10,5)), Logistic(0, 1))
self.test1 = GPy.core.Parameterized("test model") self.test1 = GPy.core.Parameterized("test model")
self.test1.param = self.param self.test1.param = self.param
@@ -153,12 +155,14 @@ class ParameterizedTest(unittest.TestCase):
self.test1.kern.randomize() self.test1.kern.randomize()
self.assertEqual(val, self.rbf.variance) self.assertEqual(val, self.rbf.variance)
# def test_updates(self): def test_updates(self):
# # WHAT DO YOU WANT TO TEST HERE? val = float(self.testmodel.log_likelihood())
# self.test1.update_model(False) self.testmodel.update_model(False)
# val = float(self.rbf.variance) self.testmodel.kern.randomize()
# self.test1.kern.randomize() self.testmodel.likelihood.randomize()
# self.assertEqual(val, self.rbf.variance,str(self.test1)) self.assertEqual(val, self.testmodel.log_likelihood())
self.testmodel.update_model(True)
self.assertNotEqual(val, self.testmodel.log_likelihood())
def test_fixing_optimize(self): def test_fixing_optimize(self):
self.testmodel.kern.lengthscale.fix() self.testmodel.kern.lengthscale.fix()
@@ -196,6 +200,20 @@ class ParameterizedTest(unittest.TestCase):
unfixed = self.testmodel.kern.unfix() unfixed = self.testmodel.kern.unfix()
self.assertListEqual(unfixed.tolist(), [0,1]) self.assertListEqual(unfixed.tolist(), [0,1])
def test_constraints_in_init(self):
class Test(Parameterized):
def __init__(self, name=None, parameters=[], *a, **kw):
super(Test, self).__init__(name=name)
self.x = Param('x', np.random.uniform(0,1,(3,4)))
self.x[0].constrain_bounded(0,1)
self.link_parameter(self.x)
self.x[1].fix()
t = Test()
c = {Logistic(0,1): np.array([0, 1, 2, 3]), 'fixed': np.array([4, 5, 6, 7])}
np.testing.assert_equal(t.x.constraints[Logistic(0,1)], c[Logistic(0,1)])
np.testing.assert_equal(t.x.constraints['fixed'], c['fixed'])
def test_printing(self): def test_printing(self):
print self.test1 print self.test1
print self.param print self.param
View file
@@ -18,12 +18,3 @@ import multioutput
import linalg_gpu import linalg_gpu
import mpi import mpi
try:
import sympy
_sympy_available = True
del sympy
except ImportError as e:
_sympy_available = False
if _sympy_available:
import symbolic
View file
@@ -188,4 +188,6 @@ class Cache_this(object):
self.ignore_args = ignore_args self.ignore_args = ignore_args
self.force_args = force_kwargs self.force_args = force_kwargs
def __call__(self, f): def __call__(self, f):
return Cacher_wrap(f, self.limit, self.ignore_args, self.force_args) newf = Cacher_wrap(f, self.limit, self.ignore_args, self.force_args)
update_wrapper(newf, f)
return newf
View file
@@ -228,7 +228,7 @@
"fruitfly_tomancak_cel_files": { "fruitfly_tomancak_cel_files": {
"citation": "'Systematic determination of patterns of gene expression during Drosophila embryogenesis' Pavel Tomancak, Amy Beaton, Richard Weiszmann, Elaine Kwan, ShengQiang Shu, Suzanna E Lewis, Stephen Richards, Michael Ashburner, Volker Hartenstein, Susan E Celniker, and Gerald M Rubin", "citation": "'Systematic determination of patterns of gene expression during Drosophila embryogenesis' Pavel Tomancak, Amy Beaton, Richard Weiszmann, Elaine Kwan, ShengQiang Shu, Suzanna E Lewis, Stephen Richards, Michael Ashburner, Volker Hartenstein, Susan E Celniker, and Gerald M Rubin",
"details": "Gene expression results from blastoderm development in Drosophila Melanogaster.", "details": "Gene expression results from blastoderm development in Drosophila Melanogaster.",
"files": [ "files": [
[ [
"embryo_tc_4_1.CEL", "embryo_tc_4_1.CEL",
"embryo_tc_4_2.CEL", "embryo_tc_4_2.CEL",
@@ -284,7 +284,7 @@
"details": "Google trends results.", "details": "Google trends results.",
"files": [ "files": [
[ [
] ]
], ],
"license": null, "license": null,
@@ -293,7 +293,7 @@
"http://www.google.com/trends/" "http://www.google.com/trends/"
] ]
}, },
"hapmap3": { "hapmap3": {
"citation": "Gibbs, Richard A., et al. 'The international HapMap project.' Nature 426.6968 (2003): 789-796.", "citation": "Gibbs, Richard A., et al. 'The international HapMap project.' Nature 426.6968 (2003): 789-796.",
"details": "HapMap Project: Single Nucleotide Polymorphism sequenced in all human populations. \n The HapMap phase three SNP dataset - 1184 samples out of 11 populations.\n See http://www.nature.com/nature/journal/v426/n6968/abs/nature02168.html for details.\n\n SNP_matrix (A) encoding [see Paschou et all. 2007 (PCA-Correlated SNPs...)]:\n Let (B1,B2) be the alphabetically sorted bases, which occur in the j-th SNP, then\n\n / 1, iff SNPij==(B1,B1)\n Aij = | 0, iff SNPij==(B1,B2)\n \\\\ -1, iff SNPij==(B2,B2)\n\n The SNP data and the meta information (such as iid, sex and phenotype) are\n stored in the dataframe datadf, index is the Individual ID, \n with following columns for metainfo:\n\n * family_id -> Family ID\n * paternal_id -> Paternal ID\n * maternal_id -> Maternal ID\n * sex -> Sex (1=male; 2=female; other=unknown)\n * phenotype -> Phenotype (-9, or 0 for unknown)\n * population -> Population string (e.g. 'ASW' - 'YRI')\n * rest are SNP rs (ids)\n\n More information is given in infodf:\n\n * Chromosome:\n - autosomal chromosemes -> 1-22\n - X X chromosome -> 23\n - Y Y chromosome -> 24\n - XY Pseudo-autosomal region of X -> 25\n - MT Mitochondrial -> 26\n * Relative Positon (to Chromosome) [base pairs]\n\n ", "details": "HapMap Project: Single Nucleotide Polymorphism sequenced in all human populations. \n The HapMap phase three SNP dataset - 1184 samples out of 11 populations.\n See http://www.nature.com/nature/journal/v426/n6968/abs/nature02168.html for details.\n\n SNP_matrix (A) encoding [see Paschou et all. 2007 (PCA-Correlated SNPs...)]:\n Let (B1,B2) be the alphabetically sorted bases, which occur in the j-th SNP, then\n\n / 1, iff SNPij==(B1,B1)\n Aij = | 0, iff SNPij==(B1,B2)\n \\\\ -1, iff SNPij==(B2,B2)\n\n The SNP data and the meta information (such as iid, sex and phenotype) are\n stored in the dataframe datadf, index is the Individual ID, \n with following columns for metainfo:\n\n * family_id -> Family ID\n * paternal_id -> Paternal ID\n * maternal_id -> Maternal ID\n * sex -> Sex (1=male; 2=female; other=unknown)\n * phenotype -> Phenotype (-9, or 0 for unknown)\n * population -> Population string (e.g. 'ASW' - 'YRI')\n * rest are SNP rs (ids)\n\n More information is given in infodf:\n\n * Chromosome:\n - autosomal chromosemes -> 1-22\n - X X chromosome -> 23\n - Y Y chromosome -> 24\n - XY Pseudo-autosomal region of X -> 25\n - MT Mitochondrial -> 26\n * Relative Positon (to Chromosome) [base pairs]\n\n ",
@@ -523,6 +523,23 @@
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/singlecell/" "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/singlecell/"
] ]
}, },
"singlecell_islam": {
"citation": "Single-Cell RNA-Seq Reveals Dynamic, Random Monoallelic Gene Expression in Mammalian Cells Qiaolin Deng, Daniel Ramskoeld, Bjoern Reinius, and Rickard Sandberg Science 10 January 2014: 343 (6167), 193-196. [DOI:10.1126/science.1245316]",
"details" : "92 single cells (48 mouse ES cells, 44 mouse embryonic fibroblasts and 4 negative controls) were analyzed by single-cell tagged reverse transcription (STRT)",
"files" : [["GSE29087_L139_expression_tab.txt.gz"], ["GSE29087_family.soft.gz"]],
"license" : "Gene Expression Omnibus: http://www.ncbi.nlm.nih.gov/geo/info/disclaimer.html",
"size" : 1159449,
"urls" : ["ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE29nnn/GSE29087/suppl/", "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE29nnn/GSE29087/soft/"]
},
"singlecell_deng": {
"citation": "Deng Q, Ramsköld D, Reinius B, Sandberg R. Single-cell RNA-seq reveals dynamic, random monoallelic gene expression in mammalian cells. Science 2014 Jan 10;343(6167):193-6. PMID: 24408435",
"details" : "First generation mouse strain crosses were used to study monoallelic expression on the single cell level",
"files" : [["?acc=GSE45719&format=file"], ["GSE45719_series_matrix.txt.gz"]],
"license" : "Gene Expression Omnibus: http://www.ncbi.nlm.nih.gov/geo/info/disclaimer.html",
"size" : 1159449,
"save_names": [["GSE45719_Raw.tar"], [null]],
"urls" : ["http://www.ncbi.nlm.nih.gov/geo/download/", "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE45nnn/GSE45719/matrix/"]
},
"sod1_mouse": { "sod1_mouse": {
"citation": "Transcriptomic indices of fast and slow disease progression in two mouse models of amyotrophic lateral sclerosis' Nardo G1, Iennaco R, Fusi N, Heath PR, Marino M, Trolese MC, Ferraiuolo L, Lawrence N, Shaw PJ, Bendotti C Brain. 2013 Nov;136(Pt 11):3305-32. doi: 10.1093/brain/awt250. Epub 2013 Sep 24.", "citation": "Transcriptomic indices of fast and slow disease progression in two mouse models of amyotrophic lateral sclerosis' Nardo G1, Iennaco R, Fusi N, Heath PR, Marino M, Trolese MC, Ferraiuolo L, Lawrence N, Shaw PJ, Bendotti C Brain. 2013 Nov;136(Pt 11):3305-32. doi: 10.1093/brain/awt250. Epub 2013 Sep 24.",
"details": "Gene expression data from two separate strains of mice: C57 and 129Sv in wild type and SOD1 mutant strains.", "details": "Gene expression data from two separate strains of mice: C57 and 129Sv in wild type and SOD1 mutant strains.",
View file
@@ -82,20 +82,32 @@ def prompt_user(prompt):
def data_available(dataset_name=None): def data_available(dataset_name=None):
"""Check if the data set is available on the local machine already.""" """Check if the data set is available on the local machine already."""
for file_list in data_resources[dataset_name]['files']: from itertools import izip_longest
for file in file_list: dr = data_resources[dataset_name]
if not os.path.exists(os.path.join(data_path, dataset_name, file)): zip_urls = (dr['files'], )
if dr.has_key('save_names'): zip_urls += (dr['save_names'], )
else: zip_urls += ([],)
for file_list, save_list in izip_longest(*zip_urls, fillvalue=[]):
for f, s in izip_longest(file_list, save_list, fillvalue=None):
if s is not None: f=s # If there is a save_name given, use that one
if not os.path.exists(os.path.join(data_path, dataset_name, f)):
return False return False
return True return True
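# A small sketch of the pairing logic used above (file names are made up, not
# real dataset entries): izip_longest pads the shorter save_names structure
# with the fillvalue, so files without an explicit save name fall back to the
# name taken from the URL.
from itertools import izip_longest  # itertools.zip_longest on Python 3

files = [['a.txt.gz'], ['b.soft.gz']]
save_names = [['a_renamed.tar']]  # only the first group has a save name
for file_list, save_list in izip_longest(files, save_names, fillvalue=[]):
    for f, s in izip_longest(file_list, save_list, fillvalue=None):
        print s if s is not None else f  # -> a_renamed.tar, then b.soft.gz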
def download_url(url, store_directory, save_name = None, messages = True, suffix=''): def download_url(url, store_directory, save_name=None, messages=True, suffix=''):
"""Download a file from a url and save it to disk.""" """Download a file from a url and save it to disk."""
i = url.rfind('/') i = url.rfind('/')
file = url[i+1:] file = url[i+1:]
print file print file
dir_name = os.path.join(data_path, store_directory) dir_name = os.path.join(data_path, store_directory)
save_name = os.path.join(dir_name, file)
print "Downloading ", url, "->", os.path.join(store_directory, file) if save_name is None: save_name = os.path.join(dir_name, file)
else: save_name = os.path.join(dir_name, save_name)
if suffix is None: suffix=''
print "Downloading ", url, "->", save_name
if not os.path.exists(dir_name): if not os.path.exists(dir_name):
os.makedirs(dir_name) os.makedirs(dir_name)
try: try:
@@ -178,19 +190,24 @@ def authorize_download(dataset_name=None):
def download_data(dataset_name=None): def download_data(dataset_name=None):
"""Check with the user that the are happy with terms and conditions for the data set, then download it.""" """Check with the user that the are happy with terms and conditions for the data set, then download it."""
import itertools
dr = data_resources[dataset_name] dr = data_resources[dataset_name]
if not authorize_download(dataset_name): if not authorize_download(dataset_name):
raise Exception("Permission to download data set denied.") raise Exception("Permission to download data set denied.")
if dr.has_key('suffices'): zip_urls = (dr['urls'], dr['files'])
for url, files, suffices in zip(dr['urls'], dr['files'], dr['suffices']):
for file, suffix in zip(files, suffices): if dr.has_key('save_names'): zip_urls += (dr['save_names'], )
download_url(os.path.join(url,file), dataset_name, dataset_name, suffix=suffix) else: zip_urls += ([],)
else:
for url, files in zip(dr['urls'], dr['files']): if dr.has_key('suffices'): zip_urls += (dr['suffices'], )
for file in files: else: zip_urls += ([],)
download_url(os.path.join(url,file), dataset_name, dataset_name)
for url, files, save_names, suffices in itertools.izip_longest(*zip_urls, fillvalue=[]):
for f, save_name, suffix in itertools.izip_longest(files, save_names, suffices, fillvalue=None):
download_url(os.path.join(url,f), dataset_name, save_name, suffix=suffix)
return True return True
def data_details_return(data, data_set): def data_details_return(data, data_set):
@@ -895,6 +912,143 @@ def singlecell(data_set='singlecell'):
'genes': genes, 'labels':labels, 'genes': genes, 'labels':labels,
}, data_set) }, data_set)
def singlecell_rna_seq_islam(dataset='singlecell_islam'):
if not data_available(dataset):
download_data(dataset)
from pandas import read_csv, DataFrame, concat
dir_path = os.path.join(data_path, dataset)
filename = os.path.join(dir_path, 'GSE29087_L139_expression_tab.txt.gz')
data = read_csv(filename, sep='\t', skiprows=6, compression='gzip', header=None)
header1 = read_csv(filename, sep='\t', header=None, skiprows=5, nrows=1, compression='gzip')
header2 = read_csv(filename, sep='\t', header=None, skiprows=3, nrows=1, compression='gzip')
data.columns = np.concatenate((header1.ix[0, :], header2.ix[0, 7:]))
Y = data.set_index("Feature").ix[8:, 6:-4].T.astype(float)
# read the info .soft
filename = os.path.join(dir_path, 'GSE29087_family.soft.gz')
info = read_csv(filename, sep='\t', skiprows=0, compression='gzip', header=None)
# split at ' = '
info = DataFrame(info.ix[:,0].str.split(' = ').tolist())
# only take samples:
info = info[info[0].str.contains("!Sample")]
info[0] = info[0].apply(lambda row: row[len("!Sample_"):])
groups = info.groupby(0).groups
# remove 'GGG' from barcodes
barcode = info[1][groups['barcode']].apply(lambda row: row[:-3])
title = info[1][groups['title']]
title.index = barcode
title.name = 'title'
geo_accession = info[1][groups['geo_accession']]
geo_accession.index = barcode
geo_accession.name = 'geo_accession'
case_id = info[1][groups['source_name_ch1']]
case_id.index = barcode
case_id.name = 'source_name_ch1'
info = concat([title, geo_accession, case_id], axis=1)
labels = info.join(Y).source_name_ch1[:-4]
labels[labels=='Embryonic stem cell'] = "ES"
labels[labels=='Embryonic fibroblast'] = "MEF"
return data_details_return({'Y': Y,
'info': '92 single cells (48 mouse ES cells, 44 mouse embryonic fibroblasts and 4 negative controls) were analyzed by single-cell tagged reverse transcription (STRT)',
'genes': Y.columns,
'labels': labels,
'datadf': data,
'infodf': info}, dataset)
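# Hedged usage sketch for the loader above; the module path GPy.util.datasets
# is assumed (it is where this file lives in released GPy versions), and the
# call downloads the GEO files on first use after asking for permission.
from GPy.util import datasets
d = datasets.singlecell_rna_seq_islam()
print d['Y'].shape                  # (cells, genes) expression matrix
print d['labels'].value_counts()    # counts of 'ES' vs 'MEF' cells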
def singlecell_rna_seq_deng(dataset='singlecell_deng'):
if not data_available(dataset):
download_data(dataset)
from pandas import read_csv, isnull
dir_path = os.path.join(data_path, dataset)
# read the info .soft
filename = os.path.join(dir_path, 'GSE45719_series_matrix.txt.gz')
info = read_csv(filename, sep='\t', skiprows=0, compression='gzip', header=None, nrows=29, index_col=0)
summary = info.loc['!Series_summary'][1]
design = info.loc['!Series_overall_design']
# only take samples:
sample_info = read_csv(filename, sep='\t', skiprows=30, compression='gzip', header=0, index_col=0).T
sample_info.columns = sample_info.columns.to_series().apply(lambda row: row[len("!Sample_"):])
sample_info.columns.name = sample_info.columns.name[len("!Sample_"):]
sample_info = sample_info[['geo_accession', 'characteristics_ch1', 'description']]
sample_info = sample_info.iloc[:, np.r_[0:4, 5:sample_info.shape[1]]]
c = sample_info.columns.to_series()
c[1:4] = ['strain', 'cross', 'developmental_stage']
sample_info.columns = c
# get the labels right:
rep = re.compile('\(.*\)')
def filter_dev_stage(row):
if isnull(row):
row = "2-cell stage embryo"
if row.startswith("developmental stage: "):
row = row[len("developmental stage: "):]
if row == 'adult':
row += " liver"
row = row.replace(' stage ', ' ')
row = rep.sub(' ', row)
row = row.strip(' ')
return row
labels = sample_info.developmental_stage.apply(filter_dev_stage)
# Extract the tar file
filename = os.path.join(dir_path, 'GSE45719_Raw.tar')
with tarfile.open(filename, 'r') as files:
print "Extracting Archive {}...".format(files.name)
data = None
gene_info = None
message = ''
members = files.getmembers()
overall = len(members)
for i, file_info in enumerate(members):
f = files.extractfile(file_info)
inner = read_csv(f, sep='\t', header=0, compression='gzip', index_col=0)
print ' '*(len(message)+1) + '\r',
message = "{: >7.2%}: Extracting: {}".format(float(i+1)/overall, file_info.name[:20]+"...txt.gz")
print message,
if data is None:
data = inner.RPKM.to_frame()
data.columns = [file_info.name[:-18]]
gene_info = inner.Refseq_IDs.to_frame()
gene_info.columns = [file_info.name[:-18]]
else:
data[file_info.name[:-18]] = inner.RPKM
gene_info[file_info.name[:-18]] = inner.Refseq_IDs
# Strip GSM number off data index
rep = re.compile('GSM\d+_')
data.columns = data.columns.to_series().apply(lambda row: row[rep.match(row).end():])
data = data.T
# make sure the same index gets used
sample_info.index = data.index
# get the labels from the description
#rep = re.compile('fibroblast|\d+-cell|embryo|liver|early blastocyst|mid blastocyst|late blastocyst|blastomere|zygote', re.IGNORECASE)
sys.stdout.write(' '*len(message) + '\r')
sys.stdout.flush()
print
print "Read Archive {}".format(files.name)
return data_details_return({'Y': data,
'series_info': info,
'sample_info': sample_info,
'gene_info': gene_info,
'summary': summary,
'design': design,
'genes': data.columns,
'labels': labels,
}, dataset)
def swiss_roll_1000(): def swiss_roll_1000():
return swiss_roll(num_samples=1000) return swiss_roll(num_samples=1000)
File diff suppressed because one or more lines are too long
View file
@@ -13,7 +13,7 @@ def initialize_latent(init, input_dim, Y):
p = pca(Y) p = pca(Y)
PC = p.project(Y, min(input_dim, Y.shape[1])) PC = p.project(Y, min(input_dim, Y.shape[1]))
Xr[:PC.shape[0], :PC.shape[1]] = PC Xr[:PC.shape[0], :PC.shape[1]] = PC
var = p.fracs[:input_dim] var = .1*p.fracs[:input_dim]
else: else:
var = Xr.var(0) var = Xr.var(0)
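# Rough numpy-only sketch of what the PCA branch above computes (pca_init is
# illustrative, not the GPy implementation): project Y onto its first Q
# principal components and damp the per-dimension variances by the 0.1 factor
# introduced in this change.
import numpy as np

def pca_init(Y, Q):
    Yc = Y - Y.mean(0)
    U, S, Vt = np.linalg.svd(Yc, full_matrices=False)
    Xr = U[:, :Q] * S[:Q]              # scores on the first Q principal components
    fracs = S ** 2 / np.sum(S ** 2)    # fraction of variance per component
    return Xr, .1 * fracs[:Q]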
View file
@@ -10,11 +11,10 @@ from scipy import linalg, weave
import types import types
import ctypes import ctypes
from ctypes import byref, c_char, c_int, c_double # TODO from ctypes import byref, c_char, c_int, c_double # TODO
# import scipy.lib.lapack
import scipy import scipy
import warnings import warnings
import os import os
from config import * from config import config
import logging import logging
_scipyversion = np.float64((scipy.__version__).split('.')[:2]) _scipyversion = np.float64((scipy.__version__).split('.')[:2])
@@ -521,6 +520,27 @@ def symmetrify(A, upper=False):
Take the square matrix A and make it symmetrical by copying elements from the lower half to the upper Take the square matrix A and make it symmetrical by copying elements from the lower half to the upper
works IN PLACE. works IN PLACE.
note: tries to use weave, falls back to a slower numpy version
"""
if config.getboolean('weave', 'working'):
try:
symmetrify_weave(A, upper)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
symmetrify_numpy(A, upper)
else:
symmetrify_numpy(A, upper)
def symmetrify_weave(A, upper=False):
"""
Take the square matrix A and make it symmetrical by copying elements from the lower half to the upper
works IN PLACE.
""" """
N, M = A.shape N, M = A.shape
assert N == M assert N == M
@@ -563,10 +583,15 @@ def symmetrify(A, upper=False):
A += np.tril(tmp, -1).T A += np.tril(tmp, -1).T
def symmetrify_murray(A): def symmetrify_numpy(A, upper=False):
A += A.T """
nn = A.shape[0] Force a matrix to be symmetric
A[[range(nn), range(nn)]] /= 2.0 """
triu = np.triu_indices_from(A,k=1)
if upper:
A.T[triu] = A[triu]
else:
A[triu] = A.T[triu]
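# Quick standalone check of the copy performed by symmetrify_numpy (plain
# numpy, independent of the weave path): the strict upper triangle is
# overwritten with the lower triangle, in place.
import numpy as np

A = np.arange(9, dtype=float).reshape(3, 3)
triu = np.triu_indices_from(A, k=1)
A[triu] = A.T[triu]           # copy the lower half into the upper half
assert np.allclose(A, A.T)    # A is now symmetric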
def cholupdate(L, x): def cholupdate(L, x):
""" """
View file
@@ -2,7 +2,6 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from scipy import weave
from config import * from config import *
def chain_1(df_dg, dg_dx): def chain_1(df_dg, dg_dx):
@ -84,96 +83,6 @@ def kmm_init(X, m = 10):
inducing = np.array(inducing) inducing = np.array(inducing)
return X[inducing] return X[inducing]
def fast_array_equal(A, B):
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for private(i, j)'
else:
pragma_string = ''
code2="""
int i, j;
return_val = 1;
%s
for(i=0;i<N;i++){
for(j=0;j<D;j++){
if(A(i, j) != B(i, j)){
return_val = 0;
break;
}
}
}
""" % pragma_string
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for private(i, j, z)'
else:
pragma_string = ''
code3="""
int i, j, z;
return_val = 1;
%s
for(i=0;i<N;i++){
for(j=0;j<D;j++){
for(z=0;z<Q;z++){
if(A(i, j, z) != B(i, j, z)){
return_val = 0;
break;
}
}
}
}
""" % pragma_string
if config.getboolean('parallel', 'openmp'):
header_string = '#include <omp.h>'
else:
header_string = ''
support_code = """
%s
#include <math.h>
""" % header_string
weave_options_openmp = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'],
'extra_link_args' : ['-lgomp'],
'libraries': ['gomp']}
weave_options_noopenmp = {'extra_compile_args': ['-O3']}
if config.getboolean('parallel', 'openmp'):
weave_options = weave_options_openmp
else:
weave_options = weave_options_noopenmp
value = False
if (A == None) and (B == None):
return True
elif ((A == None) and (B != None)) or ((A != None) and (B == None)):
return False
elif A.shape == B.shape:
if A.ndim == 2:
N, D = [int(i) for i in A.shape]
value = weave.inline(code2, support_code=support_code,
arg_names=['A', 'B', 'N', 'D'],
type_converters=weave.converters.blitz, **weave_options)
elif A.ndim == 3:
N, D, Q = [int(i) for i in A.shape]
value = weave.inline(code3, support_code=support_code,
arg_names=['A', 'B', 'N', 'D', 'Q'],
type_converters=weave.converters.blitz, **weave_options)
else:
value = np.array_equal(A,B)
return value
### make a parameter to its corresponding array: ### make a parameter to its corresponding array:
def param_to_array(*param): def param_to_array(*param):
""" """
@@ -181,6 +90,8 @@ Convert an arbitrary number of parameters to :class:ndarray class objects. This
converting parameter objects to numpy arrays, when using scipy.weave.inline routine. converting parameter objects to numpy arrays, when using scipy.weave.inline routine.
In scipy.weave.blitz there is no automatic array detection (even when the array inherits In scipy.weave.blitz there is no automatic array detection (even when the array inherits
from :class:ndarray)""" from :class:ndarray)"""
import warnings
warnings.warn("Please use param.values, as this function will be deprecated in the next release.", DeprecationWarning)
assert len(param) > 0, "At least one parameter needed" assert len(param) > 0, "At least one parameter needed"
if len(param) == 1: if len(param) == 1:
return param[0].view(np.ndarray) return param[0].view(np.ndarray)
View file
@@ -11,14 +11,16 @@ try:
except: except:
pass pass
from numpy.linalg.linalg import LinAlgError from numpy.linalg.linalg import LinAlgError
from operator import setitem
import itertools
class pca(object): class pca(object):
""" """
pca module with automatic primal/dual determination. pca module with automatic primal/dual determination.
""" """
def __init__(self, X): def __init__(self, X):
self.mu = X.mean(0) self.mu = None
self.sigma = X.std(0) self.sigma = None
X = self.center(X) X = self.center(X)
@@ -39,6 +41,13 @@ class pca(object):
""" """
Center `X` in pca space. Center `X` in pca space.
""" """
X = X.copy()
inan = numpy.isnan(X)
if self.mu is None:
X_ = numpy.ma.masked_array(X, inan)
self.mu = X_.mean(0).base
self.sigma = X_.std(0).base
reduce(lambda y,x: setitem(x[0], x[1], x[2]), itertools.izip(X.T, inan.T, self.mu), None)
X = X - self.mu X = X - self.mu
X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma) X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma)
return X return X
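# Plain-numpy sketch of the NaN handling added to center() above (standalone,
# not the GPy code): column means and stds are computed with NaNs masked, and
# missing entries are then replaced by the column mean so they contribute
# nothing after centring.
import numpy as np

X = np.array([[1., 2.], [np.nan, 4.], [3., np.nan]])
inan = np.isnan(X)
Xm = np.ma.masked_array(X, inan)
mu = Xm.mean(0).filled(0.)        # column means, NaNs ignored
sigma = Xm.std(0).filled(1.)      # column stds, NaNs ignored
Xfilled = np.where(inan, mu, X)   # missing entries -> column means
Xcentred = (Xfilled - mu) / np.where(sigma == 0, 1e-30, sigma)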
@@ -94,7 +103,7 @@ class pca(object):
fignum=None, cmap=None, # @UndefinedVariable fignum=None, cmap=None, # @UndefinedVariable
** kwargs): ** kwargs):
""" """
Plot dimensions `dimensions` with given labels against each other in Plot dimensions `dimensions` with given labels against each other in
PC space. Labels can be any sequence of labels of dimensions X.shape[0]. PC space. Labels can be any sequence of labels of dimensions X.shape[0].
Labels can be drawn with a subsequent call to legend() Labels can be drawn with a subsequent call to legend()
""" """
View file
@@ -1,367 +0,0 @@
import sys
import numpy as np
import sympy as sym
from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma, polygamma
from sympy.matrices import Matrix
########################################
## Try to do some matrix functions: problem, you can't do derivatives
## with respect to matrix functions :-(
class GPySymMatrix(Matrix):
def __init__(self, indices):
Matrix.__init__(self)
def atoms(self):
return [e2 for e in self for e2 in e.atoms()]
class selector(Function):
"""A function that returns an element of a Matrix depending on input indices."""
nargs = 3
def fdiff(self, argindex=1):
return selector(*self.args)
@classmethod
def eval(cls, X, i, j):
if i.is_Number and j.is_Number:
return X[i, j]
##################################################
class logistic(Function):
"""The logistic function as a symbolic function."""
nargs = 1
def fdiff(self, argindex=1):
x = self.args[0]
return logistic(x)*(1-logistic(x))
@classmethod
def eval(cls, x):
if x.is_Number:
return 1/(1+exp(-x))
class logisticln(Function):
"""The log logistic, which can often be computed with more precision than the simply taking log(logistic(x)) when x is small or large."""
nargs = 1
def fdiff(self, argindex=1):
x = self.args[0]
return 1-logistic(x)
@classmethod
def eval(cls, x):
if x.is_Number:
return -np.log(1+exp(-x))
class erfc(Function):
"""The complementary error function, erfc(x) = 1-erf(x). Used as a helper function, particularly for erfcx, the scaled complementary error function. and the normal distributions cdf."""
nargs = 1
@classmethod
def eval(cls, arg):
return 1-erf(arg)
class erfcx(Function):
nargs = 1
def fdiff(self, argindex=1):
x = self.args[0]
return x*erfcx(x)-2/sqrt(pi)
@classmethod
def eval(cls, x):
if x.is_Number:
return exp(x**2)*erfc(x)
class gammaln(Function):
"""The log of the gamma function, which is often needed instead of log(gamma(x)) for better accuracy for large x."""
nargs = 1
def fdiff(self, argindex=1):
x=self.args[0]
return polygamma(0, x)
@classmethod
def eval(cls, x):
if x.is_Number:
return log(gamma(x))
class normcdfln(Function):
"""The log of the normal cdf. Can often be computed with better accuracy than log(normcdf(x)), particulary when x is either small or large."""
nargs = 1
def fdiff(self, argindex=1):
x = self.args[0]
#return -erfcx(-x/sqrt(2))/sqrt(2*pi)
#return exp(-normcdfln(x) - 0.5*x*x)/sqrt(2*pi)
return sqrt(2/pi)*1/erfcx(-x/sqrt(2))
@classmethod
def eval(cls, x):
if x.is_Number:
return log(normcdf(x))
def _eval_is_real(self):
return self.args[0].is_real
class normcdf(Function):
"""The cumulative distribution function of the standard normal. Provided as a convenient helper function. It is computed throught -0.5*erfc(-x/sqrt(2))."""
nargs = 1
def fdiff(self, argindex=1):
x = self.args[0]
return gaussian(x)
@classmethod
def eval(cls, x):
if x.is_Number:
return 0.5*(erfc(-x/sqrt(2)))
def _eval_is_real(self):
return self.args[0].is_real
class normalln(Function):
"""The log of the standard normal distribution."""
nargs = 1
def fdiff(self, argindex=1):
x = self.args[0]
return -x
@classmethod
def eval(cls, x):
if x.is_Number:
return -0.5*log(2*pi) - 0.5*x*x
def _eval_is_real(self):
return True
class normal(Function):
"""The standard normal distribution. Provided as a convenience function."""
nargs = 1
@classmethod
def eval(cls, x):
return 1/sqrt(2*pi)*exp(-0.5*x*x)
def _eval_is_real(self):
return True
class differfln(Function):
nargs = 2
def fdiff(self, argindex=2):
if argindex == 2:
x0, x1 = self.args
return -2/(sqrt(pi)*(erfcx(x1)-exp(x1**2-x0**2)*erfcx(x0)))
elif argindex == 1:
x0, x1 = self.args
return 2/(sqrt(pi)*(exp(x0**2-x1**2)*erfcx(x1)-erfcx(x0)))
else:
raise ArgumentIndexError(self, argindex)
@classmethod
def eval(cls, x0, x1):
if x0.is_Number and x1.is_Number:
return log(erfc(x1)-erfc(x0))
class dh_dd_i(Function):
nargs = 5
@classmethod
def eval(cls, t, tprime, d_i, d_j, l):
if (t.is_Number
and tprime.is_Number
and d_i.is_Number
and d_j.is_Number
and l.is_Number):
diff_t = (t-tprime)
l2 = l*l
h = h(t, tprime, d_i, d_j, l)
half_l_di = 0.5*l*d_i
arg_1 = half_l_di + tprime/l
arg_2 = half_l_di - (t-tprime)/l
ln_part_1 = ln_diff_erf(arg_1, arg_2)
arg_1 = half_l_di
arg_2 = half_l_di - t/l
sign_val = sign(t/l)
ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
base = ((0.5*d_i*l2*(d_i+d_j)-1)*h
+ (-diff_t*sign_val*exp(half_l_di*half_l_di
-d_i*diff_t
+ln_part_1)
+t*sign_val*exp(half_l_di*half_l_di
-d_i*t-d_j*tprime
+ln_part_2))
+ l/sqrt(pi)*(-exp(-diff_t*diff_t/l2)
+exp(-tprime*tprime/l2-d_i*t)
+exp(-t*t/l2-d_j*tprime)
-exp(-(d_i*t + d_j*tprime))))
return base/(d_i+d_j)
class dh_dd_j(Function):
nargs = 5
@classmethod
def eval(cls, t, tprime, d_i, d_j, l):
if (t.is_Number
and tprime.is_Number
and d_i.is_Number
and d_j.is_Number
and l.is_Number):
diff_t = (t-tprime)
l2 = l*l
half_l_di = 0.5*l*d_i
h = h(t, tprime, d_i, d_j, l)
arg_1 = half_l_di + tprime/l
arg_2 = half_l_di - (t-tprime)/l
ln_part_1 = ln_diff_erf(arg_1, arg_2)
arg_1 = half_l_di
arg_2 = half_l_di - t/l
sign_val = sign(t/l)
ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
sign_val = sign(t/l)
base = tprime*sign_val*exp(half_l_di*half_l_di-(d_i*t+d_j*tprime)+ln_part_2)-h
return base/(d_i+d_j)
class dh_dl(Function):
nargs = 5
@classmethod
def eval(cls, t, tprime, d_i, d_j, l):
if (t.is_Number
and tprime.is_Number
and d_i.is_Number
and d_j.is_Number
and l.is_Number):
diff_t = (t-tprime)
l2 = l*l
h = h(t, tprime, d_i, d_j, l)
return 0.5*d_i*d_i*l*h + 2./(sqrt(pi)*(d_i+d_j))*((-diff_t/l2-d_i/2.)*exp(-diff_t*diff_t/l2)+(-tprime/l2+d_i/2.)*exp(-tprime*tprime/l2-d_i*t)-(-t/l2-d_i/2.)*exp(-t*t/l2-d_j*tprime)-d_i/2.*exp(-(d_i*t+d_j*tprime)))
class dh_dt(Function):
nargs = 5
@classmethod
def eval(cls, t, tprime, d_i, d_j, l):
if (t.is_Number
and tprime.is_Number
and d_i.is_Number
and d_j.is_Number
and l.is_Number):
if (t is S.NaN
or tprime is S.NaN
or d_i is S.NaN
or d_j is S.NaN
or l is S.NaN):
return S.NaN
else:
half_l_di = 0.5*l*d_i
arg_1 = half_l_di + tprime/l
arg_2 = half_l_di - (t-tprime)/l
ln_part_1 = ln_diff_erf(arg_1, arg_2)
arg_1 = half_l_di
arg_2 = half_l_di - t/l
sign_val = sign(t/l)
ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
return (sign_val*exp(half_l_di*half_l_di
- d_i*(t-tprime)
+ ln_part_1
- log(d_i + d_j))
- sign_val*exp(half_l_di*half_l_di
- d_i*t - d_j*tprime
+ ln_part_2
- log(d_i + d_j))).diff(t)
class dh_dtprime(Function):
nargs = 5
@classmethod
def eval(cls, t, tprime, d_i, d_j, l):
if (t.is_Number
and tprime.is_Number
and d_i.is_Number
and d_j.is_Number
and l.is_Number):
if (t is S.NaN
or tprime is S.NaN
or d_i is S.NaN
or d_j is S.NaN
or l is S.NaN):
return S.NaN
else:
half_l_di = 0.5*l*d_i
arg_1 = half_l_di + tprime/l
arg_2 = half_l_di - (t-tprime)/l
ln_part_1 = ln_diff_erf(arg_1, arg_2)
arg_1 = half_l_di
arg_2 = half_l_di - t/l
sign_val = sign(t/l)
ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
return (sign_val*exp(half_l_di*half_l_di
- d_i*(t-tprime)
+ ln_part_1
- log(d_i + d_j))
- sign_val*exp(half_l_di*half_l_di
- d_i*t - d_j*tprime
+ ln_part_2
- log(d_i + d_j))).diff(tprime)
class h(Function):
nargs = 5
def fdiff(self, argindex=5):
t, tprime, d_i, d_j, l = self.args
if argindex == 1:
return dh_dt(t, tprime, d_i, d_j, l)
elif argindex == 2:
return dh_dtprime(t, tprime, d_i, d_j, l)
elif argindex == 3:
return dh_dd_i(t, tprime, d_i, d_j, l)
elif argindex == 4:
return dh_dd_j(t, tprime, d_i, d_j, l)
elif argindex == 5:
return dh_dl(t, tprime, d_i, d_j, l)
@classmethod
def eval(cls, t, tprime, d_i, d_j, l):
if (t.is_Number
and tprime.is_Number
and d_i.is_Number
and d_j.is_Number
and l.is_Number):
if (t is S.NaN
or tprime is S.NaN
or d_i is S.NaN
or d_j is S.NaN
or l is S.NaN):
return S.NaN
else:
half_l_di = 0.5*l*d_i
arg_1 = half_l_di + tprime/l
arg_2 = half_l_di - (t-tprime)/l
ln_part_1 = ln_diff_erf(arg_1, arg_2)
arg_1 = half_l_di
arg_2 = half_l_di - t/l
sign_val = sign(t/l)
ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
return (sign_val*exp(half_l_di*half_l_di
- d_i*(t-tprime)
+ ln_part_1
- log(d_i + d_j))
- sign_val*exp(half_l_di*half_l_di
- d_i*t - d_j*tprime
+ ln_part_2
- log(d_i + d_j)))
# return (exp((d_j/2.*l)**2)/(d_i+d_j)
# *(exp(-d_j*(tprime - t))
# *(erf((tprime-t)/l - d_j/2.*l)
# + erf(t/l + d_j/2.*l))
# - exp(-(d_j*tprime + d_i))
# *(erf(tprime/l - d_j/2.*l)
# + erf(d_j/2.*l))))
View file
@@ -12,6 +12,39 @@ def std_norm_cdf(x):
""" """
Cumulative standard Gaussian distribution Cumulative standard Gaussian distribution
Based on Abramowitz, M. and Stegun, I. (1970) Based on Abramowitz, M. and Stegun, I. (1970)
"""
x_shape = np.asarray(x).shape
if len(x_shape) == 0 or x_shape[0] == 1:
sign = np.sign(x)
x *= sign
x /= np.sqrt(2.)
t = 1.0/(1.0 + 0.3275911*x)
erf = 1. - np.exp(-x**2)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429)))))
cdf_x = 0.5*(1.0 + sign*erf)
return cdf_x
else:
x = np.atleast_1d(x).copy()
cdf_x = np.zeros_like(x)
sign = np.ones_like(x)
neg_x_ind = x<0
sign[neg_x_ind] = -1.0
x[neg_x_ind] = -x[neg_x_ind]
x /= np.sqrt(2.)
t = 1.0/(1.0 + 0.3275911*x)
erf = 1. - np.exp(-x**2)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429)))))
cdf_x = 0.5*(1.0 + sign*erf)
cdf_x = cdf_x.reshape(x_shape)
return cdf_x
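# Small sanity check for the Abramowitz & Stegun polynomial above (sketch
# only: assumes the std_norm_cdf defined here is in scope and scipy is
# available); the approximation matches 0.5*erfc(-x/sqrt(2)) to within about
# 1e-6.
import numpy as np
from scipy.special import erfc

xs = np.linspace(-5., 5., 11)
exact = 0.5 * erfc(-xs / np.sqrt(2.))
approx = std_norm_cdf(xs)
assert np.max(np.abs(approx - exact)) < 1e-6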
def std_norm_cdf_weave(x):
"""
Cumulative standard Gaussian distribution
Based on Abramowitz, M. and Stegun, I. (1970)
A weave implementation of std_norm_cdf, which is faster. this is unused,
because of the difficulties of a weave dependency. (see github issue #94)
""" """
#Generalize for many x #Generalize for many x
x = np.asarray(x).copy() x = np.asarray(x).copy()
@@ -40,37 +73,6 @@ def std_norm_cdf(x):
weave.inline(code, arg_names=['x', 'cdf_x', 'N'], support_code=support_code) weave.inline(code, arg_names=['x', 'cdf_x', 'N'], support_code=support_code)
return cdf_x return cdf_x
def std_norm_cdf_np(x):
"""
Cumulative standard Gaussian distribution
Based on Abramowitz, M. and Stegun, I. (1970)
Around 3 times slower when x is a scalar, otherwise quite a lot slower
"""
x_shape = np.asarray(x).shape
if len(x_shape) == 0 or x_shape[0] == 1:
sign = np.sign(x)
x *= sign
x /= np.sqrt(2.)
t = 1.0/(1.0 + 0.3275911*x)
erf = 1. - np.exp(-x**2)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429)))))
cdf_x = 0.5*(1.0 + sign*erf)
return cdf_x
else:
x = np.atleast_1d(x).copy()
cdf_x = np.zeros_like(x)
sign = np.ones_like(x)
neg_x_ind = x<0
sign[neg_x_ind] = -1.0
x[neg_x_ind] = -x[neg_x_ind]
x /= np.sqrt(2.)
t = 1.0/(1.0 + 0.3275911*x)
erf = 1. - np.exp(-x**2)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429)))))
cdf_x = 0.5*(1.0 + sign*erf)
cdf_x = cdf_x.reshape(x_shape)
return cdf_x
def inv_std_norm_cdf(x): def inv_std_norm_cdf(x):
""" """
Inverse cumulative standard Gaussian distribution Inverse cumulative standard Gaussian distribution