mirror of https://github.com/SheffieldML/GPy.git, synced 2026-05-15 06:52:39 +02:00

Merge branch 'devel' of https://github.com/SheffieldML/GPy into devel

This commit is contained in: e3b6d9c9c5
49 changed files with 1817 additions and 867 deletions
GPy/core/gp.py (112 changed lines)
@@ -10,7 +10,7 @@ from model import Model
from parameterization import ObsAr
from .. import likelihoods
from ..likelihoods.gaussian import Gaussian
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation, LatentFunctionInference
from parameterization.variational import VariationalPosterior

class GP(Model):

@@ -21,6 +21,7 @@ class GP(Model):
:param Y: output observations
:param kernel: a GPy kernel, defaults to rbf+white
:param likelihood: a GPy likelihood
:param :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference_method: The inference method to use for this GP
:rtype: model object

.. Note:: Multiple independent outputs are allowed using columns of Y

@@ -32,13 +33,13 @@ class GP(Model):
assert X.ndim == 2
if isinstance(X, (ObsAr, VariationalPosterior)):
self.X = X
else: self.X = ObsAr(X)
self.X = X.copy()
else: self.X = ObsAr(X.copy())

self.num_data, self.input_dim = self.X.shape

assert Y.ndim == 2
self.Y = ObsAr(Y)
self.Y = ObsAr(Y.copy())
assert Y.shape[0] == self.num_data
_, self.output_dim = self.Y.shape
@@ -179,40 +180,80 @@ class GP(Model):
return Ysim

def plot_f(self, *args, **kwargs):
def plot_f(self, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=True,
linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'):
"""
Plot the GP's view of the world, where the data is normalized and
before applying a likelihood.

This is a convenience function: arguments are passed to
GPy.plotting.matplot_dep.models_plots.plot_f_fit

Plot the GP's view of the world, where the data is normalized and before applying a likelihood.
This is a call to plot with plot_raw=True.
Data will not be plotted in this, as the GP's view of the world
may live in another space, or units than the data.
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots
return models_plots.plot_fit_f(self,*args,**kwargs)
kw = {}
if linecol is not None:
kw['linecol'] = linecol
if fillcol is not None:
kw['fillcol'] = fillcol
return models_plots.plot_fit(self, plot_limits, which_data_rows,
which_data_ycols, fixed_inputs,
levels, samples, fignum, ax, resolution,
plot_raw=plot_raw, Y_metadata=Y_metadata,
data_symbol=data_symbol, **kw)

def plot(self, *args, **kwargs):
def plot(self, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False,
linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'):
"""
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region
identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted
function
- In higher dimensions, use fixed_inputs to plot the GP with some of
the inputs fixed.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted function
- In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.

Can plot only part of the data and part of the posterior functions
using which_data_rows which_data_ycols and which_parts

This is a convenience function: arguments are passed to
GPy.plotting.matplot_dep.models_plots.plot_fit
using which_data_rows, which_data_ycols.

:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:type plot_limits: np.array
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
:param which_data_ycols: when the data has several columns (independent outputs), only plot these
:type which_data_ycols: 'all' or a list of integers
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:type levels: int
:param samples: the number of a posteriori samples to plot
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:type output: integer (first output is 0)
:param linecol: color of line to plot [Tango.colorsHex['darkBlue']]
:type linecol:
:param fillcol: color of fill [Tango.colorsHex['lightBlue']]
:param levels: for 2D plotting, the number of contour levels to use; if ax is None, create a new figure
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots
return models_plots.plot_fit(self,*args,**kwargs)
kw = {}
if linecol is not None:
kw['linecol'] = linecol
if fillcol is not None:
kw['fillcol'] = fillcol
return models_plots.plot_fit(self, plot_limits, which_data_rows,
which_data_ycols, fixed_inputs,
levels, samples, fignum, ax, resolution,
plot_raw=plot_raw, Y_metadata=Y_metadata,
data_symbol=data_symbol, **kw)

def input_sensitivity(self):
"""
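The rewritten plot and plot_f above replace the old *args/**kwargs pass-through with explicit keyword arguments. A rough usage sketch only: the data, model and fixed-input values below are made up for illustration and are not part of this commit.

import numpy as np
import GPy

X = np.random.rand(100, 2)                                  # toy 2-input data
Y = np.sin(6 * X[:, :1]) + 0.05 * np.random.randn(100, 1)
m = GPy.models.GPRegression(X, Y)
m.optimize()

# posterior plot with the second input fixed at 0.5, first output column only
m.plot(fixed_inputs=[(1, 0.5)], which_data_ycols=[0], resolution=300)

# latent (noise-free) function, i.e. plot_raw=True under the hood
m.plot_f(fixed_inputs=[(1, 0.5)])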
@@ -220,3 +261,24 @@ class GP(Model):
"""
return self.kern.input_sensitivity()

def optimize(self, optimizer=None, start=None, **kwargs):
"""
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
kwargs are passed to the optimizer. They can be:

:param max_f_eval: maximum number of function evaluations
:type max_f_eval: int
:messages: whether to display during optimisation
:type messages: bool
:param optimizer: which optimizer to use (defaults to self.preferred optimizer)
:type optimizer: string

TODO: valid args
"""
self.inference_method.on_optimization_start()
try:
super(GP, self).optimize(optimizer, start, **kwargs)
except KeyboardInterrupt:
print "KeyboardInterrupt caught, calling on_optimization_end() to round things up"
self.inference_method.on_optimization_end()
raise
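GP.optimize now brackets the parent call with the inference method's start/end hooks, so an interrupted run still cleans up. A hedged sketch of the call from user code, with illustrative argument values only:

m.optimize(optimizer='bfgs', messages=True, max_f_eval=1000)
# Pressing Ctrl-C during the run still triggers
# inference_method.on_optimization_end() before the
# KeyboardInterrupt is re-raised.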
@@ -20,7 +20,7 @@ class Model(Parameterized):
super(Model, self).__init__(name) # Parameterized.__init__(self)
self.optimization_runs = []
self.sampling_runs = []
self.preferred_optimizer = 'scg'
self.preferred_optimizer = 'bfgs'

def log_likelihood(self):
raise NotImplementedError, "this needs to be implemented to use the model class"

@@ -61,7 +61,7 @@ class Model(Parameterized):
on the current machine.

"""
initial_parameters = self._get_params_transformed()
initial_parameters = self.optimizer_array

if parallel:
try:

@@ -124,13 +124,15 @@ class Model(Parameterized):
For probabilistic models this is the negative log_likelihood
(including the MAP prior), so we return it here. If your model is not
probabilistic, just return your objective here!
probabilistic, just return your objective to minimize here!
"""
return -float(self.log_likelihood()) - self.log_prior()

def objective_function_gradients(self):
"""
The gradients for the objective function for the given algorithm.
The gradients are w.r.t. the *negative* objective function, as
this framework works with *negative* log-likelihoods as a default.

You can find the gradient for the parameters in self.gradient at all times.
This is the place, where gradients get stored for parameters.

@@ -141,7 +143,7 @@ class Model(Parameterized):
For probabilistic models this is the gradient of the negative log_likelihood
(including the MAP prior), so we return it here. If your model is not
probabilistic, just return your gradient here!
probabilistic, just return your *negative* gradient here!
"""
return -(self._log_likelihood_gradients() + self._log_prior_gradients())
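The objective_function docstrings above state the contract the optimizer relies on: the returned value is minimized, and for probabilistic models it equals -log_likelihood - log_prior with the matching gradient. A minimal sketch of a non-probabilistic model written against that contract; the class name, parameter name and import paths are assumptions for illustration and are untested against this exact revision:

import numpy as np
from GPy.core.model import Model
from GPy.core.parameterization.param import Param

class QuadraticModel(Model):
    """Toy model: minimize (x - 3)^2 instead of a negative log likelihood."""
    def __init__(self):
        super(QuadraticModel, self).__init__(name='quadratic')
        self.x = Param('x', np.zeros(1))
        self.add_parameter(self.x)

    def objective_function(self):
        # the value the optimizer minimizes
        return float(np.sum((self.x - 3.0) ** 2))

    def objective_function_gradients(self):
        # gradient of that objective w.r.t. the parameters
        return 2.0 * (self.x - 3.0)

m = QuadraticModel()
print(m.objective_function())   # 9.0 at the initial x = 0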
@@ -157,7 +159,8 @@ class Model(Parameterized):
:type x: np.array
"""
try:
self._set_params_transformed(x)
# self._set_params_transformed(x)
self.optimizer_array = x
obj_grads = self._transform_gradients(self.objective_function_gradients())
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError):

@@ -180,7 +183,7 @@ class Model(Parameterized):
:parameter type: np.array
"""
try:
self._set_params_transformed(x)
self.optimizer_array = x
obj = self.objective_function()
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError):

@@ -192,7 +195,7 @@ class Model(Parameterized):

def _objective_grads(self, x):
try:
self._set_params_transformed(x)
self.optimizer_array = x
obj_f, obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients())
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError):

@@ -220,13 +223,13 @@ class Model(Parameterized):
if self.is_fixed:
raise RuntimeError, "Cannot optimize, when everything is fixed"
if self.size == 0:
raise RuntimeError, "Model without parameters cannot be minimized"
raise RuntimeError, "Model without parameters cannot be optimized"

if optimizer is None:
optimizer = self.preferred_optimizer

if start == None:
start = self._get_params_transformed()
start = self.optimizer_array

optimizer = optimization.get_optimizer(optimizer)
opt = optimizer(start, model=self, **kwargs)

@@ -235,7 +238,7 @@ class Model(Parameterized):

self.optimization_runs.append(opt)

self._set_params_transformed(opt.x_opt)
self.optimizer_array = opt.x_opt

def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
# assert self.Y.shape[1] > 1, "SGD only works with D > 1"

@@ -260,7 +263,7 @@ class Model(Parameterized):
The gradient is considered correct if the ratio of the analytical
and numerical gradients is within <tolerance> of unity.
"""
x = self._get_params_transformed().copy()
x = self.optimizer_array.copy()

if not verbose:
# make sure only to test the selected parameters

@@ -270,8 +273,8 @@ class Model(Parameterized):
transformed_index = self._raveled_index_for(target_param)
if self._has_fixes():
indices = np.r_[:self.size]
which = (transformed_index[:,None]==indices[self._fixes_][None,:]).nonzero()
transformed_index = (indices-(~self._fixes_).cumsum())[transformed_index[which[0]]]
which = (transformed_index[:, None] == indices[self._fixes_][None, :]).nonzero()
transformed_index = (indices - (~self._fixes_).cumsum())[transformed_index[which[0]]]

if transformed_index.size == 0:
print "No free parameters to check"

@@ -290,7 +293,7 @@ class Model(Parameterized):
gradient = gradient[transformed_index]

denominator = (2 * np.dot(dx, gradient))
global_ratio = (f1 - f2) / np.where(denominator==0., 1e-32, denominator)
global_ratio = (f1 - f2) / np.where(denominator == 0., 1e-32, denominator)
global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance)
if global_ratio is np.nan:
global_ratio = 0

@@ -319,10 +322,10 @@ class Model(Parameterized):
param_index = self._raveled_index_for(target_param)
if self._has_fixes():
indices = np.r_[:self.size]
which = (param_index[:,None]==indices[self._fixes_][None,:]).nonzero()
which = (param_index[:, None] == indices[self._fixes_][None, :]).nonzero()
param_index = param_index[which[0]]
transformed_index = (indices-(~self._fixes_).cumsum())[param_index]
#print param_index, transformed_index
transformed_index = (indices - (~self._fixes_).cumsum())[param_index]
# print param_index, transformed_index
else:
transformed_index = param_index

@@ -340,7 +343,7 @@ class Model(Parameterized):
xx[xind] -= 2.*step
f2 = self._objective(xx)
numerical_gradient = (f1 - f2) / (2 * step)
if np.all(gradient[xind]==0): ratio = (f1-f2) == gradient[xind]
if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind]
else: ratio = (f1 - f2) / (2 * step * gradient[xind])
difference = np.abs((f1 - f2) / 2 / step - gradient[xind])

@@ -358,7 +361,7 @@ class Model(Parameterized):
grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
print grad_string

self._set_params_transformed(x)
self.optimizer_array = x
return ret
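The checkgrad hunks above compare the analytical gradient against a central finite difference and report the ratio of the two, which should be close to 1. A standalone sketch of that idea; this is an illustrative re-implementation, not the GPy code itself:

import numpy as np

def gradient_ratios(f, grad, x, step=1e-6):
    """Central-difference check: values near 1 mean the analytic gradient is consistent."""
    x = np.asarray(x, dtype=float)
    g = np.asarray(grad(x), dtype=float)
    ratios = np.empty_like(x)
    for i in range(x.size):
        xx = x.copy()
        xx[i] += step
        f1 = f(xx)
        xx[i] -= 2. * step
        f2 = f(xx)
        numerical = (f1 - f2) / (2. * step)
        ratios[i] = numerical / g[i] if g[i] != 0 else float(numerical == 0)
    return ratios

print(gradient_ratios(lambda x: np.sum(x ** 2), lambda x: 2 * x, np.array([0.3, -1.2])))
# -> approximately [1. 1.]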
@@ -7,6 +7,20 @@ import numpy
from numpy.lib.function_base import vectorize
from lists_and_dicts import IntArrayDict

def extract_properties_to_index(index, props):
prop_index = dict()
for i, cl in enumerate(props):
for c in cl:
ind = prop_index.get(c, list())
ind.append(index[i])
prop_index[c] = ind

for c, i in prop_index.items():
prop_index[c] = numpy.array(i, dtype=int)

return prop_index

class ParameterIndexOperations(object):
'''
Index operations for storing param index _properties

@@ -66,8 +80,34 @@ class ParameterIndexOperations(object):
return self._properties.values()

def properties_for(self, index):
"""
Returns a list of properties, such that each entry in the list corresponds
to the element of the index given.

Example:
let properties: 'one':[1,2,3,4], 'two':[3,5,6]

>>> properties_for([2,3,5])
[['one'], ['one', 'two'], ['two']]
"""
return vectorize(lambda i: [prop for prop in self.iterproperties() if i in self[prop]], otypes=[list])(index)

def properties_to_index_dict(self, index):
"""
Return a dictionary, containing properties as keys and indices as index
Thus, the indices for each constraint, which is contained will be collected as
one dictionary

Example:
let properties: 'one':[1,2,3,4], 'two':[3,5,6]

>>> properties_to_index_dict([2,3,5])
{'one':[2,3], 'two':[3,5]}
"""
props = self.properties_for(index)
prop_index = extract_properties_to_index(index, props)
return prop_index

def add(self, prop, indices):
self._properties[prop] = combine_indices(self._properties[prop], indices)
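The new module-level helper extract_properties_to_index inverts a per-index list of properties into a property-to-indices dictionary, which is what properties_to_index_dict returns. A self-contained sketch of the same logic using the docstring's example data; this re-implementation is for illustration only:

import numpy

def extract_properties_to_index(index, props):
    prop_index = dict()
    for i, cl in enumerate(props):            # cl holds the properties of index[i]
        for c in cl:
            prop_index.setdefault(c, []).append(index[i])
    return dict((c, numpy.array(i, dtype=int)) for c, i in prop_index.items())

index = [2, 3, 5]
props = [['one'], ['one', 'two'], ['two']]    # as returned by properties_for([2,3,5])
print(extract_properties_to_index(index, props))
# -> {'one': array([2, 3]), 'two': array([3, 5])}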
@@ -174,8 +214,32 @@ class ParameterIndexOperationsView(object):

def properties_for(self, index):
"""
Returns a list of properties, such that each entry in the list corresponds
to the element of the index given.

Example:
let properties: 'one':[1,2,3,4], 'two':[3,5,6]

>>> properties_for([2,3,5])
[['one'], ['one', 'two'], ['two']]
"""
return vectorize(lambda i: [prop for prop in self.iterproperties() if i in self[prop]], otypes=[list])(index)

def properties_to_index_dict(self, index):
"""
Return a dictionary, containing properties as keys and indices as index
Thus, the indices for each constraint, which is contained will be collected as
one dictionary

Example:
let properties: 'one':[1,2,3,4], 'two':[3,5,6]

>>> properties_to_index_dict([2,3,5])
{'one':[2,3], 'two':[3,5]}
"""
return extract_properties_to_index(index, self.properties_for(index))

def add(self, prop, indices):
self._param_index_ops.add(prop, indices+self._offset)
@@ -38,7 +38,12 @@ class ArrayList(list):
raise ValueError, "{} is not in list".format(item)
pass

class ObservablesList(object):
class ObserverList(object):
"""
A list which contains the observables.
It only holds weak references to observers, such that unbound
observers don't dangle in memory.
"""
def __init__(self):
self._poc = []

@@ -46,31 +51,44 @@ class ObservablesList(object):
p,o,c = self._poc[ind]
return p, o(), c

def remove(self, priority, observable, callble):
def remove(self, priority, observer, callble):
"""
Remove one observer, which had priority and callble.
"""
self.flush()
for i in range(len(self) - 1, -1, -1):
p,o,c = self[i]
if priority==p and observable==o and callble==c:
if priority==p and observer==o and callble==c:
del self._poc[i]

def __repr__(self):
return self._poc.__repr__()

def add(self, priority, observable, callble):
if observable is not None:
def add(self, priority, observer, callble):
"""
Add an observer with priority and callble
"""
if observer is not None:
ins = 0
for pr, _, _ in self:
if priority > pr:
break
ins += 1
self._poc.insert(ins, (priority, weakref.ref(observable), callble))
self._poc.insert(ins, (priority, weakref.ref(observer), callble))

def __str__(self):
from . import ObsAr, Param
from parameter_core import Parameterizable
ret = []
curr_p = None

def frmt(o):
if isinstance(o, ObsAr):
return 'ObsArr <{}>'.format(hex(id(o)))
elif isinstance(o, (Param,Parameterizable)):
return '{}'.format(o.hierarchy_name())
else:
return repr(o)
for p, o, c in self:
curr = ''
if curr_p != p:

@@ -79,10 +97,14 @@ class ObservablesList(object):
else: curr_pre = " "*len(pre)
curr_p = p
curr += curr_pre
ret.append(curr + ", ".join(map(repr, [o,c])))
return '\n'.join(ret)

ret.append(curr + ", ".join([frmt(o), str(c)]))
return '\n'.join(ret)

def flush(self):
"""
Make sure all weak references, which point to nothing are flushed (deleted)
"""
self._poc = [(p,o,c) for p,o,c in self._poc if o() is not None]

def __iter__(self):

@@ -95,7 +117,7 @@ class ObservablesList(object):
return self._poc.__len__()

def __deepcopy__(self, memo):
s = ObservablesList()
s = ObserverList()
for p,o,c in self:
import copy
s.add(p, copy.deepcopy(o, memo), copy.deepcopy(c, memo))
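The renamed ObserverList stores (priority, weakref, callback) triples, so an observer that goes out of scope is dropped by flush() instead of being kept alive. A minimal standalone sketch of that behaviour with plain weakref, not the GPy class itself:

import weakref

class Listener(object):
    def changed(self, who, which=None):
        print("notified by", which)

poc = []                                   # (priority, weak observer, callback)
obs = Listener()
poc.append((0, weakref.ref(obs), Listener.changed))

del obs                                    # last strong reference gone
poc = [(p, o, c) for p, o, c in poc if o() is not None]   # what flush() does
print(len(poc))                            # -> 0, nothing dangles in memory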
@@ -30,16 +30,22 @@ class ObsAr(np.ndarray, Pickleable, Observable):
def __array_wrap__(self, out_arr, context=None):
return out_arr.view(np.ndarray)

def _setup_observers(self):
# do not setup anything, as observable arrays do not have default observers
pass

def copy(self):
from lists_and_dicts import ObserverList
memo = {}
memo[id(self)] = self
memo[id(self.observers)] = ObserverList()
return self.__deepcopy__(memo)

def __deepcopy__(self, memo):
s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy())
memo[id(self)] = s
import copy
s.__dict__.update(copy.deepcopy(self.__dict__, memo))
Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
return s

def __reduce__(self):
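ObsAr.copy now seeds the deepcopy memo so the copy gets its own data buffer and a fresh, empty ObserverList rather than the original's observers. A hedged sketch of what a caller sees; the import path is assumed from this diff's layout and may differ:

import numpy as np
from GPy.core.parameterization.observable_array import ObsAr

X = ObsAr(np.arange(6.).reshape(3, 2))
X2 = X.copy()
X2[0, 0] = -1.0
print(X[0, 0])             # unchanged, the copy owns its own memory
print(len(X2.observers))   # 0: observers are not carried over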
@@ -4,7 +4,7 @@
import itertools
import numpy
np = numpy
from parameter_core import OptimizationHandlable, adjust_name_for_printing
from parameter_core import Parameterizable, adjust_name_for_printing, Pickleable
from observable_array import ObsAr

###### printing

@@ -16,7 +16,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision
__print_threshold__ = 5
######

class Param(OptimizationHandlable, ObsAr):
class Param(Parameterizable, ObsAr):
"""
Parameter object for GPy models.

@@ -42,7 +42,7 @@ class Param(OptimizationHandlable, ObsAr):
"""
__array_priority__ = -1 # Never give back Param
_fixes_ = None
_parameters_ = []
parameters = []
def __new__(cls, name, input_array, default_constraint=None):
obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
obj._current_slice_ = (slice(obj.shape[0]),)

@@ -87,6 +87,9 @@ class Param(OptimizationHandlable, ObsAr):

@property
def param_array(self):
"""
As we are a leaf, this just returns self
"""
return self

@property

@@ -139,6 +142,9 @@ class Param(OptimizationHandlable, ObsAr):
def _raveled_index_for(self, obj):
return self._raveled_index()

#===========================================================================
# Index recreation
#===========================================================================
def _expand_index(self, slice_index=None):
# this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes
# it basically translates slices to their respective index arrays and turns negative indices around

@@ -147,6 +153,8 @@ class Param(OptimizationHandlable, ObsAr):
slice_index = self._current_slice_
def f(a):
a, b = a
if isinstance(a, numpy.ndarray) and a.dtype == bool:
raise ValueError, "Boolean indexing not implemented, use Param[np.where(index)] to index by boolean arrays!"
if a not in (slice(None), Ellipsis):
if isinstance(a, slice):
start, stop, step = a.indices(b)

@@ -165,34 +173,6 @@ class Param(OptimizationHandlable, ObsAr):
def _ensure_fixes(self):
if not self._has_fixes(): self._fixes_ = numpy.ones(self._realsize_, dtype=bool)

#===========================================================================
# parameterizable
#===========================================================================
def traverse(self, visit, *args, **kwargs):
"""
Traverse the hierarchy performing visit(self, *args, **kwargs) at every node passed by.
See "visitor pattern" in literature. This is implemented in pre-order fashion.

This will function will just call visit on self, as Param are leaf nodes.
"""
visit(self, *args, **kwargs)

def traverse_parents(self, visit, *args, **kwargs):
"""
Traverse the hierarchy upwards, visiting all parents and their children, except self.
See "visitor pattern" in literature. This is implemented in pre-order fashion.

Example:

parents = []
self.traverse_parents(parents.append)
print parents
"""
if self.has_parent():
self.__visited = True
self._parent_._traverse_parents(visit, *args, **kwargs)
self.__visited = False

#===========================================================================
# Convenience
#===========================================================================

@@ -207,14 +187,24 @@ class Param(OptimizationHandlable, ObsAr):
#===========================================================================
# Pickling and copying
#===========================================================================
def copy(self):
return Parameterizable.copy(self, which=self)

def __deepcopy__(self, memo):
s = self.__new__(self.__class__, name=self.name, input_array=self.view(numpy.ndarray).copy())
memo[id(self)] = s
memo[id(self)] = s
import copy
s.__dict__.update(copy.deepcopy(self.__dict__, memo))
Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
return s

def _setup_observers(self):
"""
Setup the default observers

1: pass through to parent, if present
"""
if self.has_parent():
self.add_observer(self._parent_, self._parent_._pass_through_notify_observers, -np.inf)

#===========================================================================
# Printing -> done
#===========================================================================

@@ -316,7 +306,7 @@ class Param(OptimizationHandlable, ObsAr):
class ParamConcatenation(object):
def __init__(self, params):
"""
Parameter concatenation for convienience of printing regular expression matched arrays
Parameter concatenation for convenience of printing regular expression matched arrays
you can index this concatenation as if it was the flattened concatenation
of all the parameters it contains, same for setting parameters (Broadcasting enabled).
@@ -16,8 +16,9 @@ Observable Pattern for patameterization
from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
import numpy as np
import re
import logging

__updated__ = '2014-05-12'
__updated__ = '2014-05-21'

class HierarchyError(Exception):
"""

@@ -49,21 +50,49 @@ class Observable(object):
as an observer. Every time the observable changes, it sends a notification with
self as only argument to all its observers.
"""
_updated = True
_updates = True
def __init__(self, *args, **kwargs):
super(Observable, self).__init__()
from lists_and_dicts import ObservablesList
self.observers = ObservablesList()
from lists_and_dicts import ObserverList
self.observers = ObserverList()

@property
def updates(self):
p = getattr(self, '_highest_parent_', None)
if p is not None:
self._updates = p._updates
return self._updates

@updates.setter
def updates(self, ups):
assert isinstance(ups, bool), "updates are either on (True) or off (False)"
p = getattr(self, '_highest_parent_', None)
if p is not None:
p._updates = ups
else:
self._updates = ups
if ups:
self._trigger_params_changed()

def add_observer(self, observer, callble, priority=0):
"""
Add an observer `observer` with the callback `callble`
and priority `priority` to this observers list.
"""
self.observers.add(priority, observer, callble)

def remove_observer(self, observer, callble=None):
"""
Either (if callble is None) remove all callables,
which were added alongside observer,
or remove callable `callble` which was added alongside
the observer `observer`.
"""
to_remove = []
for poc in self.observers:
_, obs, clble = poc
if callble is not None:
if (obs == observer) and (callble == clble):
if (obs is observer) and (callble == clble):
to_remove.append(poc)
else:
if obs is observer:
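The new updates property above gives one switch, stored at the highest parent, to silence notifications while many parameters are edited and to trigger a single recompute when re-enabled. A hedged usage sketch; the model and parameter names are hypothetical:

m.updates = False            # parameters_changed() is suppressed from here on
m.rbf.lengthscale[:] = 2.0
m.rbf.variance[:] = 0.5
m.updates = True             # the setter calls _trigger_params_changed() once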
@@ -81,6 +110,8 @@ class Observable(object):
:param min_priority: only notify observers with priority > min_priority
if min_priority is None, notify all observers in order
"""
if not self.updates:
return
if which is None:
which = self
if min_priority is None:

@@ -91,10 +122,6 @@ class Observable(object):
break
callble(self, which=which)

#===============================================================================
# Foundation framework for parameterized and param objects:
#===============================================================================

class Parentable(object):
"""
Enable an Object to have a parent.

@@ -151,6 +178,7 @@ class Pickleable(object):
"""
def __init__(self, *a, **kw):
super(Pickleable, self).__init__()

#===========================================================================
# Pickling operations
#===========================================================================

@@ -171,37 +199,49 @@ class Pickleable(object):
#===========================================================================
# copy and pickling
#===========================================================================
def copy(self):
"""Returns a (deep) copy of the current model"""
def copy(self, memo=None, which=None):
"""
Returns a (deep) copy of the current parameter handle.

All connections to parents of the copy will be cut.

:param dict memo: memo for deepcopy
:param Parameterized which: parameterized object which started the copy process [default: self]
"""
#raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy"
if memo is None:
memo = {}
import copy
memo = {}
# the next part makes sure that we do not include parents in any form:
parents = []
self.traverse_parents(parents.append) # collect parents
if which is None:
which = self
which.traverse_parents(parents.append) # collect parents
for p in parents:
memo[id(p)] = None # set all parents to be None, so they will not be copied
memo[id(self.gradient)] = None # reset the gradient
memo[id(self.param_array)] = None # and param_array
memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
c = copy.deepcopy(self, memo) # and start the copy
c._parent_index_ = None
return c
if not memo.has_key(id(p)):memo[id(p)] = None # set all parents to be None, so they will not be copied
if not memo.has_key(id(self.gradient)):memo[id(self.gradient)] = None # reset the gradient
if not memo.has_key(id(self._fixes_)):memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
copy = copy.deepcopy(self, memo) # and start the copy
copy._parent_index_ = None
copy._trigger_params_changed()
return copy

def __deepcopy__(self, memo):
s = self.__new__(self.__class__) # fresh instance
memo[id(self)] = s # be sure to break all cycles --> self is already done
import copy
s.__dict__.update(copy.deepcopy(self.__dict__, memo)) # standard copy
s.__setstate__(copy.deepcopy(self.__getstate__(), memo)) # standard copy
return s

def __getstate__(self):
ignore_list = ([#'_parent_', '_parent_index_',
#'observers',
'_param_array_', '_gradient_array_', '_fixes_',
'_Cacher_wrap__cachers']
#+ self.parameter_names(recursive=False)
)
ignore_list = ['_param_array_', # parameters get set from bottom to top
'_gradient_array_', # as well as gradients
'_optimizer_copy_',
'logger',
'observers',
'_fixes_', # and fixes
'_Cacher_wrap__cachers', # never pickle cachers
]
dc = dict()
for k,v in self.__dict__.iteritems():
if k not in ignore_list:
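The reworked copy() above cuts all parent links via the deepcopy memo and calls _trigger_params_changed() on the copy so its internal state is recomputed. A hedged sketch of the behaviour from user code; model and parameter names are hypothetical:

kern_copy = m.kern.copy()          # deep copy of the kernel subtree only
print(kern_copy._parent_index_)    # None: the copy is detached from m
kern_copy.lengthscale[:] = 10.0    # editing the copy leaves m.kern untouched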
@@ -210,7 +250,11 @@ class Pickleable(object):

def __setstate__(self, state):
self.__dict__.update(state)
return self
from lists_and_dicts import ObserverList
self.observers = ObserverList()
self._setup_observers()
self._optimizer_copy_transformed = False

class Gradcheckable(Pickleable, Parentable):
"""

@@ -246,7 +290,6 @@ class Gradcheckable(Pickleable, Parentable):
"""
raise HierarchyError, "This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!"

class Nameable(Gradcheckable):
"""
Make an object nameable inside the hierarchy.

@@ -285,41 +328,8 @@ class Nameable(Gradcheckable):
return self._parent_.hierarchy_name() + "." + adjust(self.name)
return adjust(self.name)

class Indexable(object):
"""
Enable enraveled indexes and offsets for this object.
The raveled index of an object is the index for its parameters in a flattened int array.
"""
def __init__(self, *a, **kw):
super(Indexable, self).__init__()

def _raveled_index(self):
"""
Flattened array of ints, specifying the index of this object.
This has to account for shaped parameters!
"""
raise NotImplementedError, "Need to be able to get the raveled Index"

def _offset_for(self, param):
"""
Return the offset of the param inside this parameterized object.
This does not need to account for shaped parameters, as it
basically just sums up the parameter sizes which come before param.
"""
return 0
#raise NotImplementedError, "shouldnt happen, offset required from non parameterization object?"

def _raveled_index_for(self, param):
"""
get the raveled index for a param
that is an int array, containing the indexes for the flattened
param inside this parameterized logic.
"""
return param._raveled_index()
#raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?"

class Constrainable(Nameable, Indexable, Observable):
class Indexable(Nameable, Observable):
"""
Make an object constrainable with Priors and Transformations.
TODO: Mappings!!

@@ -330,7 +340,7 @@ class Constrainable(Nameable, Indexable, Observable):
:func:`constrain()` and :func:`unconstrain()` are main methods here
"""
def __init__(self, name, default_constraint=None, *a, **kw):
super(Constrainable, self).__init__(name=name, *a, **kw)
super(Indexable, self).__init__(name=name, *a, **kw)
self._default_constraint_ = default_constraint
from index_operations import ParameterIndexOperations
self.constraints = ParameterIndexOperations()

@@ -352,6 +362,39 @@ class Constrainable(Nameable, Indexable, Observable):
self._connect_fixes()
self._notify_parent_change()

#===========================================================================
# Indexable
#===========================================================================
def _offset_for(self, param):
"""
Return the offset of the param inside this parameterized object.
This does not need to account for shaped parameters, as it
basically just sums up the parameter sizes which come before param.
"""
if param.has_parent():
if param._parent_._get_original(param) in self.parameters:
return self._param_slices_[param._parent_._get_original(param)._parent_index_].start
return self._offset_for(param._parent_) + param._parent_._offset_for(param)
return 0

def _raveled_index_for(self, param):
"""
get the raveled index for a param
that is an int array, containing the indexes for the flattened
param inside this parameterized logic.
"""
from param import ParamConcatenation
if isinstance(param, ParamConcatenation):
return np.hstack((self._raveled_index_for(p) for p in param.params))
return param._raveled_index() + self._offset_for(param)

def _raveled_index(self):
"""
Flattened array of ints, specifying the index of this object.
This has to account for shaped parameters!
"""
return np.r_[:self.size]

#===========================================================================
# Fixing Parameters:
#===========================================================================
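The _offset_for and _raveled_index_for helpers above map a parameter to its slot in the flat parameter vector of the whole hierarchy, by summing the sizes of the parameters that come before it. A hedged illustration of what that looks like from the top of a model; the layout shown is the usual GPRegression ordering and is an assumption, not taken from this commit:

# m = GPy.models.GPRegression(X, Y) with an rbf kernel would flatten to
#   [rbf.variance, rbf.lengthscale, Gaussian_noise.variance]
print(m._raveled_index_for(m.rbf.lengthscale))
# -> array([1]), i.e. offset 1 because rbf.variance (size 1) precedes it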
@@ -363,8 +406,10 @@ class Constrainable(Nameable, Indexable, Observable):
"""
if value is not None:
self[:] = value
reconstrained = self.unconstrain()
index = self._add_to_index_operations(self.constraints, reconstrained, __fixed__, warning)

index = self._raveled_index()
#reconstrained = self.unconstrain()
index = self._add_to_index_operations(self.constraints, index, __fixed__, warning)
self._highest_parent_._set_fixed(self, index)
self.notify_observers(self, None if trigger_parent else -np.inf)
return index

@@ -406,9 +451,24 @@ class Constrainable(Nameable, Indexable, Observable):
self._fixes_ = None
del self.constraints[__fixed__]

#===========================================================================
# Convenience for fixed
#===========================================================================
def _has_fixes(self):
return hasattr(self, "_fixes_") and self._fixes_ is not None and self._fixes_.size == self.size

@property
def is_fixed(self):
for p in self.parameters:
if not p.is_fixed: return False
return True

def _get_original(self, param):
# if advanced indexing is activated it happens that the array is a copy
# you can retrieve the original param through this method, by passing
# the copy here
return self.parameters[param._parent_index_]

#===========================================================================
# Prior Operations
#===========================================================================

@@ -432,8 +492,7 @@ class Constrainable(Nameable, Indexable, Observable):

def unset_priors(self, *priors):
"""
Un-set all priors given from this parameter handle.

Un-set all priors given (in *priors) from this parameter handle.
"""
return self._remove_from_index_operations(self.priors, priors)
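Fixing now records the parameter's raveled index under __fixed__ directly, without un-constraining first, and fixed entries are excluded from the optimizer's view of the model. A hedged sketch of the effect; model and parameter names are hypothetical:

before = m.optimizer_array.size
m.rbf.lengthscale.fix(1.5)       # set the value and mark its index as __fixed__
after = m.optimizer_array.size   # the fixed entry no longer appears here
print(before - after)            # -> 1
m.rbf.lengthscale.unfix()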
@@ -535,7 +594,7 @@ class Constrainable(Nameable, Indexable, Observable):
self.constraints = ParameterIndexOperationsView(parent.constraints, parent._offset_for(self), self.size)
self.priors = ParameterIndexOperationsView(parent.priors, parent._offset_for(self), self.size)
self._fixes_ = None
for p in self._parameters_:
for p in self.parameters:
p._parent_changed(parent)

def _add_to_index_operations(self, which, reconstrained, what, warning):

@@ -563,53 +622,142 @@ class Constrainable(Nameable, Indexable, Observable):
removed = np.empty((0,), dtype=int)
for t in transforms:
unconstrained = which.remove(t, self._raveled_index())
print unconstrained
removed = np.union1d(removed, unconstrained)
if t is __fixed__:
self._highest_parent_._set_unfixed(self, unconstrained)

return removed

class OptimizationHandlable(Constrainable):
class OptimizationHandlable(Indexable):
"""
This enables optimization handles on an Object as done in GPy 0.4.

`..._transformed`: make sure the transformations and constraints etc are handled
`..._optimizer_copy_transformed`: make sure the transformations and constraints etc are handled
"""
def __init__(self, name, default_constraint=None, *a, **kw):
super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)
self._optimizer_copy_ = None
self._optimizer_copy_transformed = False

def _get_params_transformed(self):
# transformed parameters (apply transformation rules)
p = self.param_array.copy()
[np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
if self.has_parent() and self.constraints[__fixed__].size != 0:
fixes = np.ones(self.size).astype(bool)
fixes[self.constraints[__fixed__]] = FIXED
return p[fixes]
elif self._has_fixes():
return p[self._fixes_]
return p

def _set_params_transformed(self, p):
if not(p is self.param_array):
#===========================================================================
# Optimizer copy
#===========================================================================
@property
def optimizer_array(self):
"""
Array for the optimizer to work on.
This array always lives in the space for the optimizer.
Thus, it is untransformed, going from Transformations.

Setting this array, will make sure the transformed parameters for this model
will be set accordingly. It has to be set with an array, retrieved from
this method, as e.g. fixing will resize the array.

The optimizer should only interfere with this array, such that transformations
are secured.
"""
if self.__dict__.get('_optimizer_copy_', None) is None or self.size != self._optimizer_copy_.size:
self._optimizer_copy_ = np.empty(self.size)

if not self._optimizer_copy_transformed:
self._optimizer_copy_.flat = self.param_array.flat
[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
if self.has_parent() and self.constraints[__fixed__].size != 0:
fixes = np.ones(self.size).astype(bool)
fixes[self.constraints[__fixed__]] = FIXED
self.param_array.flat[fixes] = p
elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
else: self.param_array.flat = p
[np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
for c, ind in self.constraints.iteritems() if c != __fixed__]
return self._optimizer_copy_[fixes]
elif self._has_fixes():
return self._optimizer_copy_[self._fixes_]
self._optimizer_copy_transformed = True

return self._optimizer_copy_

@optimizer_array.setter
def optimizer_array(self, p):
"""
Make sure the optimizer copy does not get touched, thus, we only want to
set the values *inside* not the array itself.

Also we want to update param_array in here.
"""
f = None
if self.has_parent() and self.constraints[__fixed__].size != 0:
f = np.ones(self.size).astype(bool)
f[self.constraints[__fixed__]] = FIXED
elif self._has_fixes():
f = self._fixes_
if f is None:
self.param_array.flat = p
[np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
for c, ind in self.constraints.iteritems() if c != __fixed__]
else:
self.param_array.flat[f] = p
[np.put(self.param_array, ind[f[ind]], c.f(self.param_array.flat[ind[f[ind]]]))
for c, ind in self.constraints.iteritems() if c != __fixed__]

self._optimizer_copy_transformed = False
self._trigger_params_changed()
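optimizer_array holds the parameters in the unconstrained space the optimizer works in: reading applies each constraint's finv, and setting applies f before writing into param_array. A small sketch of that round trip for a positivity constraint; the import path is assumed from this diff's layout:

import numpy as np
from GPy.core.parameterization.transformations import Logexp

t = Logexp()                      # the default 'positive' constraint
opt_value = t.finv(2.0)           # what the optimizer sees for a value of 2.0
print(np.allclose(t.f(opt_value), 2.0))   # f() maps it back into the model space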
def _get_params_transformed(self):
raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
# # transformed parameters (apply un-transformation rules)
# p = self.param_array.copy()
# [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
# if self.has_parent() and self.constraints[__fixed__].size != 0:
# fixes = np.ones(self.size).astype(bool)
# fixes[self.constraints[__fixed__]] = FIXED
# return p[fixes]
# elif self._has_fixes():
# return p[self._fixes_]
# return p
#
def _set_params_transformed(self, p):
raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"

# """
# Set parameters p, but make sure they get transformed before setting.
# This means, the optimizer sees p, whereas the model sees transformed(p),
# such that, the parameters the model sees are in the right domain.
# """
# if not(p is self.param_array):
# if self.has_parent() and self.constraints[__fixed__].size != 0:
# fixes = np.ones(self.size).astype(bool)
# fixes[self.constraints[__fixed__]] = FIXED
# self.param_array.flat[fixes] = p
# elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
# else: self.param_array.flat = p
# [np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
# for c, ind in self.constraints.iteritems() if c != __fixed__]
# self._trigger_params_changed()

def _trigger_params_changed(self, trigger_parent=True):
[p._trigger_params_changed(trigger_parent=False) for p in self._parameters_]
"""
First tell all children to update,
then update yourself.

If trigger_parent is True, we will tell the parent, otherwise not.
"""
[p._trigger_params_changed(trigger_parent=False) for p in self.parameters]
self.notify_observers(None, None if trigger_parent else -np.inf)

def _size_transformed(self):
"""
As fixes are not passed to the optimiser, the size of the model for the optimiser
is the size of all parameters minus the size of the fixes.
"""
return self.size - self.constraints[__fixed__].size

def _transform_gradients(self, g):
"""
Transform the gradients by multiplying the gradient factor for each
constraint to it.
"""
if self.has_parent():
return g
[np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
if self._has_fixes(): return g[self._fixes_]
return g

@property
def num_params(self):
"""
@@ -628,8 +776,8 @@ class OptimizationHandlable(Constrainable):
"""
if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
else: adjust = lambda x: x
if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)]
else: names = [adjust(x.name) for x in self._parameters_]
if recursive: names = [xi for x in self.parameters for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)]
else: names = [adjust(x.name) for x in self.parameters]
if add_self: names = map(lambda x: adjust(self.name) + "." + x, names)
return names

@@ -651,7 +799,7 @@ class OptimizationHandlable(Constrainable):
Randomize the model.
Make this draw from the prior if one exists, else draw from given random generator

:param rand_gen: numpy random number generator which takes args and kwargs
:param rand_gen: np random number generator which takes args and kwargs
:param float loc: loc parameter for random number generator
:param float scale: scale parameter for random number generator
:param args, kwargs: will be passed through to random number generator

@@ -660,14 +808,14 @@ class OptimizationHandlable(Constrainable):
x = rand_gen(loc=loc, scale=scale, size=self._size_transformed(), *args, **kwargs)
# now draw from prior where possible
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
self._set_params_transformed(x) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...)

#===========================================================================
# For shared memory arrays. This does nothing in Param, but sets the memory
# for all parameterized objects
#===========================================================================
@property
def full_gradient(self):
def gradient_full(self):
"""
Note to users:
This does not return the gradient in the right shape! Use self.gradient
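The parameter_names hunk above now walks the renamed parameters list recursively and prefixes each child name with its parent. A hedged example of the output on a standard regression model; the names shown are the usual defaults, assumed rather than taken from this commit:

# m = GPy.models.GPRegression(X, Y)
print(m.parameter_names())
# e.g. ['rbf.variance', 'rbf.lengthscale', 'Gaussian_noise.variance']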
@@ -681,27 +829,45 @@ class OptimizationHandlable(Constrainable):
return self._gradient_array_

def _propagate_param_grad(self, parray, garray):
"""
For propagating the param_array and gradient_array.
This ensures the in memory view of each subsequent array.

1.) connect param_array of children to self.param_array
2.) tell all children to propagate further
"""
pi_old_size = 0
for pi in self._parameters_:
for pi in self.parameters:
pislice = slice(pi_old_size, pi_old_size + pi.size)

self.param_array[pislice] = pi.param_array.flat # , requirements=['C', 'W']).flat
self.full_gradient[pislice] = pi.full_gradient.flat # , requirements=['C', 'W']).flat
self.gradient_full[pislice] = pi.gradient_full.flat # , requirements=['C', 'W']).flat

pi.param_array.data = parray[pislice].data
pi.full_gradient.data = garray[pislice].data
pi.gradient_full.data = garray[pislice].data

pi._propagate_param_grad(parray[pislice], garray[pislice])
pi_old_size += pi.size

class Parameterizable(OptimizationHandlable):
"""
A parameterisable class.

This class provides the parameters list (ArrayList) and standard parameter handling,
such as {add|remove}_parameter(), traverse hierarchy and param_array, gradient_array
and the empty parameters_changed().

This class is abstract and should not be instantiated.
Use GPy.core.Parameterized() as node (or leaf) in the parameterized hierarchy.
Use GPy.core.Param() for a leaf in the parameterized hierarchy.
"""
def __init__(self, *args, **kwargs):
super(Parameterizable, self).__init__(*args, **kwargs)
from GPy.core.parameterization.lists_and_dicts import ArrayList
self._parameters_ = ArrayList()
self.parameters = ArrayList()
self._param_array_ = None
self.size = 0
self._added_names_ = set()
self.logger = logging.getLogger(self.__class__.__name__)
self.__visited = False # for traversing in reverse order we need to know if we were here already

@property

@@ -735,7 +901,7 @@ class Parameterizable(OptimizationHandlable):
if not self.__visited:
visit(self, *args, **kwargs)
self.__visited = True
for c in self._parameters_:
for c in self.parameters:
c.traverse(visit, *args, **kwargs)
self.__visited = False

@@ -743,9 +909,9 @@ class Parameterizable(OptimizationHandlable):
"""
Traverse the hierarchy upwards, visiting all parents and their children except self.
See "visitor pattern" in literature. This is implemented in pre-order fashion.

Example:

parents = []
self.traverse_parents(parents.append)
print parents

@@ -754,7 +920,7 @@ class Parameterizable(OptimizationHandlable):
self.__visited = True
self._parent_._traverse_parents(visit, *args, **kwargs)
self.__visited = False

def _traverse_parents(self, visit, *args, **kwargs):
if not self.__visited:
self.__visited = True

@@ -779,7 +945,7 @@ class Parameterizable(OptimizationHandlable):

@property
def num_params(self):
return len(self._parameters_)
return len(self.parameters)

def _add_parameter_name(self, param, ignore_added_names=False):
pname = adjust_name_for_printing(param.name)
@@ -812,131 +978,10 @@ class Parameterizable(OptimizationHandlable):
self._remove_parameter_name(None, old_name)
self._add_parameter_name(param)

def add_parameter(self, param, index=None, _ignore_added_names=False):
"""
:param parameters: the parameters to add
:type parameters: list of or one :py:class:`GPy.core.param.Param`
:param [index]: index of where to put parameters

:param bool _ignore_added_names: whether the name of the parameter overrides a possibly existing field

Add all parameters to this param class, you can insert parameters
at any given index using the :func:`list.insert` syntax
"""
if param in self._parameters_ and index is not None:
self.remove_parameter(param)
self.add_parameter(param, index)
# elif param.has_parent():
# raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short())
elif param not in self._parameters_:
if param.has_parent():
def visit(parent, self):
if parent is self:
raise HierarchyError, "You cannot add a parameter twice into the hierarchy"
param.traverse_parents(visit, self)
param._parent_.remove_parameter(param)
# make sure the size is set
if index is None:
self.constraints.update(param.constraints, self.size)
self.priors.update(param.priors, self.size)
self._parameters_.append(param)
else:
start = sum(p.size for p in self._parameters_[:index])
self.constraints.shift_right(start, param.size)
self.priors.shift_right(start, param.size)
self.constraints.update(param.constraints, start)
self.priors.update(param.priors, start)
self._parameters_.insert(index, param)

param.add_observer(self, self._pass_through_notify_observers, -np.inf)

parent = self
while parent is not None:
parent.size += param.size
parent = parent._parent_

self._connect_parameters()

self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names)
self._highest_parent_._notify_parent_change()
self._highest_parent_._connect_fixes()

else:
raise HierarchyError, """Parameter exists already and no copy made"""

def add_parameters(self, *parameters):
"""
convenience method for adding several
parameters without gradient specification
"""
[self.add_parameter(p) for p in parameters]

def remove_parameter(self, param):
"""
:param param: param object to remove from being a parameter of this parameterized object.
"""
if not param in self._parameters_:
raise RuntimeError, "Parameter {} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name)

start = sum([p.size for p in self._parameters_[:param._parent_index_]])
self._remove_parameter_name(param)
self.size -= param.size
del self._parameters_[param._parent_index_]

param._disconnect_parent()
param.remove_observer(self, self._pass_through_notify_observers)
self.constraints.shift_left(start, param.size)

self._connect_parameters()
self._notify_parent_change()

parent = self._parent_
while parent is not None:
parent.size -= param.size
parent = parent._parent_

self._highest_parent_._connect_parameters()
self._highest_parent_._connect_fixes()
self._highest_parent_._notify_parent_change()

def _connect_parameters(self, ignore_added_names=False):
# connect parameterlist to this parameterized object
# This just sets up the right connection for the params objects
# to be used as parameters
# it also sets the constraints for each parameter to the constraints
# of their respective parents
if not hasattr(self, "_parameters_") or len(self._parameters_) < 1:
# no parameters for this class
return
if self.param_array.size != self.size:
self.param_array = np.empty(self.size, dtype=np.float64)
if self.gradient.size != self.size:
self._gradient_array_ = np.empty(self.size, dtype=np.float64)

old_size = 0
self._param_slices_ = []
for i, p in enumerate(self._parameters_):
p._parent_ = self
p._parent_index_ = i

pslice = slice(old_size, old_size + p.size)
# first connect all children
p._propagate_param_grad(self.param_array[pslice], self.full_gradient[pslice])
# then connect children to self
self.param_array[pslice] = p.param_array.flat # , requirements=['C', 'W']).ravel(order='C')
self.full_gradient[pslice] = p.full_gradient.flat # , requirements=['C', 'W']).ravel(order='C')

if not p.param_array.flags['C_CONTIGUOUS']:
raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"

p.param_array.data = self.param_array[pslice].data
p.full_gradient.data = self.full_gradient[pslice].data

self._param_slices_.append(pslice)

self._add_parameter_name(p, ignore_added_names=ignore_added_names)
old_size += p.size
def __setstate__(self, state):
super(Parameterizable, self).__setstate__(state)
self.logger = logging.getLogger(self.__class__.__name__)
return self

#===========================================================================
# notification system
@@ -945,24 +990,16 @@ class Parameterizable(OptimizationHandlable):
        self.parameters_changed()
    def _pass_through_notify_observers(self, me, which=None):
        self.notify_observers(which=which)

    #===========================================================================
    # Pickling
    #===========================================================================
    def __setstate__(self, state):
        super(Parameterizable, self).__setstate__(state)
        self._connect_parameters()
        self._connect_fixes()
        self._notify_parent_change()

        self.parameters_changed()

    def copy(self):
        c = super(Parameterizable, self).copy()
        c._connect_parameters()
        c._connect_fixes()
        c._notify_parent_change()
        return c
    def _setup_observers(self):
        """
        Setup the default observers

        1: parameters_changed_notify
        2: pass through to parent, if present
        """
        self.add_observer(self, self._parameters_changed_notification, -100)
        if self.has_parent():
            self.add_observer(self._parent_, self._parent_._pass_through_notify_observers, -np.inf)
    #===========================================================================
    # From being parentable, we have to define the parent_change notification
    #===========================================================================
@@ -970,7 +1007,7 @@ class Parameterizable(OptimizationHandlable):
        """
        Notify all parameters that the parent has changed
        """
        for p in self._parameters_:
        for p in self.parameters:
            p._parent_changed(self)

    def parameters_changed(self):
@@ -3,13 +3,10 @@

import numpy; np = numpy
import cPickle
import itertools
from re import compile, _pattern_type
from param import ParamConcatenation
from parameter_core import Pickleable, Parameterizable, adjust_name_for_printing
from transformations import __fixed__
from lists_and_dicts import ArrayList
from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing

class ParametersChangedMeta(type):
    def __call__(self, *args, **kw):

@@ -68,8 +65,7 @@ class Parameterized(Parameterizable):
    def __init__(self, name=None, parameters=[], *a, **kw):
        super(Parameterized, self).__init__(name=name, *a, **kw)
        self._in_init_ = True
        self._parameters_ = ArrayList()
        self.size = sum(p.size for p in self._parameters_)
        self.size = sum(p.size for p in self.parameters)
        self.add_observer(self, self._parameters_changed_notification, -100)
        if not self._has_fixes():
            self._fixes_ = None

@@ -86,7 +82,7 @@ class Parameterized(Parameterizable):
            iamroot=True
        node = pydot.Node(id(self), shape='box', label=self.name)#, color='white')
        G.add_node(node)
        for child in self._parameters_:
        for child in self.parameters:
            child_node = child.build_pydot(G)
            G.add_edge(pydot.Edge(node, child_node))#, color='white'))
@ -102,58 +98,133 @@ class Parameterized(Parameterizable):
|
|||
return node
|
||||
|
||||
#===========================================================================
|
||||
# Gradient control
|
||||
# Add remove parameters:
|
||||
#===========================================================================
|
||||
def _transform_gradients(self, g):
|
||||
if self.has_parent():
|
||||
return g
|
||||
[numpy.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
|
||||
if self._has_fixes(): return g[self._fixes_]
|
||||
return g
|
||||
|
||||
|
||||
#===========================================================================
|
||||
# Indexable
|
||||
#===========================================================================
|
||||
def _offset_for(self, param):
|
||||
# get the offset in the parameterized index array for param
|
||||
if param.has_parent():
|
||||
if param._parent_._get_original(param) in self._parameters_:
|
||||
return self._param_slices_[param._parent_._get_original(param)._parent_index_].start
|
||||
return self._offset_for(param._parent_) + param._parent_._offset_for(param)
|
||||
return 0
|
||||
|
||||
def _raveled_index_for(self, param):
|
||||
def add_parameter(self, param, index=None, _ignore_added_names=False):
|
||||
"""
|
||||
get the raveled index for a param
|
||||
that is an int array, containing the indexes for the flattened
|
||||
param inside this parameterized logic.
|
||||
"""
|
||||
if isinstance(param, ParamConcatenation):
|
||||
return numpy.hstack((self._raveled_index_for(p) for p in param.params))
|
||||
return param._raveled_index() + self._offset_for(param)
|
||||
:param parameters: the parameters to add
|
||||
:type parameters: list of or one :py:class:`GPy.core.param.Param`
|
||||
:param [index]: index of where to put parameters
|
||||
|
||||
def _raveled_index(self):
|
||||
"""
|
||||
get the raveled index for this object,
|
||||
this is not in the global view of things!
|
||||
"""
|
||||
return numpy.r_[:self.size]
|
||||
:param bool _ignore_added_names: whether the name of the parameter overrides a possibly existing field
|
||||
|
||||
#===========================================================================
|
||||
# Convenience for fixed, tied checking of param:
|
||||
#===========================================================================
|
||||
@property
|
||||
def is_fixed(self):
|
||||
for p in self._parameters_:
|
||||
if not p.is_fixed: return False
|
||||
return True
|
||||
Add all parameters to this param class, you can insert parameters
|
||||
at any given index using the :func:`list.insert` syntax
|
||||
"""
|
||||
if param in self.parameters and index is not None:
|
||||
self.remove_parameter(param)
|
||||
self.add_parameter(param, index)
|
||||
# elif param.has_parent():
|
||||
# raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short())
|
||||
elif param not in self.parameters:
|
||||
if param.has_parent():
|
||||
def visit(parent, self):
|
||||
if parent is self:
|
||||
raise HierarchyError, "You cannot add a parameter twice into the hierarchy"
|
||||
param.traverse_parents(visit, self)
|
||||
param._parent_.remove_parameter(param)
|
||||
# make sure the size is set
|
||||
if index is None:
|
||||
self.constraints.update(param.constraints, self.size)
|
||||
self.priors.update(param.priors, self.size)
|
||||
self.parameters.append(param)
|
||||
else:
|
||||
start = sum(p.size for p in self.parameters[:index])
|
||||
self.constraints.shift_right(start, param.size)
|
||||
self.priors.shift_right(start, param.size)
|
||||
self.constraints.update(param.constraints, start)
|
||||
self.priors.update(param.priors, start)
|
||||
self.parameters.insert(index, param)
|
||||
|
||||
def _get_original(self, param):
|
||||
# if advanced indexing is activated it happens that the array is a copy
|
||||
# you can retrieve the original param through this method, by passing
|
||||
# the copy here
|
||||
return self._parameters_[param._parent_index_]
|
||||
param.add_observer(self, self._pass_through_notify_observers, -np.inf)
|
||||
|
||||
parent = self
|
||||
while parent is not None:
|
||||
parent.size += param.size
|
||||
parent = parent._parent_
|
||||
|
||||
self._connect_parameters()
|
||||
|
||||
self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names)
|
||||
self._highest_parent_._notify_parent_change()
|
||||
self._highest_parent_._connect_fixes()
|
||||
|
||||
else:
|
||||
raise HierarchyError, """Parameter exists already and no copy made"""
|
||||
|
||||
|
||||
def add_parameters(self, *parameters):
|
||||
"""
|
||||
convenience method for adding several
|
||||
parameters without gradient specification
|
||||
"""
|
||||
[self.add_parameter(p) for p in parameters]
|
||||
|
||||
def remove_parameter(self, param):
|
||||
"""
|
||||
:param param: param object to remove from being a parameter of this parameterized object.
|
||||
"""
|
||||
if not param in self.parameters:
|
||||
raise RuntimeError, "Parameter {} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name)
|
||||
|
||||
start = sum([p.size for p in self.parameters[:param._parent_index_]])
|
||||
self._remove_parameter_name(param)
|
||||
self.size -= param.size
|
||||
del self.parameters[param._parent_index_]
|
||||
|
||||
param._disconnect_parent()
|
||||
param.remove_observer(self, self._pass_through_notify_observers)
|
||||
self.constraints.shift_left(start, param.size)
|
||||
|
||||
self._connect_parameters()
|
||||
self._notify_parent_change()
|
||||
|
||||
parent = self._parent_
|
||||
while parent is not None:
|
||||
parent.size -= param.size
|
||||
parent = parent._parent_
|
||||
|
||||
self._highest_parent_._connect_parameters()
|
||||
self._highest_parent_._connect_fixes()
|
||||
self._highest_parent_._notify_parent_change()
|
||||
|
||||
def _connect_parameters(self, ignore_added_names=False):
|
||||
# connect parameterlist to this parameterized object
|
||||
# This just sets up the right connection for the params objects
|
||||
# to be used as parameters
|
||||
# it also sets the constraints for each parameter to the constraints
|
||||
# of their respective parents
|
||||
if not hasattr(self, "parameters") or len(self.parameters) < 1:
|
||||
# no parameters for this class
|
||||
return
|
||||
if self.param_array.size != self.size:
|
||||
self.param_array = np.empty(self.size, dtype=np.float64)
|
||||
if self.gradient.size != self.size:
|
||||
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
|
||||
|
||||
old_size = 0
|
||||
self._param_slices_ = []
|
||||
for i, p in enumerate(self.parameters):
|
||||
p._parent_ = self
|
||||
p._parent_index_ = i
|
||||
|
||||
pslice = slice(old_size, old_size + p.size)
|
||||
# first connect all children
|
||||
p._propagate_param_grad(self.param_array[pslice], self.gradient_full[pslice])
|
||||
# then connect children to self
|
||||
self.param_array[pslice] = p.param_array.flat # , requirements=['C', 'W']).ravel(order='C')
|
||||
self.gradient_full[pslice] = p.gradient_full.flat # , requirements=['C', 'W']).ravel(order='C')
|
||||
|
||||
if not p.param_array.flags['C_CONTIGUOUS']:
|
||||
raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"
|
||||
|
||||
p.param_array.data = self.param_array[pslice].data
|
||||
p.gradient_full.data = self.gradient_full[pslice].data
|
||||
|
||||
self._param_slices_.append(pslice)
|
||||
|
||||
self._add_parameter_name(p, ignore_added_names=ignore_added_names)
|
||||
old_size += p.size
|
||||
|
||||
#===========================================================================
|
||||
# Get/set parameters:
|
||||
|
|
@ -200,10 +271,38 @@ class Parameterized(Parameterizable):
|
|||
|
||||
def __setattr__(self, name, val):
|
||||
# override the default behaviour, if setting a param, so broadcasting can by used
|
||||
if hasattr(self, "_parameters_"):
|
||||
pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False)
|
||||
if name in pnames: self._parameters_[pnames.index(name)][:] = val; return
|
||||
if hasattr(self, "parameters"):
|
||||
try:
|
||||
pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False)
|
||||
if name in pnames: self.parameters[pnames.index(name)][:] = val; return
|
||||
except AttributeError:
|
||||
pass
|
||||
object.__setattr__(self, name, val);
|
||||
|
||||
#===========================================================================
|
||||
# Pickling
|
||||
#===========================================================================
|
||||
def __setstate__(self, state):
|
||||
super(Parameterized, self).__setstate__(state)
|
||||
try:
|
||||
self._connect_parameters()
|
||||
self._connect_fixes()
|
||||
self._notify_parent_change()
|
||||
self.parameters_changed()
|
||||
except Exception as e:
|
||||
print "WARNING: caught exception {!s}, trying to continue".format(e)
|
||||
|
||||
def copy(self, memo=None):
|
||||
if memo is None:
|
||||
memo = {}
|
||||
memo[id(self.optimizer_array)] = None # and param_array
|
||||
memo[id(self.param_array)] = None # and param_array
|
||||
copy = super(Parameterized, self).copy(memo)
|
||||
copy._connect_parameters()
|
||||
copy._connect_fixes()
|
||||
copy._notify_parent_change()
|
||||
return copy
|
||||
|
||||
#===========================================================================
|
||||
# Printing:
|
||||
#===========================================================================
|
||||
|
|
@ -211,22 +310,22 @@ class Parameterized(Parameterizable):
|
|||
return self.hierarchy_name()
|
||||
@property
|
||||
def flattened_parameters(self):
|
||||
return [xi for x in self._parameters_ for xi in x.flattened_parameters]
|
||||
return [xi for x in self.parameters for xi in x.flattened_parameters]
|
||||
@property
|
||||
def _parameter_sizes_(self):
|
||||
return [x.size for x in self._parameters_]
|
||||
return [x.size for x in self.parameters]
|
||||
@property
|
||||
def parameter_shapes(self):
|
||||
return [xi for x in self._parameters_ for xi in x.parameter_shapes]
|
||||
return [xi for x in self.parameters for xi in x.parameter_shapes]
|
||||
@property
|
||||
def _constraints_str(self):
|
||||
return [cs for p in self._parameters_ for cs in p._constraints_str]
|
||||
return [cs for p in self.parameters for cs in p._constraints_str]
|
||||
@property
|
||||
def _priors_str(self):
|
||||
return [cs for p in self._parameters_ for cs in p._priors_str]
|
||||
return [cs for p in self.parameters for cs in p._priors_str]
|
||||
@property
|
||||
def _description_str(self):
|
||||
return [xi for x in self._parameters_ for xi in x._description_str]
|
||||
return [xi for x in self.parameters for xi in x._description_str]
|
||||
@property
|
||||
def _ties_str(self):
|
||||
return [','.join(x._ties_str) for x in self.flattened_parameters]
|
||||
|
|
@ -246,7 +345,7 @@ class Parameterized(Parameterizable):
|
|||
to_print = []
|
||||
for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs):
|
||||
to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p))
|
||||
# to_print = [format_spec.format(p=p, const=c, t=t) if isinstance(p, Param) else p.__str__(header=False) for p, c, t in itertools.izip(self._parameters_, constrs, ts)]
|
||||
# to_print = [format_spec.format(p=p, const=c, t=t) if isinstance(p, Param) else p.__str__(header=False) for p, c, t in itertools.izip(self.parameters, constrs, ts)]
|
||||
sep = '-' * (nl + sl + cl + + pl + tl + 8 * 2 + 3)
|
||||
if header:
|
||||
header = " {{0:<{0}s}} | {{1:^{1}s}} | {{2:^{2}s}} | {{3:^{3}s}} | {{4:^{4}s}}".format(nl, sl, cl, pl, tl).format(name, "Value", "Constraint", "Prior", "Tied to")
|
||||
|
|
|
|||
|
|
@@ -81,7 +81,7 @@ class VariationalPosterior(Parameterized):
    def _raveled_index(self):
        index = np.empty(dtype=int, shape=0)
        size = 0
        for p in self._parameters_:
        for p in self.parameters:
            index = np.hstack((index, p._raveled_index()+size))
            size += p._realsize_ if hasattr(p, '_realsize_') else p.size
        return index
@ -96,10 +96,10 @@ class VariationalPosterior(Parameterized):
|
|||
dc = self.__dict__.copy()
|
||||
dc['mean'] = self.mean[s]
|
||||
dc['variance'] = self.variance[s]
|
||||
dc['_parameters_'] = copy.copy(self._parameters_)
|
||||
dc['parameters'] = copy.copy(self.parameters)
|
||||
n.__dict__.update(dc)
|
||||
n._parameters_[dc['mean']._parent_index_] = dc['mean']
|
||||
n._parameters_[dc['variance']._parent_index_] = dc['variance']
|
||||
n.parameters[dc['mean']._parent_index_] = dc['mean']
|
||||
n.parameters[dc['variance']._parent_index_] = dc['variance']
|
||||
n._gradient_array_ = None
|
||||
oversize = self.size - self.mean.size - self.variance.size
|
||||
n.size = n.mean.size + n.variance.size + oversize
|
||||
|
|
@ -150,11 +150,11 @@ class SpikeAndSlabPosterior(VariationalPosterior):
|
|||
dc['mean'] = self.mean[s]
|
||||
dc['variance'] = self.variance[s]
|
||||
dc['binary_prob'] = self.binary_prob[s]
|
||||
dc['_parameters_'] = copy.copy(self._parameters_)
|
||||
dc['parameters'] = copy.copy(self.parameters)
|
||||
n.__dict__.update(dc)
|
||||
n._parameters_[dc['mean']._parent_index_] = dc['mean']
|
||||
n._parameters_[dc['variance']._parent_index_] = dc['variance']
|
||||
n._parameters_[dc['binary_prob']._parent_index_] = dc['binary_prob']
|
||||
n.parameters[dc['mean']._parent_index_] = dc['mean']
|
||||
n.parameters[dc['variance']._parent_index_] = dc['variance']
|
||||
n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
|
||||
n.ndim = n.mean.ndim
|
||||
n.shape = n.mean.shape
|
||||
n.num_data = n.mean.shape[0]
|
||||
|
|
|
|||
|
|
@@ -66,7 +66,11 @@ class SparseGP(GP):
            #gradients wrt Z
            self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
            self.Z.gradient += self.kern.gradients_Z_expectations(
                self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
                self.grad_dict['dL_dpsi0'],
                self.grad_dict['dL_dpsi1'],
                self.grad_dict['dL_dpsi2'],
                Z=self.Z,
                variational_posterior=self.X)
        else:
            #gradients wrt kernel
            self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
@ -96,15 +96,11 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
|
|||
|
||||
# Optimize
|
||||
if optimize:
|
||||
#m.update_likelihood_approximation()
|
||||
# Parameters optimization:
|
||||
try:
|
||||
m.optimize('scg', messages=1)
|
||||
except Exception as e:
|
||||
return m
|
||||
|
||||
#m.pseudo_EM()
|
||||
|
||||
# Plot
|
||||
if plot:
|
||||
fig, axes = pb.subplots(2, 1)
|
||||
|
|
@ -133,10 +129,7 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti
|
|||
|
||||
# Optimize
|
||||
if optimize:
|
||||
#m.update_likelihood_approximation()
|
||||
# Parameters optimization:
|
||||
#m.optimize()
|
||||
m.pseudo_EM()
|
||||
m.optimize()
|
||||
|
||||
# Plot
|
||||
if plot:
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci
|
|||
m.kern.plot_ARD()
|
||||
return m
|
||||
|
||||
def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4, sigma=.2):
|
||||
def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=25, Q=4, sigma=.2):
|
||||
import GPy
|
||||
from GPy.util.datasets import swiss_roll_generated
|
||||
from GPy.models import BayesianGPLVM
|
||||
|
|
@ -144,16 +144,15 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4
|
|||
m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel)
|
||||
m.data_colors = c
|
||||
m.data_t = t
|
||||
m['noise_variance'] = Y.var() / 100.
|
||||
|
||||
|
||||
if optimize:
|
||||
m.optimize('scg', messages=verbose, max_iters=2e3)
|
||||
m.optimize('bfgs', messages=verbose, max_iters=2e3)
|
||||
|
||||
if plot:
|
||||
fig = plt.figure('fitted')
|
||||
ax = fig.add_subplot(111)
|
||||
s = m.input_sensitivity().argsort()[::-1][:2]
|
||||
ax.scatter(*m.X.T[s], c=c)
|
||||
ax.scatter(*m.X.mean.T[s], c=c)
|
||||
|
||||
return m
|
||||
|
||||
|
|
@ -172,14 +171,14 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
|
|||
m.data_labels = data['Y'][:N].argmax(axis=1)
|
||||
|
||||
if optimize:
|
||||
m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05)
|
||||
m.optimize('bfgs', messages=verbose, max_iters=max_iters, gtol=.05)
|
||||
|
||||
if plot:
|
||||
fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
|
||||
m.plot_latent(ax=latent_axes, labels=m.data_labels)
|
||||
data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:]))
|
||||
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean)[0:1,:], # @UnusedVariable
|
||||
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
|
||||
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels)
|
||||
raw_input('Press enter to finish')
|
||||
plt.close(fig)
|
||||
return m
|
||||
|
|
@@ -303,9 +302,11 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
    k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)

    inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool)
    m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k)
    m.inference_method = VarDTCMissingData()
    m.Y[inan] = _np.nan
    Y[inan] = _np.nan

    m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing,
                      inference_method=VarDTCMissingData(inan=inan), kernel=k)

    m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
    m.likelihood.variance = .01
    m.parameters_changed()
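For reference, a hedged usage sketch of the missing-data path exercised above: mask entries of Y with NaN and hand the mask to VarDTCMissingData at construction time. The data, latent dimension and kernel below are made up; only the import and the inference_method/kernel keyword arguments mirror the diff, and other defaults may matter in practice.

import numpy as np
import GPy
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData

Y = np.random.randn(60, 8)
inan = np.random.binomial(1, .3, size=Y.shape).astype(bool)
Y[inan] = np.nan                        # mark missing entries

m = GPy.models.BayesianGPLVM(Y, 5, num_inducing=10,
                             inference_method=VarDTCMissingData(inan=inan),
                             kernel=GPy.kern.Linear(5, ARD=True))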
@ -338,7 +339,40 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
|
|||
print "Optimizing Model:"
|
||||
m.optimize(messages=verbose, max_iters=8e3, gtol=.1)
|
||||
if plot:
|
||||
m.plot_X_1d("MRD Latent Space 1D")
|
||||
m.X.plot("MRD Latent Space 1D")
|
||||
m.plot_scales("MRD Scales")
|
||||
return m
|
||||
|
||||
def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
|
||||
from GPy import kern
|
||||
from GPy.models import MRD
|
||||
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
|
||||
|
||||
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
|
||||
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
|
||||
|
||||
#Ylist = [Ylist[0]]
|
||||
k = kern.Linear(Q, ARD=True)
|
||||
inanlist = []
|
||||
|
||||
for Y in Ylist:
|
||||
inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool)
|
||||
inanlist.append(inan)
|
||||
Y[inan] = _np.nan
|
||||
|
||||
imlist = []
|
||||
for inan in inanlist:
|
||||
imlist.append(VarDTCMissingData(limit=1, inan=inan))
|
||||
|
||||
m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing,
|
||||
kernel=k, inference_method=imlist,
|
||||
initx="random", initz='permute', **kw)
|
||||
|
||||
if optimize:
|
||||
print "Optimizing Model:"
|
||||
m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1)
|
||||
if plot:
|
||||
m.X.plot("MRD Latent Space 1D")
|
||||
m.plot_scales("MRD Scales")
|
||||
return m
|
||||
|
||||
|
|
@ -351,18 +385,17 @@ def brendan_faces(optimize=True, verbose=True, plot=True):
|
|||
Yn = Y - Y.mean()
|
||||
Yn /= Yn.std()
|
||||
|
||||
m = GPy.models.GPLVM(Yn, Q)
|
||||
m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=20)
|
||||
|
||||
# optimize
|
||||
m.constrain('rbf|noise|white', GPy.transformations.LogexpClipped())
|
||||
|
||||
if optimize: m.optimize('scg', messages=verbose, max_iters=1000)
|
||||
if optimize: m.optimize('bfgs', messages=verbose, max_iters=1000)
|
||||
|
||||
if plot:
|
||||
ax = m.plot_latent(which_indices=(0, 1))
|
||||
y = m.likelihood.Y[0, :]
|
||||
y = m.Y[0, :]
|
||||
data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False)
|
||||
GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
||||
lvm = GPy.plotting.matplot_dep.visualize.lvm(m.X.mean[0, :].copy(), m, data_show, ax)
|
||||
raw_input('Press enter to finish')
|
||||
|
||||
return m
|
||||
|
|
@ -376,13 +409,14 @@ def olivetti_faces(optimize=True, verbose=True, plot=True):
|
|||
Yn = Y - Y.mean()
|
||||
Yn /= Yn.std()
|
||||
|
||||
m = GPy.models.GPLVM(Yn, Q)
|
||||
if optimize: m.optimize('scg', messages=verbose, max_iters=1000)
|
||||
m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=20)
|
||||
|
||||
if optimize: m.optimize('bfgs', messages=verbose, max_iters=1000)
|
||||
if plot:
|
||||
ax = m.plot_latent(which_indices=(0, 1))
|
||||
y = m.likelihood.Y[0, :]
|
||||
data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False)
|
||||
GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
||||
lvm = GPy.plotting.matplot_dep.visualize.lvm(m.X.mean[0, :].copy(), m, data_show, ax)
|
||||
raw_input('Press enter to finish')
|
||||
|
||||
return m
|
||||
|
|
@ -414,9 +448,10 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True):
|
|||
ax = m.plot_latent()
|
||||
y = m.Y[0, :]
|
||||
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
|
||||
vis = GPy.plotting.matplot_dep.visualize.lvm(m.X[:1, :].copy(), m, data_show, latent_axes=ax)
|
||||
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[:1, :].copy(), m, data_show, latent_axes=ax)
|
||||
raw_input('Press enter to finish')
|
||||
|
||||
lvm_visualizer.close()
|
||||
data_show.close()
|
||||
return m
|
||||
|
||||
def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True):
|
||||
|
|
@ -464,9 +499,8 @@ def robot_wireless(optimize=True, verbose=True, plot=True):
|
|||
|
||||
data = GPy.util.datasets.robot_wireless()
|
||||
# optimize
|
||||
m = GPy.models.GPLVM(data['Y'], 2)
|
||||
m = GPy.models.BayesianGPLVM(data['Y'], 4, num_inducing=25)
|
||||
if optimize: m.optimize(messages=verbose, max_f_eval=10000)
|
||||
m._set_params(m._get_params())
|
||||
if plot:
|
||||
m.plot_latent()
|
||||
|
||||
|
|
@ -482,21 +516,26 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
|
|||
Q = 6
|
||||
kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True)
|
||||
m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
|
||||
|
||||
|
||||
m.data = data
|
||||
m.likelihood.variance = 0.001
|
||||
|
||||
|
||||
# optimize
|
||||
if optimize: m.optimize('bfgs', messages=verbose, max_iters=800, xtol=1e-300, ftol=1e-300)
|
||||
try:
|
||||
if optimize: m.optimize('bfgs', messages=verbose, max_iters=5e3, bfgs_factor=10)
|
||||
except KeyboardInterrupt:
|
||||
print "Keyboard interrupt, continuing to plot and return"
|
||||
|
||||
if plot:
|
||||
plt.clf, (latent_axes, sense_axes) = plt.subplots(1, 2)
|
||||
fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
|
||||
plt.sca(latent_axes)
|
||||
m.plot_latent(ax=latent_axes)
|
||||
y = m.Y[:1, :].copy()
|
||||
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y, connect=data['connect'])
|
||||
GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
|
||||
plt.draw()
|
||||
#raw_input('Press enter to finish')
|
||||
dim_select = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
|
||||
fig.canvas.draw()
|
||||
fig.canvas.show()
|
||||
raw_input('Press enter to finish')
|
||||
|
||||
return m
|
||||
|
||||
|
|
@ -515,9 +554,10 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose
|
|||
ax = m.plot_latent()
|
||||
y = m.Y[0, :]
|
||||
data_show = GPy.plotting.matplot_dep.visualize.skeleton_show(y[None, :], data['skel'])
|
||||
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
||||
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[0].copy(), m, data_show, latent_axes=ax)
|
||||
raw_input('Press enter to finish')
|
||||
lvm_visualizer.close()
|
||||
data_show.close()
|
||||
|
||||
return m
|
||||
|
||||
|
|
|
|||
|
|
@@ -25,6 +25,39 @@ etc.

"""

class LatentFunctionInference(object):
    def on_optimization_start(self):
        """
        This function gets called, just before the optimization loop to start.
        """
        pass

    def on_optimization_end(self):
        """
        This function gets called, just after the optimization loop ended.
        """
        pass

class InferenceMethodList(LatentFunctionInference, list):

    def on_optimization_start(self):
        for inf in self:
            inf.on_optimization_start()

    def on_optimization_end(self):
        for inf in self:
            inf.on_optimization_end()

    def __getstate__(self):
        state = []
        for inf in self:
            state.append(inf)
        return state

    def __setstate__(self, state):
        for inf in state:
            self.append(inf)

from exact_gaussian_inference import ExactGaussianInference
from laplace import Laplace
from GPy.inference.latent_function_inference.var_dtc import VarDTC
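Any inference object can now hook into the optimization lifecycle by subclassing LatentFunctionInference. A minimal sketch (the import path follows this commit; the counting behaviour is a made-up example, not part of GPy):

from GPy.inference.latent_function_inference import LatentFunctionInference

class CountingInference(LatentFunctionInference):
    """Hypothetical example: record how often optimization runs start and end."""
    def __init__(self):
        self.starts, self.ends = 0, 0

    def on_optimization_start(self):
        # called just before the optimization loop starts
        self.starts += 1

    def on_optimization_end(self):
        # called just after the optimization loop ends
        self.ends += 1

InferenceMethodList simply forwards both hooks to every inference object it holds, which is what allows a model such as MRD to carry one inference method per output in the list.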
@ -38,11 +71,26 @@ from var_dtc_gpu import VarDTC_GPU
|
|||
# class FullLatentFunctionData(object):
|
||||
#
|
||||
#
|
||||
# class LatentFunctionInference(object):
|
||||
# def inference(self, kern, X, likelihood, Y, Y_metadata=None):
|
||||
|
||||
# class EMLikeLatentFunctionInference(LatentFunctionInference):
|
||||
# def update_approximation(self):
|
||||
# """
|
||||
# This function gets called when the
|
||||
# """
|
||||
#
|
||||
# def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
|
||||
# """
|
||||
# Do inference on the latent functions given a covariance function `kern`,
|
||||
# inputs and outputs `X` and `Y`, and a likelihood `likelihood`.
|
||||
# inputs and outputs `X` and `Y`, inducing_inputs `Z`, and a likelihood `likelihood`.
|
||||
# Additional metadata for the outputs `Y` can be given in `Y_metadata`.
|
||||
# """
|
||||
# raise NotImplementedError, "Abstract base class for full inference"
|
||||
#
|
||||
# class VariationalLatentFunctionInference(LatentFunctionInference):
|
||||
# def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
|
||||
# """
|
||||
# Do inference on the latent functions given a covariance function `kern`,
|
||||
# inputs and outputs `X` and `Y`, inducing_inputs `Z`, and a likelihood `likelihood`.
|
||||
# Additional metadata for the outputs `Y` can be given in `Y_metadata`.
|
||||
# """
|
||||
# raise NotImplementedError, "Abstract base class for full inference"
|
||||
|
|
|
|||
|
|
@ -4,9 +4,10 @@
|
|||
from posterior import Posterior
|
||||
from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv
|
||||
import numpy as np
|
||||
from . import LatentFunctionInference
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
class DTC(object):
|
||||
class DTC(LatentFunctionInference):
|
||||
"""
|
||||
An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
|
||||
|
||||
|
|
|
|||
|
|
@ -5,10 +5,11 @@ from posterior import Posterior
|
|||
from ...util.linalg import pdinv, dpotrs, tdot
|
||||
from ...util import diag
|
||||
import numpy as np
|
||||
from . import LatentFunctionInference
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
|
||||
class ExactGaussianInference(object):
|
||||
class ExactGaussianInference(LatentFunctionInference):
|
||||
"""
|
||||
An object for inference when the likelihood is Gaussian.
|
||||
|
||||
|
|
|
|||
|
|
@@ -1,9 +1,10 @@
import numpy as np
from ...util.linalg import pdinv,jitchol,DSYR,tdot,dtrtrs, dpotrs
from posterior import Posterior
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)

class EP(object):
class EP(LatentFunctionInference):
    def __init__(self, epsilon=1e-6, eta=1., delta=1.):
        """
        The expectation-propagation algorithm.

@@ -21,14 +22,25 @@ class EP(object):

    def reset(self):
        self.old_mutilde, self.old_vtilde = None, None
        self._ep_approximation = None

    def on_optimization_start(self):
        self._ep_approximation = None

    def on_optimization_end(self):
        # TODO: update approximation in the end as well? Maybe even with a switch?
        pass

    def inference(self, kern, X, likelihood, Y, Y_metadata=None, Z=None):
        num_data, output_dim = X.shape
        num_data, output_dim = Y.shape
        assert output_dim ==1, "ep in 1D only (for now!)"

        K = kern.K(X)

        mu, Sigma, mu_tilde, tau_tilde, Z_hat = self.expectation_propagation(K, Y, likelihood, Y_metadata)
        if self._ep_approximation is None:
            mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
        else:
            mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation

        Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde))

@@ -42,8 +54,6 @@ class EP(object):

        return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}


    def expectation_propagation(self, K, Y, likelihood, Y_metadata):

        num_data, data_dim = Y.shape

@@ -108,4 +118,3 @@ class EP(object):

        mu_tilde = v_tilde/tau_tilde
        return mu, Sigma, mu_tilde, tau_tilde, Z_hat
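The net effect of the changes above is that the expensive EP loop runs once per optimization run: inference caches its moment-matched quantities in _ep_approximation and re-uses them until on_optimization_start clears the cache. A standalone sketch of that pattern (plain Python, not GPy code):

class CachedApproximation(object):
    def __init__(self):
        self._approx = None

    def on_optimization_start(self):
        self._approx = None              # force a fresh approximation on the next call

    def inference(self, data):
        if self._approx is None:
            self._approx = sum(data)     # stand-in for the expensive EP loop
        return self._approx

c = CachedApproximation()
c.on_optimization_start()
print(c.inference([1, 2, 3]))            # computes and caches: 6
print(c.inference([4, 5, 6]))            # re-uses the cached value: 6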
@ -1,11 +1,59 @@
|
|||
import numpy as np
|
||||
from ...util.linalg import pdinv,jitchol,DSYR,tdot,dtrtrs, dpotrs
|
||||
from expectation_propagation import EP
|
||||
from ...util import diag
|
||||
from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify, DSYR
|
||||
from ...util.misc import param_to_array
|
||||
from ...core.parameterization.variational import VariationalPosterior
|
||||
from . import LatentFunctionInference
|
||||
from posterior import Posterior
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
class EPDTC(EP):
|
||||
#def __init__(self, epsilon=1e-6, eta=1., delta=1.):
|
||||
class EPDTC(LatentFunctionInference):
|
||||
const_jitter = 1e-6
|
||||
def __init__(self, epsilon=1e-6, eta=1., delta=1., limit=1):
|
||||
from ...util.caching import Cacher
|
||||
self.limit = limit
|
||||
self.get_trYYT = Cacher(self._get_trYYT, limit)
|
||||
self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
|
||||
|
||||
self.epsilon, self.eta, self.delta = epsilon, eta, delta
|
||||
self.reset()
|
||||
|
||||
def set_limit(self, limit):
|
||||
self.get_trYYT.limit = limit
|
||||
self.get_YYTfactor.limit = limit
|
||||
|
||||
def _get_trYYT(self, Y):
|
||||
return param_to_array(np.sum(np.square(Y)))
|
||||
|
||||
def __getstate__(self):
|
||||
# has to be overridden, as Cacher objects cannot be pickled.
|
||||
return self.limit
|
||||
|
||||
def __setstate__(self, state):
|
||||
# has to be overridden, as Cacher objects cannot be pickled.
|
||||
self.limit = state
|
||||
from ...util.caching import Cacher
|
||||
self.get_trYYT = Cacher(self._get_trYYT, self.limit)
|
||||
self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit)
|
||||
|
||||
def _get_YYTfactor(self, Y):
|
||||
"""
|
||||
find a matrix L which satisfies LLT = YYT.
|
||||
|
||||
Note that L may have fewer columns than Y.
|
||||
"""
|
||||
N, D = Y.shape
|
||||
if (N>=D):
|
||||
return param_to_array(Y)
|
||||
else:
|
||||
return jitchol(tdot(Y))
|
||||
|
||||
def get_VVTfactor(self, Y, prec):
|
||||
return Y * prec # TODO chache this, and make it effective
|
||||
|
||||
def reset(self):
|
||||
self.old_mutilde, self.old_vtilde = None, None
|
||||
self._ep_approximation = None
|
||||
|
||||
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
|
||||
num_data, output_dim = X.shape
|
||||
|
|
@ -14,26 +62,131 @@ class EPDTC(EP):
|
|||
Kmm = kern.K(Z)
|
||||
Kmn = kern.K(Z,X)
|
||||
|
||||
if self._ep_approximation is None:
|
||||
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
|
||||
else:
|
||||
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation
|
||||
|
||||
|
||||
if isinstance(X, VariationalPosterior):
|
||||
uncertain_inputs = True
|
||||
psi0 = kern.psi0(Z, X)
|
||||
psi1 = Kmn.T#kern.psi1(Z, X)
|
||||
psi2 = kern.psi2(Z, X)
|
||||
else:
|
||||
uncertain_inputs = False
|
||||
psi0 = kern.Kdiag(X)
|
||||
psi1 = Kmn.T#kern.K(X, Z)
|
||||
psi2 = None
|
||||
|
||||
#see whether we're using variational uncertain inputs
|
||||
|
||||
_, output_dim = Y.shape
|
||||
|
||||
#see whether we've got a different noise variance for each datum
|
||||
#beta = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
|
||||
beta = tau_tilde
|
||||
VVT_factor = beta[:,None]*mu_tilde[:,None]
|
||||
trYYT = self.get_trYYT(mu_tilde[:,None])
|
||||
|
||||
# do the inference:
|
||||
het_noise = beta.size > 1
|
||||
num_inducing = Z.shape[0]
|
||||
num_data = Y.shape[0]
|
||||
# kernel computations, using BGPLVM notation
|
||||
|
||||
Kmm = kern.K(Z).copy()
|
||||
diag.add(Kmm, self.const_jitter)
|
||||
Lm = jitchol(Kmm)
|
||||
Lmi = dtrtrs(Lm,np.eye(Lm.shape[0]))[0]
|
||||
Kmmi = np.dot(Lmi.T,Lmi)
|
||||
KmmiKmn = np.dot(Kmmi,Kmn)
|
||||
K = np.dot(Kmn.T,KmmiKmn)
|
||||
|
||||
# The rather complex computations of A
|
||||
if uncertain_inputs:
|
||||
if het_noise:
|
||||
psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0)
|
||||
else:
|
||||
psi2_beta = psi2.sum(0) * beta
|
||||
LmInv = dtrtri(Lm)
|
||||
A = LmInv.dot(psi2_beta.dot(LmInv.T))
|
||||
else:
|
||||
if het_noise:
|
||||
tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
|
||||
else:
|
||||
tmp = psi1 * (np.sqrt(beta))
|
||||
tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
|
||||
A = tdot(tmp) #print A.sum()
|
||||
|
||||
# factor B
|
||||
B = np.eye(num_inducing) + A
|
||||
LB = jitchol(B)
|
||||
psi1Vf = np.dot(psi1.T, VVT_factor)
|
||||
# back substutue C into psi1Vf
|
||||
tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
|
||||
_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
|
||||
tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
|
||||
Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
|
||||
|
||||
# data fit and derivative of L w.r.t. Kmm
|
||||
delit = tdot(_LBi_Lmi_psi1Vf)
|
||||
data_fit = np.trace(delit)
|
||||
DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
|
||||
delit = -0.5 * DBi_plus_BiPBi
|
||||
delit += -0.5 * B * output_dim
|
||||
delit += output_dim * np.eye(num_inducing)
|
||||
# Compute dL_dKmm
|
||||
dL_dKmm = backsub_both_sides(Lm, delit)
|
||||
|
||||
# derivatives of L w.r.t. psi
|
||||
dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
|
||||
VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
|
||||
psi1, het_noise, uncertain_inputs)
|
||||
|
||||
# log marginal likelihood
|
||||
log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
|
||||
psi0, A, LB, trYYT, data_fit, VVT_factor)
|
||||
|
||||
#put the gradients in the right places
|
||||
dL_dR = _compute_dL_dR(likelihood,
|
||||
het_noise, uncertain_inputs, LB,
|
||||
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
|
||||
psi0, psi1, beta,
|
||||
data_fit, num_data, output_dim, trYYT, mu_tilde[:,None])
|
||||
|
||||
dL_dthetaL = 0#likelihood.exact_inference_gradients(dL_dR,Y_metadata)
|
||||
|
||||
if uncertain_inputs:
|
||||
grad_dict = {'dL_dKmm': dL_dKmm,
|
||||
'dL_dpsi0':dL_dpsi0,
|
||||
'dL_dpsi1':dL_dpsi1,
|
||||
'dL_dpsi2':dL_dpsi2,
|
||||
'dL_dthetaL':dL_dthetaL}
|
||||
else:
|
||||
grad_dict = {'dL_dKmm': dL_dKmm,
|
||||
'dL_dKdiag':dL_dpsi0,
|
||||
'dL_dKnm':dL_dpsi1,
|
||||
'dL_dthetaL':dL_dthetaL}
|
||||
|
||||
#get sufficient things for posterior prediction
|
||||
#TODO: do we really want to do this in the loop?
|
||||
if VVT_factor.shape[1] == Y.shape[1]:
|
||||
woodbury_vector = Cpsi1Vf # == Cpsi1V
|
||||
else:
|
||||
print 'foobar'
|
||||
psi1V = np.dot(mu_tilde[:,None].T*beta, psi1).T
|
||||
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
|
||||
tmp, _ = dpotrs(LB, tmp, lower=1)
|
||||
woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
|
||||
Bi, _ = dpotri(LB, lower=1)
|
||||
symmetrify(Bi)
|
||||
Bi = -dpotri(LB, lower=1)[0]
|
||||
diag.add(Bi, 1)
|
||||
|
||||
woodbury_inv = backsub_both_sides(Lm, Bi)
|
||||
|
||||
#construct a posterior object
|
||||
post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
|
||||
return post, log_marginal, grad_dict
|
||||
|
||||
|
||||
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
|
||||
|
||||
Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde))
|
||||
|
||||
alpha, _ = dpotrs(LW, mu_tilde, lower=1)
|
||||
|
||||
log_marginal = 0.5*(-num_data * log_2_pi - W_logdet - np.sum(alpha * mu_tilde)) # TODO: add log Z_hat??
|
||||
|
||||
dL_dK = 0.5 * (tdot(alpha[:,None]) - Wi)
|
||||
|
||||
dL_dthetaL = np.zeros(likelihood.size)#TODO: derivatives of the likelihood parameters
|
||||
|
||||
return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
|
||||
|
||||
|
||||
|
||||
|
|
@ -121,3 +274,69 @@ class EPDTC(EP):
|
|||
|
||||
mu_tilde = v_tilde/tau_tilde
|
||||
return mu, Sigma, mu_tilde, tau_tilde, Z_hat
|
||||
|
||||
def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs):
|
||||
dL_dpsi0 = -0.5 * output_dim * (beta[:,None] * np.ones([num_data, 1])).flatten()
|
||||
dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T)
|
||||
dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
|
||||
if het_noise:
|
||||
if uncertain_inputs:
|
||||
dL_dpsi2 = beta[:, None, None] * dL_dpsi2_beta[None, :, :]
|
||||
else:
|
||||
dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * beta.reshape(num_data, 1)).T).T
|
||||
dL_dpsi2 = None
|
||||
else:
|
||||
dL_dpsi2 = beta * dL_dpsi2_beta
|
||||
if uncertain_inputs:
|
||||
# repeat for each of the N psi_2 matrices
|
||||
dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], num_data, axis=0)
|
||||
else:
|
||||
# subsume back into psi1 (==Kmn)
|
||||
dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2)
|
||||
dL_dpsi2 = None
|
||||
|
||||
return dL_dpsi0, dL_dpsi1, dL_dpsi2
|
||||
|
||||
|
||||
def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT, Y):
|
||||
# the partial derivative vector for the likelihood
|
||||
if likelihood.size == 0:
|
||||
# save computation here.
|
||||
dL_dR = None
|
||||
elif het_noise:
|
||||
if uncertain_inputs:
|
||||
raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
|
||||
else:
|
||||
#from ...util.linalg import chol_inv
|
||||
#LBi = chol_inv(LB)
|
||||
LBi, _ = dtrtrs(LB,np.eye(LB.shape[0]))
|
||||
|
||||
Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0)
|
||||
_LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0)
|
||||
|
||||
dL_dR = -0.5 * beta + 0.5 * (beta*Y)**2
|
||||
dL_dR += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2
|
||||
|
||||
dL_dR += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2
|
||||
|
||||
dL_dR += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * Y * beta**2
|
||||
dL_dR += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2
|
||||
else:
|
||||
# likelihood is not heteroscedatic
|
||||
dL_dR = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2
|
||||
dL_dR += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta)
|
||||
dL_dR += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit)
|
||||
return dL_dR
|
||||
|
||||
def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, psi0, A, LB, trYYT, data_fit,Y):
|
||||
#compute log marginal likelihood
|
||||
if het_noise:
|
||||
lik_1 = -0.5 * num_data * output_dim * np.log(2. * np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(beta * np.square(Y).sum(axis=-1))
|
||||
lik_2 = -0.5 * output_dim * (np.sum(beta.flatten() * psi0) - np.trace(A))
|
||||
else:
|
||||
lik_1 = -0.5 * num_data * output_dim * (np.log(2. * np.pi) - np.log(beta)) - 0.5 * beta * trYYT
|
||||
lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A))
|
||||
lik_3 = -output_dim * (np.sum(np.log(np.diag(LB))))
|
||||
lik_4 = 0.5 * data_fit
|
||||
log_marginal = lik_1 + lik_2 + lik_3 + lik_4
|
||||
return log_marginal
|
||||
|
|
|
|||
|
|
@ -5,9 +5,10 @@ from posterior import Posterior
|
|||
from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv
|
||||
from ...util import diag
|
||||
import numpy as np
|
||||
from . import LatentFunctionInference
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
class FITC(object):
|
||||
class FITC(LatentFunctionInference):
|
||||
"""
|
||||
An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
|
||||
|
||||
|
|
|
|||
|
|
@ -16,8 +16,9 @@ from ...util.misc import param_to_array
|
|||
from posterior import Posterior
|
||||
import warnings
|
||||
from scipy import optimize
|
||||
from . import LatentFunctionInference
|
||||
|
||||
class Laplace(object):
|
||||
class Laplace(LatentFunctionInference):
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@@ -95,7 +95,7 @@ class Posterior(object):
        """
        if self._covariance is None:
            #LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
            self._covariance = self._K - (np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T).squeeze()
            self._covariance = (np.atleast_3d(self._K) - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T).squeeze()
            #self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
        return self._covariance
@ -7,9 +7,10 @@ from ...util import diag
|
|||
from ...core.parameterization.variational import VariationalPosterior
|
||||
import numpy as np
|
||||
from ...util.misc import param_to_array
|
||||
from . import LatentFunctionInference
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
class VarDTC(object):
|
||||
class VarDTC(LatentFunctionInference):
|
||||
"""
|
||||
An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
|
||||
|
||||
|
|
@ -190,7 +191,7 @@ class VarDTC(object):
|
|||
post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
|
||||
return post, log_marginal, grad_dict
|
||||
|
||||
class VarDTCMissingData(object):
|
||||
class VarDTCMissingData(LatentFunctionInference):
|
||||
const_jitter = 1e-6
|
||||
def __init__(self, limit=1, inan=None):
|
||||
from ...util.caching import Cacher
|
||||
|
|
@@ -201,6 +202,17 @@ class VarDTCMissingData(object):
    def set_limit(self, limit):
        self._Y.limit = limit

    def __getstate__(self):
        # has to be overridden, as Cacher objects cannot be pickled.
        return self._Y.limit, self._inan

    def __setstate__(self, state):
        # has to be overridden, as Cacher objects cannot be pickled.
        from ...util.caching import Cacher
        self.limit = state[0]
        self._inan = state[1]
        self._Y = Cacher(self._subarray_computations, self.limit)

    def _subarray_computations(self, Y):
        if self._inan is None:
            inan = np.isnan(Y)
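The __getstate__/__setstate__ pair above exists because Cacher objects cannot be pickled, so only the plain configuration (the limit and the missing-data mask) is stored and the cache is rebuilt on load. A standalone sketch of the same pattern (plain Python; the squaring "computation" is a stand-in for the cached work):

import pickle

class HasCache(object):
    def __init__(self, limit=1):
        self.limit = limit
        self._cache = {}                  # stand-in for util.caching.Cacher

    def expensive(self, x):
        if x not in self._cache:
            self._cache[x] = x * x
        return self._cache[x]

    def __getstate__(self):
        return self.limit                 # drop the unpicklable cache

    def __setstate__(self, state):
        self.limit = state
        self._cache = {}                  # rebuild the cache on load

h = pickle.loads(pickle.dumps(HasCache(limit=2)))
print(h.expensive(3))                     # 9, computed with the rebuilt cache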
@ -271,7 +283,11 @@ class VarDTCMissingData(object):
|
|||
else: beta = beta_all
|
||||
|
||||
VVT_factor = (beta*y)
|
||||
VVT_factor_all[v, ind].flat = VVT_factor.flat
|
||||
try:
|
||||
VVT_factor_all[v, ind].flat = VVT_factor.flat
|
||||
except ValueError:
|
||||
mult = np.ravel_multi_index((v.nonzero()[0][:,None],ind[None,:]), VVT_factor_all.shape)
|
||||
VVT_factor_all.flat[mult] = VVT_factor
|
||||
output_dim = y.shape[1]
|
||||
|
||||
psi0 = psi0_all[v]
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ from ...util import diag
|
|||
from ...core.parameterization.variational import VariationalPosterior
|
||||
import numpy as np
|
||||
from ...util.misc import param_to_array
|
||||
from . import LatentFunctionInference
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
from ...util import gpu_init
|
||||
|
|
@ -19,7 +20,7 @@ try:
|
|||
except:
|
||||
pass
|
||||
|
||||
class VarDTC_GPU(object):
|
||||
class VarDTC_GPU(LatentFunctionInference):
|
||||
"""
|
||||
An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
|
||||
|
||||
|
|
|
|||
|
|
@ -7,9 +7,10 @@ from ...util import diag
|
|||
from ...core.parameterization.variational import VariationalPosterior
|
||||
import numpy as np
|
||||
from ...util.misc import param_to_array
|
||||
from . import LatentFunctionInference
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
class VarDTC_minibatch(object):
|
||||
class VarDTC_minibatch(LatentFunctionInference):
|
||||
"""
|
||||
An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
|
||||
|
||||
|
|
@ -70,12 +71,13 @@ class VarDTC_minibatch(object):
|
|||
#see whether we've got a different noise variance for each datum
|
||||
beta = 1./np.fmax(likelihood.variance, 1e-6)
|
||||
het_noise = beta.size > 1
|
||||
if het_noise:
|
||||
self.batchsize = 1
|
||||
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
|
||||
#self.YYTfactor = beta*self.get_YYTfactor(Y)
|
||||
YYT_factor = Y
|
||||
trYYT = self.get_trYYT(Y)
|
||||
|
||||
|
||||
psi2_full = np.zeros((num_inducing,num_inducing))
|
||||
psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM
|
||||
psi0_full = 0
|
||||
|
|
@ -104,19 +106,18 @@ class VarDTC_minibatch(object):
|
|||
YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum()
|
||||
else:
|
||||
psi0_full += psi0.sum()
|
||||
psi1Y_full += np.dot(Y_slice.T,psi1) # DxM
|
||||
|
||||
psi1Y_full += np.dot(Y_slice.T,psi1) # DxM
|
||||
|
||||
if uncertain_inputs:
|
||||
if het_noise:
|
||||
psi2_full += np.einsum('n,nmo->mo',beta_slice,psi2)
|
||||
psi2_full += beta_slice*psi2
|
||||
else:
|
||||
psi2_full += psi2.sum(axis=0)
|
||||
psi2_full += psi2
|
||||
else:
|
||||
if het_noise:
|
||||
psi2_full += np.einsum('n,nm,no->mo',beta_slice,psi1,psi1)
|
||||
psi2_full += beta_slice*np.outer(psi1,psi1)
|
||||
else:
|
||||
psi2_full += tdot(psi1.T)
|
||||
psi2_full += np.outer(psi1,psi1)
|
||||
|
||||
if not het_noise:
|
||||
psi0_full *= beta
|
||||
|
|
@ -223,7 +224,7 @@ class VarDTC_minibatch(object):
|
|||
psi2 = None
|
||||
|
||||
if het_noise:
|
||||
beta = beta[n_start:n_end]
|
||||
beta = beta[n_start] # assuming batchsize==1
|
||||
|
||||
betaY = beta*Y_slice
|
||||
betapsi1 = np.einsum('n,nm->nm',beta,psi1)
|
||||
|
|
@ -244,7 +245,7 @@ class VarDTC_minibatch(object):
|
|||
dL_dpsi1 = np.dot(betaY,v.T)
|
||||
|
||||
if uncertain_inputs:
|
||||
dL_dpsi2 = np.einsum('n,mo->nmo',beta * np.ones((n_end-n_start,)),dL_dpsi2R)
|
||||
dL_dpsi2 = beta* dL_dpsi2R
|
||||
else:
|
||||
dL_dpsi1 += np.dot(betapsi1,dL_dpsi2R)*2.
|
||||
dL_dpsi2 = None
|
||||
|
|
@ -262,11 +263,11 @@ class VarDTC_minibatch(object):
|
|||
dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1)
|
||||
else:
|
||||
if uncertain_inputs:
|
||||
psiR = np.einsum('mo,nmo->',dL_dpsi2R,psi2)
|
||||
psiR = np.einsum('mo,mo->',dL_dpsi2R,psi2)
|
||||
else:
|
||||
psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R)
|
||||
|
||||
dL_dthetaL = ((np.square(betaY)).sum() + np.square(beta)*output_dim*(psi0.sum())-num_slice*output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum()
|
||||
dL_dthetaL = ((np.square(betaY)).sum() + beta*beta*output_dim*(psi0.sum())-num_slice*output_dim*beta)/2. - beta*beta*psiR- (betaY*np.dot(betapsi1,v)).sum()
|
||||
|
||||
if uncertain_inputs:
|
||||
grad_dict = {'dL_dpsi0':dL_dpsi0,
|
||||
|
|
@ -296,7 +297,7 @@ def update_gradients(model):
|
|||
kern_grad = model.kern.gradient.copy()
|
||||
|
||||
#gradients w.r.t. Z
|
||||
model.Z.gradient[:,model.kern.active_dims] = model.kern.gradients_X(dL_dKmm, model.Z)
|
||||
model.Z.gradient = model.kern.gradients_X(dL_dKmm, model.Z)
|
||||
|
||||
isEnd = False
|
||||
while not isEnd:
|
||||
|
|
@ -309,8 +310,8 @@ def update_gradients(model):
|
|||
kern_grad += model.kern.gradient
|
||||
|
||||
#gradients w.r.t. Z
|
||||
model.Z.gradient[:,model.kern.active_dims] += model.kern.gradients_Z_expectations(
|
||||
grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice)
|
||||
model.Z.gradient += model.kern.gradients_Z_expectations(
|
||||
dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice)
|
||||
|
||||
#gradients w.r.t. posterior parameters of X
|
||||
X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||
|
|
|
|||
|
|
@@ -119,7 +119,7 @@ class Add(CombinationKernel):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)

def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target = np.zeros(Z.shape)
for p1 in self.parts:
@@ -134,17 +134,17 @@ class Add(CombinationKernel):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
target += p1.gradients_Z_expectations(dL_psi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
return target

def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target_mu = np.zeros(variational_posterior.shape)
target_S = np.zeros(variational_posterior.shape)
for p1 in self._parameters_:
for p1 in self.parameters:
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self._parameters_:
for p2 in self.parameters:
if p2 is p1:
continue
if isinstance(p2, White):
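For context on the effective dL_dpsi1 bookkeeping above: for a sum kernel k = k_a + k_b the psi2 statistic expands, by linearity of the expectation over q(x_n), as

psi2^(a+b)_nmm' = E[k_a(x_n,z_m) k_a(x_n,z_m')] + E[k_b(x_n,z_m) k_b(x_n,z_m')] + E[k_a(x_n,z_m) k_b(x_n,z_m')] + E[k_b(x_n,z_m) k_a(x_n,z_m')]

so each part picks up cross terms with every other part; the code folds those contributions into an effective dL_dpsi1 per part via products of the parts' psi1 statistics, with White and Bias parts handled as special cases. This is a sketch of the identity implied by the code, not a quotation from the source.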
@@ -160,7 +160,7 @@ class Add(CombinationKernel):

def add(self, other, name='sum'):
if isinstance(other, Add):
other_params = other._parameters_[:]
other_params = other.parameters[:]
for p in other_params:
other.remove_parameter(p)
self.add_parameters(*other_params)
@@ -170,4 +170,4 @@ class Add(CombinationKernel):
return self

def input_sensitivity(self):
return reduce(np.add, [k.input_sensitivity() for k in self.parts])
return reduce(np.add, [k.input_sensitivity() for k in self.parts])
@@ -55,7 +55,7 @@ class Kern(Parameterized):
self._sliced_X = 0
self.useGPU = self._support_GPU and useGPU

@Cache_this(limit=10)
@Cache_this(limit=20)
def _slice_X(self, X):
return X[:, self.active_dims]
@@ -103,7 +103,7 @@ class Kern(Parameterized):
"""
raise NotImplementedError

def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""
Returns the derivative of the objective wrt Z, using the chain rule
through the expectation variables.
@@ -183,9 +183,9 @@ class Kern(Parameterized):
assert isinstance(other, Kern), "only kernels can be added to kernels..."
from prod import Prod
#kernels = []
#if isinstance(self, Prod): kernels.extend(self._parameters_)
#if isinstance(self, Prod): kernels.extend(self.parameters)
#else: kernels.append(self)
#if isinstance(other, Prod): kernels.extend(other._parameters_)
#if isinstance(other, Prod): kernels.extend(other.parameters)
#else: kernels.append(other)
return Prod([self, other], name)
@@ -222,7 +222,7 @@ class CombinationKernel(Kern):

@property
def parts(self):
return self._parameters_
return self.parameters

def get_input_dim_active_dims(self, kernels, extra_dims = None):
#active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
@@ -124,9 +124,9 @@ def _slice_update_gradients_expectations(f):

def _slice_gradients_Z_expectations(f):
@wraps(f)
def wrap(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
with _Slice_wrap(self, Z, variational_posterior) as s:
ret = s.handle_return_array(f(self, dL_dpsi1, dL_dpsi2, s.X, s.X2))
ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2))
return ret
return wrap
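The slicing wrappers in this file hand each kernel only its active dimensions and pad the returned gradients back to the full input size; the change above simply threads the new dL_dpsi0 argument through that machinery. A small, self-contained sketch of the underlying active_dims mechanism (sizes invented for illustration):

import numpy as np
import GPy

k = GPy.kern.RBF(2, active_dims=[0, 2])   # a 2-d kernel acting on columns 0 and 2 of X
X = np.random.randn(10, 3)
K = k.K(X)                                # internally evaluated on X[:, [0, 2]]
print(K.shape)                            # (10, 10)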
@@ -169,7 +169,7 @@ class Linear(Kern):
else:
self.variances.gradient += 2.*np.sum(dL_dpsi2 * self.psi2(Z, variational_posterior))/self.variances

def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
gamma = variational_posterior.binary_prob
mu = variational_posterior.mean
@@ -9,12 +9,23 @@ import numpy as np
from GPy.util.caching import Cache_this

@Cache_this(limit=1)
def _Z_distances(Z):
Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
return Zhat, Zdist
def psicomputations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# _psi1 NxM

psi0 = np.empty(mu.shape[0])
psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma)
return psi0, psi1, psi2

@Cache_this(limit=1)
def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
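A minimal sketch of how these psi statistics are reached through the kernel interface, assuming a GPy build matching this commit; the SpikeAndSlabPosterior constructor and the RBF psi methods are taken from elsewhere in this diff, and the sizes are invented:

import numpy as np
import GPy
from GPy.core.parameterization.variational import SpikeAndSlabPosterior

N, M, Q = 20, 5, 3
Z = np.random.randn(M, Q)                              # inducing inputs, MxQ
qX = SpikeAndSlabPosterior(np.random.randn(N, Q),      # means mu, NxQ
                           0.1 * np.random.rand(N, Q), # variances S, NxQ
                           np.random.rand(N, Q))       # slab probabilities gamma, NxQ

k = GPy.kern.RBF(Q, ARD=True)
psi0 = k.psi0(Z, qX)   # shape (N,)
psi1 = k.psi1(Z, qX)   # shape (N, M)
psi2 = k.psi2(Z, qX)   # shape (M, M) after this change (previously N x M x M)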
@ -22,15 +33,10 @@ def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
|
|||
S - NxQ
|
||||
gamma - NxQ
|
||||
"""
|
||||
# here are the "statistics" for psi1 and psi2
|
||||
# here are the "statistics" for psi1
|
||||
# Produced intermediate results:
|
||||
# _psi1 NxM
|
||||
# _dpsi1_dvariance NxM
|
||||
# _dpsi1_dlengthscale NxMxQ
|
||||
# _dpsi1_dZ NxMxQ
|
||||
# _dpsi1_dgamma NxMxQ
|
||||
# _dpsi1_dmu NxMxQ
|
||||
# _dpsi1_dS NxMxQ
|
||||
|
||||
|
||||
lengthscale2 = np.square(lengthscale)
|
||||
|
||||
|
|
@ -40,25 +46,15 @@ def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
|
|||
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
|
||||
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
|
||||
_psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
|
||||
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom)) # NxMxQ
|
||||
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
|
||||
_psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ
|
||||
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. # NxMxQ
|
||||
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
|
||||
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
|
||||
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
|
||||
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
|
||||
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
|
||||
_psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
|
||||
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
|
||||
_dpsi1_dvariance = _psi1 / variance # NxM
|
||||
_dpsi1_dgamma = _psi1_q * (_psi1_exp_dist_sq/_psi1_denom_sqrt-_psi1_exp_Z) # NxMxQ
|
||||
_dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ
|
||||
_dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ
|
||||
_dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ
|
||||
_dpsi1_dlengthscale = 2.*lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ
|
||||
|
||||
return _psi1, _dpsi1_dvariance, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _dpsi1_dZ, _dpsi1_dlengthscale
|
||||
return _psi1
|
||||
|
||||
@Cache_this(limit=1)
|
||||
def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
|
||||
"""
|
||||
Z - MxQ
|
||||
|
|
@ -66,19 +62,14 @@ def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
|
|||
S - NxQ
|
||||
gamma - NxQ
|
||||
"""
|
||||
# here are the "statistics" for psi1 and psi2
|
||||
# here are the "statistics" for psi2
|
||||
# Produced intermediate results:
|
||||
# _psi2 NxMxM
|
||||
# _psi2_dvariance NxMxM
|
||||
# _psi2_dlengthscale NxMxMxQ
|
||||
# _psi2_dZ NxMxMxQ
|
||||
# _psi2_dgamma NxMxMxQ
|
||||
# _psi2_dmu NxMxMxQ
|
||||
# _psi2_dS NxMxMxQ
|
||||
# _psi2 MxM
|
||||
|
||||
lengthscale2 = np.square(lengthscale)
|
||||
|
||||
_psi2_Zhat, _psi2_Zdist = _Z_distances(Z)
|
||||
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
|
||||
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
|
||||
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
|
||||
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
|
||||
|
||||
|
|
@ -93,15 +84,130 @@ def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
|
|||
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
|
||||
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
|
||||
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
|
||||
_psi2_q = np.square(variance) * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
|
||||
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
|
||||
|
||||
return _psi2
|
||||
|
||||
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
||||
ARD = (len(lengthscale)!=1)
|
||||
|
||||
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
|
||||
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
|
||||
|
||||
dL_dlengscale = dl_psi1 + dl_psi2
|
||||
if not ARD:
|
||||
dL_dlengscale = dL_dlengscale.sum()
|
||||
|
||||
dL_dgamma = dgamma_psi1 + dgamma_psi2
|
||||
dL_dmu = dmu_psi1 + dmu_psi2
|
||||
dL_dS = dS_psi1 + dS_psi2
|
||||
dL_dZ = dZ_psi1 + dZ_psi2
|
||||
|
||||
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma
|
||||
|
||||
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma):
|
||||
"""
|
||||
dL_dpsi1 - NxM
|
||||
Z - MxQ
|
||||
mu - NxQ
|
||||
S - NxQ
|
||||
gamma - NxQ
|
||||
"""
|
||||
# here are the "statistics" for psi1
|
||||
# Produced intermediate results: dL_dparams w.r.t. psi1
|
||||
# _dL_dvariance 1
|
||||
# _dL_dlengthscale Q
|
||||
# _dL_dZ MxQ
|
||||
# _dL_dgamma NxQ
|
||||
# _dL_dmu NxQ
|
||||
# _dL_dS NxQ
|
||||
|
||||
lengthscale2 = np.square(lengthscale)
|
||||
|
||||
# psi1
|
||||
_psi1_denom = S / lengthscale2 + 1. # NxQ
|
||||
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ
|
||||
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
|
||||
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ
|
||||
_psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ
|
||||
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ
|
||||
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
|
||||
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
|
||||
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
|
||||
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
|
||||
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
|
||||
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
|
||||
_psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
|
||||
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
|
||||
_dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1
|
||||
_dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ
|
||||
_dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ
|
||||
_dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ
|
||||
_dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z))
|
||||
_dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z))
|
||||
|
||||
# _dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ
|
||||
# _dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ
|
||||
# _dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ
|
||||
# _dpsi1_dlengthscale = 2.*lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ
|
||||
|
||||
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
|
||||
|
||||
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma):
|
||||
"""
|
||||
Z - MxQ
|
||||
mu - NxQ
|
||||
S - NxQ
|
||||
gamma - NxQ
|
||||
dL_dpsi2 - MxM
|
||||
"""
|
||||
# here are the "statistics" for psi2
|
||||
# Produced the derivatives w.r.t. psi2:
|
||||
# _dL_dvariance 1
|
||||
# _dL_dlengthscale Q
|
||||
# _dL_dZ MxQ
|
||||
# _dL_dgamma NxQ
|
||||
# _dL_dmu NxQ
|
||||
# _dL_dS NxQ
|
||||
|
||||
lengthscale2 = np.square(lengthscale)
|
||||
|
||||
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
|
||||
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
|
||||
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
|
||||
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
|
||||
|
||||
# psi2
|
||||
_psi2_denom = 2.*S / lengthscale2 + 1. # NxQ
|
||||
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
|
||||
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
|
||||
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:])
|
||||
_psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ
|
||||
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q
|
||||
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
|
||||
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
|
||||
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
|
||||
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
|
||||
_psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
|
||||
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
|
||||
_psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
|
||||
_psi2 = np.square(variance) * np.exp(_psi2_exp_sum) # N,M,M
|
||||
_dpsi2_dvariance = 2. * _psi2/variance # NxMxM
|
||||
_dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ
|
||||
_dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ
|
||||
_dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ
|
||||
_dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ
|
||||
_dpsi2_dlengthscale = 2.*lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ
|
||||
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
|
||||
_dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance
|
||||
_dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z))
|
||||
_dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq)
|
||||
_dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq)
|
||||
_dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z))
|
||||
# print _psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq #+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z)
|
||||
_dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z))
|
||||
|
||||
|
||||
# _dpsi2_dvariance = 2. * _psi2/variance # NxMxM
|
||||
# _dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ
|
||||
# _dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ
|
||||
# _dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ
|
||||
# _dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ
|
||||
# _dpsi2_dlengthscale = 2.*lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ
|
||||
|
||||
return _psi2, _dpsi2_dvariance, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _dpsi2_dZ, _dpsi2_dlengthscale
|
||||
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
|
||||
|
|
|
|||
|
|
@ -42,9 +42,11 @@ class RBF(Stationary):
|
|||
#---------------------------------------#
|
||||
|
||||
def psi0(self, Z, variational_posterior):
|
||||
if self.useGPU:
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
if self.useGPU:
|
||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0]
|
||||
else:
|
||||
return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0]
|
||||
else:
|
||||
return self.Kdiag(variational_posterior.mean)
|
||||
|
||||
|
|
@ -53,7 +55,7 @@ class RBF(Stationary):
|
|||
if self.useGPU:
|
||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1]
|
||||
else:
|
||||
psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1]
|
||||
else:
|
||||
_, _, _, psi1 = self._psi1computations(Z, variational_posterior)
|
||||
return psi1
|
||||
|
|
@ -63,7 +65,7 @@ class RBF(Stationary):
|
|||
if self.useGPU:
|
||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2]
|
||||
else:
|
||||
psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2]
|
||||
else:
|
||||
_, _, _, _, psi2 = self._psi2computations(Z, variational_posterior)
|
||||
return psi2
|
||||
|
|
@ -74,26 +76,30 @@ class RBF(Stationary):
|
|||
if self.useGPU:
|
||||
self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||
else:
|
||||
# dL_dvar, dL_dlengscale, dL_dZ, dL_dgamma, dL_dmu, dL_dS = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||
dL_dvar, dL_dlengscale, _, _, _, _ = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||
self.variance.gradient = dL_dvar
|
||||
self.lengthscale.gradient = dL_dlengscale
|
||||
|
||||
_, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
_, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
|
||||
#contributions from psi0:
|
||||
self.variance.gradient = np.sum(dL_dpsi0)
|
||||
|
||||
#from psi1
|
||||
self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
|
||||
if self.ARD:
|
||||
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
|
||||
else:
|
||||
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum()
|
||||
|
||||
#from psi2
|
||||
self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
|
||||
if self.ARD:
|
||||
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
|
||||
else:
|
||||
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum()
|
||||
# _, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
# _, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
#
|
||||
# #contributions from psi0:
|
||||
# self.variance.gradient = np.sum(dL_dpsi0)
|
||||
#
|
||||
# #from psi1
|
||||
# self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
|
||||
# if self.ARD:
|
||||
# self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
|
||||
# else:
|
||||
# self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum()
|
||||
#
|
||||
# #from psi2
|
||||
# self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
|
||||
# if self.ARD:
|
||||
# self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
|
||||
# else:
|
||||
# self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum()
|
||||
|
||||
elif isinstance(variational_posterior, variational.NormalPosterior):
|
||||
l2 = self.lengthscale**2
|
||||
|
|
@ -126,22 +132,25 @@ class RBF(Stationary):
|
|||
else:
|
||||
raise ValueError, "unknown distriubtion received for psi-statistics"
|
||||
|
||||
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
# Spike-and-Slab GPLVM
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
if self.useGPU:
|
||||
return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||
else:
|
||||
_, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
_, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
|
||||
#psi1
|
||||
grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0)
|
||||
|
||||
#psi2
|
||||
grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1)
|
||||
|
||||
return grad
|
||||
_, _, dL_dZ, _, _, _ = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||
return dL_dZ
|
||||
|
||||
# _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
# _, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
#
|
||||
# #psi1
|
||||
# grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0)
|
||||
#
|
||||
# #psi2
|
||||
# grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1)
|
||||
#
|
||||
# return grad
|
||||
|
||||
elif isinstance(variational_posterior, variational.NormalPosterior):
|
||||
l2 = self.lengthscale **2
|
||||
|
|
@ -167,26 +176,29 @@ class RBF(Stationary):
|
|||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
if self.useGPU:
|
||||
return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||
else:
|
||||
ndata = variational_posterior.mean.shape[0]
|
||||
|
||||
_, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
_, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
|
||||
#psi1
|
||||
grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
|
||||
grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
|
||||
grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
|
||||
|
||||
#psi2
|
||||
grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
|
||||
grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
|
||||
grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
|
||||
|
||||
if self.group_spike_prob:
|
||||
grad_gamma[:] = grad_gamma.mean(axis=0)
|
||||
|
||||
return grad_mu, grad_S, grad_gamma
|
||||
else:
|
||||
_, _, _, dL_dmu, dL_dS, dL_dgamma = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||
return dL_dmu, dL_dS, dL_dgamma
|
||||
|
||||
# ndata = variational_posterior.mean.shape[0]
|
||||
#
|
||||
# _, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
# _, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
#
|
||||
# #psi1
|
||||
# grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
|
||||
# grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
|
||||
# grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
|
||||
#
|
||||
# #psi2
|
||||
# grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
|
||||
# grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
|
||||
# grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
|
||||
#
|
||||
# if self.group_spike_prob:
|
||||
# grad_gamma[:] = grad_gamma.mean(axis=0)
|
||||
#
|
||||
# return grad_mu, grad_S, grad_gamma
|
||||
|
||||
elif isinstance(variational_posterior, variational.NormalPosterior):
|
||||
|
||||
|
|
@@ -25,7 +25,7 @@ class Static(Kern):
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)

def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return np.zeros(Z.shape)

def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
@@ -180,7 +180,7 @@ class Stationary(Kern):
return np.zeros(X.shape)

def input_sensitivity(self):
return np.ones(self.input_dim)/self.lengthscale
return np.ones(self.input_dim)/self.lengthscale**2

class Exponential(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'):
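The change above makes a stationary kernel report relevance as 1/lengthscale**2 rather than 1/lengthscale, which feeds into the ARD plots and the MRD scale comparison further down. A small illustrative check (sizes invented):

import numpy as np
import GPy

k = GPy.kern.RBF(2, ARD=True, lengthscale=np.array([1., 10.]))
print(k.input_sensitivity())   # with this change: [1.0, 0.01]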
@@ -227,3 +227,6 @@ class Bernoulli(Likelihood):
ns = np.ones_like(gp, dtype=int)
Ysim = np.random.binomial(ns, self.gp_link.transf(gp))
return Ysim.reshape(orig_shape)

def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
pass
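The samples method above draws one Bernoulli outcome per latent value through the likelihood's link function. A self-contained sketch of the same idea, using the logistic function as a stand-in for self.gp_link.transf:

import numpy as np
from scipy.special import expit   # stand-in for the likelihood's gp_link.transf

gp = np.random.randn(10, 1)                 # latent function values
p = expit(gp)                               # success probabilities in (0, 1)
ns = np.ones_like(gp, dtype=int)
Ysim = np.random.binomial(ns, p)            # one draw per latent value
print(Ysim.shape)                           # (10, 1)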
@ -10,6 +10,7 @@ from ..util import linalg
|
|||
from ..core.parameterization.variational import NormalPosterior, NormalPrior, VariationalPosterior
|
||||
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients
|
||||
from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
|
||||
import logging
|
||||
|
||||
class BayesianGPLVM(SparseGP):
|
||||
"""
|
||||
|
|
@ -25,8 +26,10 @@ class BayesianGPLVM(SparseGP):
|
|||
"""
|
||||
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
|
||||
Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs):
|
||||
self.logger = logging.getLogger(self.__class__.__name__)
|
||||
if X == None:
|
||||
from ..util.initialization import initialize_latent
|
||||
self.logger.info("initializing latent space X with method {}".format(init))
|
||||
X, fracs = initialize_latent(init, input_dim, Y)
|
||||
else:
|
||||
fracs = np.ones(input_dim)
|
||||
|
|
@ -36,7 +39,6 @@ class BayesianGPLVM(SparseGP):
|
|||
if X_variance is None:
|
||||
X_variance = np.random.uniform(0,.1,X.shape)
|
||||
|
||||
|
||||
if Z is None:
|
||||
Z = np.random.permutation(X.copy())[:num_inducing]
|
||||
assert Z.shape[1] == X.shape[1]
|
||||
|
|
@ -52,11 +54,14 @@ class BayesianGPLVM(SparseGP):
|
|||
X = NormalPosterior(X, X_variance)
|
||||
|
||||
if inference_method is None:
|
||||
if np.any(np.isnan(Y)):
|
||||
inan = np.isnan(Y)
|
||||
if np.any(inan):
|
||||
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData
|
||||
inference_method = VarDTCMissingData()
|
||||
self.logger.debug("creating inference_method with var_dtc missing data")
|
||||
inference_method = VarDTCMissingData(inan=inan)
|
||||
else:
|
||||
from ..inference.latent_function_inference.var_dtc import VarDTC
|
||||
self.logger.debug("creating inference_method var_dtc")
|
||||
inference_method = VarDTC()
|
||||
|
||||
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
|
||||
|
|
@ -83,7 +88,7 @@ class BayesianGPLVM(SparseGP):
|
|||
resolution=50, ax=None, marker='o', s=40,
|
||||
fignum=None, plot_inducing=True, legend=True,
|
||||
plot_limits=None,
|
||||
aspect='auto', updates=False, **kwargs):
|
||||
aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}):
|
||||
import sys
|
||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||
from ..plotting.matplot_dep import dim_reduction_plots
|
||||
|
|
@ -91,7 +96,7 @@ class BayesianGPLVM(SparseGP):
|
|||
return dim_reduction_plots.plot_latent(self, labels, which_indices,
|
||||
resolution, ax, marker, s,
|
||||
fignum, plot_inducing, legend,
|
||||
plot_limits, aspect, updates, **kwargs)
|
||||
plot_limits, aspect, updates, predict_kwargs, imshow_kwargs)
|
||||
|
||||
def do_test_latents(self, Y):
|
||||
"""
|
||||
|
|
@ -100,36 +105,41 @@ class BayesianGPLVM(SparseGP):
|
|||
Notes:
|
||||
This will only work with a univariate Gaussian likelihood (for now)
|
||||
"""
|
||||
assert not self.likelihood.is_heteroscedastic
|
||||
N_test = Y.shape[0]
|
||||
input_dim = self.Z.shape[1]
|
||||
|
||||
means = np.zeros((N_test, input_dim))
|
||||
covars = np.zeros((N_test, input_dim))
|
||||
|
||||
dpsi0 = -0.5 * self.input_dim * self.likelihood.precision
|
||||
dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods
|
||||
V = self.likelihood.precision * Y
|
||||
|
||||
dpsi0 = -0.5 * self.input_dim / self.likelihood.variance
|
||||
dpsi2 = self.grad_dict['dL_dpsi2'][0][None, :, :] # TODO: this may change if we ignore het. likelihoods
|
||||
V = Y/self.likelihood.variance
|
||||
|
||||
#compute CPsi1V
|
||||
if self.Cpsi1V is None:
|
||||
psi1V = np.dot(self.psi1.T, self.likelihood.V)
|
||||
tmp, _ = linalg.dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
|
||||
tmp, _ = linalg.dpotrs(self.LB, tmp, lower=1)
|
||||
self.Cpsi1V, _ = linalg.dtrtrs(self._Lm, tmp, lower=1, trans=1)
|
||||
#if self.Cpsi1V is None:
|
||||
# psi1V = np.dot(self.psi1.T, self.likelihood.V)
|
||||
# tmp, _ = linalg.dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
|
||||
# tmp, _ = linalg.dpotrs(self.LB, tmp, lower=1)
|
||||
# self.Cpsi1V, _ = linalg.dtrtrs(self._Lm, tmp, lower=1, trans=1)
|
||||
|
||||
dpsi1 = np.dot(self.Cpsi1V, V.T)
|
||||
dpsi1 = np.dot(self.posterior.woodbury_vector, V.T)
|
||||
|
||||
start = np.zeros(self.input_dim * 2)
|
||||
#start = np.zeros(self.input_dim * 2)
|
||||
|
||||
|
||||
from scipy.optimize import minimize
|
||||
|
||||
for n, dpsi1_n in enumerate(dpsi1.T[:, :, None]):
|
||||
args = (self.kern, self.Z, dpsi0, dpsi1_n.T, dpsi2)
|
||||
xopt, fopt, neval, status = SCG(f=latent_cost, gradf=latent_grad, x=start, optargs=args, display=False)
|
||||
|
||||
args = (input_dim, self.kern.copy(), self.Z, dpsi0, dpsi1_n.T, dpsi2)
|
||||
res = minimize(latent_cost_and_grad, jac=True, x0=np.hstack((means[n], covars[n])), args=args, method='BFGS')
|
||||
xopt = res.x
|
||||
mu, log_S = xopt.reshape(2, 1, -1)
|
||||
means[n] = mu[0].copy()
|
||||
covars[n] = np.exp(log_S[0]).copy()
|
||||
|
||||
return means, covars
|
||||
X = NormalPosterior(means, covars)
|
||||
|
||||
return X
|
||||
|
||||
def dmu_dX(self, Xnew):
|
||||
"""
|
||||
|
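do_test_latents now optimises each test point's variational parameters with scipy's BFGS and returns them wrapped in a NormalPosterior. A minimal usage sketch, assuming a GPy build matching this commit (data and sizes invented):

import numpy as np
import GPy

Y = np.random.randn(60, 8)
m = GPy.models.BayesianGPLVM(Y, input_dim=2, num_inducing=15)
m.optimize(max_iters=50, messages=False)

Ytest = np.random.randn(5, 8)
qXstar = m.do_test_latents(Ytest)                   # NormalPosterior over the test latents
print(qXstar.mean.shape, qXstar.variance.shape)     # (5, 2) (5, 2)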
|
@ -161,57 +171,26 @@ class BayesianGPLVM(SparseGP):
|
|||
return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
|
||||
|
||||
|
||||
def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
|
||||
def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
|
||||
"""
|
||||
objective function for fitting the latent variables for test points
|
||||
(negative log-likelihood: should be minimised!)
|
||||
"""
|
||||
mu, log_S = mu_S.reshape(2, 1, -1)
|
||||
mu = mu_S[:input_dim][None]
|
||||
log_S = mu_S[input_dim:][None]
|
||||
S = np.exp(log_S)
|
||||
|
||||
psi0 = kern.psi0(Z, mu, S)
|
||||
psi1 = kern.psi1(Z, mu, S)
|
||||
psi2 = kern.psi2(Z, mu, S)
|
||||
X = NormalPosterior(mu, S)
|
||||
|
||||
lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
|
||||
psi0 = kern.psi0(Z, X)
|
||||
psi1 = kern.psi1(Z, X)
|
||||
psi2 = kern.psi2(Z, X)
|
||||
|
||||
mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
|
||||
mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
|
||||
mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
|
||||
|
||||
dmu = mu0 + mu1 + mu2 - mu
|
||||
lik = dL_dpsi0 * psi0.sum() + np.einsum('ij,kj->...', dL_dpsi1, psi1) + np.einsum('ijk,lkj->...', dL_dpsi2, psi2) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
|
||||
|
||||
dLdmu, dLdS = kern.gradients_qX_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, X)
|
||||
dmu = dLdmu - mu
|
||||
# dS = S0 + S1 + S2 -0.5 + .5/S
|
||||
dlnS = S * (S0 + S1 + S2 - 0.5) + .5
|
||||
dlnS = S * (dLdS - 0.5) + .5
|
||||
|
||||
return -lik, -np.hstack((dmu.flatten(), dlnS.flatten()))
|
||||
|
||||
def latent_cost(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
|
||||
"""
|
||||
objective function for fitting the latent variables (negative log-likelihood: should be minimised!)
|
||||
This is the same as latent_cost_and_grad but only for the objective
|
||||
"""
|
||||
mu, log_S = mu_S.reshape(2, 1, -1)
|
||||
S = np.exp(log_S)
|
||||
|
||||
psi0 = kern.psi0(Z, mu, S)
|
||||
psi1 = kern.psi1(Z, mu, S)
|
||||
psi2 = kern.psi2(Z, mu, S)
|
||||
|
||||
lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
|
||||
return -float(lik)
|
||||
|
||||
def latent_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
|
||||
"""
|
||||
This is the same as latent_cost_and_grad but only for the grad
|
||||
"""
|
||||
mu, log_S = mu_S.reshape(2, 1, -1)
|
||||
S = np.exp(log_S)
|
||||
|
||||
mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
|
||||
mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
|
||||
mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
|
||||
|
||||
dmu = mu0 + mu1 + mu2 - mu
|
||||
# dS = S0 + S1 + S2 -0.5 + .5/S
|
||||
dlnS = S * (S0 + S1 + S2 - 0.5) + .5
|
||||
|
||||
return -np.hstack((dmu.flatten(), dlnS.flatten()))
|
||||
|
|
@@ -2,23 +2,30 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
import itertools
import pylab
import itertools, logging

from ..core import Model
from ..kern import Kern
from ..core.parameterization.variational import NormalPosterior, NormalPrior
from ..core.parameterization import Param, Parameterized
from ..core.parameterization.observable_array import ObsAr
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC
from ..inference.latent_function_inference import InferenceMethodList
from ..likelihoods import Gaussian
from GPy.util.initialization import initialize_latent
from ..util.initialization import initialize_latent
from ..core.sparse_gp import SparseGP, GP

class MRD(Model):
class MRD(SparseGP):
"""
!WARNING: This is bleeding edge code and still in development.
Functionality may change fundamentally during development!

Apply MRD to all given datasets Y in Ylist.

Y_i in [n x p_i]

If Ylist is a dictionary, the keys of the dictionary are the names, and the
values are the different datasets to compare.

The samples n in the datasets need
to match up, whereas the dimensionality p_d can differ.
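A minimal usage sketch matching the docstring above, assuming the model is exposed as GPy.models.MRD in this build (data and sizes invented):

import numpy as np
import GPy

# Two views of the same 40 samples; the output dimensionalities p_i may differ.
Y1 = np.random.randn(40, 5)
Y2 = np.random.randn(40, 8)

# Passing a dict uses its keys as the dataset names, as described above.
m = GPy.models.MRD({'view_a': Y1, 'view_b': Y2}, input_dim=3, num_inducing=10)
m.optimize(messages=False, max_iters=50)

# Predict outputs for the second dataset at some latent locations; the predict
# method added further down selects the dataset via Yindex.
mean, var = m.predict(np.random.randn(4, 3), Yindex=1)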
@ -39,40 +46,77 @@ class MRD(Model):
|
|||
:param num_inducing: number of inducing inputs to use
|
||||
:param Z: initial inducing inputs
|
||||
:param kernel: list of kernels or kernel to copy for each output
|
||||
:type kernel: [GPy.kern.kern] | GPy.kern.kern | None (default)
|
||||
:param :class:`~GPy.inference.latent_function_inference inference_method: the inference method to use
|
||||
:param :class:`~GPy.likelihoods.likelihood.Likelihood` likelihood: the likelihood to use
|
||||
:type kernel: [GPy.kernels.kernels] | GPy.kernels.kernels | None (default)
|
||||
:param :class:`~GPy.inference.latent_function_inference inference_method:
|
||||
InferenceMethodList of inferences, or one inference method for all
|
||||
:param :class:`~GPy.likelihoodss.likelihoods.likelihoods` likelihoods: the likelihoods to use
|
||||
:param str name: the name of this model
|
||||
:param [str] Ynames: the names for the datasets given, must be of equal length as Ylist or None
|
||||
"""
|
||||
def __init__(self, Ylist, input_dim, X=None, X_variance=None,
|
||||
initx = 'PCA', initz = 'permute',
|
||||
num_inducing=10, Z=None, kernel=None,
|
||||
inference_method=None, likelihood=None, name='mrd', Ynames=None):
|
||||
super(MRD, self).__init__(name)
|
||||
inference_method=None, likelihoods=None, name='mrd', Ynames=None):
|
||||
super(GP, self).__init__(name)
|
||||
|
||||
self.logger = logging.getLogger(self.__class__.__name__)
|
||||
self.input_dim = input_dim
|
||||
self.num_inducing = num_inducing
|
||||
|
||||
self.Ylist = Ylist
|
||||
if isinstance(Ylist, dict):
|
||||
Ynames, Ylist = zip(*Ylist.items())
|
||||
|
||||
self.logger.debug("creating observable arrays")
|
||||
self.Ylist = [ObsAr(Y) for Y in Ylist]
|
||||
|
||||
if Ynames is None:
|
||||
self.logger.debug("creating Ynames")
|
||||
Ynames = ['Y{}'.format(i) for i in range(len(Ylist))]
|
||||
self.names = Ynames
|
||||
assert len(self.names) == len(self.Ylist), "one name per dataset, or None if Ylist is a dict"
|
||||
|
||||
if inference_method is None:
|
||||
self.inference_method= InferenceMethodList()
|
||||
warned = False
|
||||
for y in Ylist:
|
||||
inan = np.isnan(y)
|
||||
if np.any(inan):
|
||||
if not warned:
|
||||
self.logger.warn("WARNING: NaN values detected, make sure initx method can cope with NaN values or provide starting latent space X")
|
||||
warned = True
|
||||
self.inference_method.append(VarDTCMissingData(limit=1, inan=inan))
|
||||
else:
|
||||
self.inference_method.append(VarDTC(limit=1))
|
||||
self.logger.debug("created inference method <{}>".format(hex(id(self.inference_method[-1]))))
|
||||
else:
|
||||
if not isinstance(inference_method, InferenceMethodList):
|
||||
self.logger.debug("making inference_method an InferenceMethodList")
|
||||
inference_method = InferenceMethodList(inference_method)
|
||||
self.inference_method = inference_method
|
||||
|
||||
|
||||
self._in_init_ = True
|
||||
X, fracs = self._init_X(initx, Ylist)
|
||||
if X is None:
|
||||
X, fracs = self._init_X(initx, Ylist)
|
||||
else:
|
||||
fracs = [X.var(0)]*len(Ylist)
|
||||
self.Z = Param('inducing inputs', self._init_Z(initz, X))
|
||||
self.num_inducing = self.Z.shape[0] # ensure M==N if M>N
|
||||
|
||||
# sort out the kernels
|
||||
self.logger.info("building kernels")
|
||||
if kernel is None:
|
||||
from ..kern import RBF
|
||||
self.kern = [RBF(input_dim, ARD=1, lengthscale=fracs[i], name='rbf'.format(i)) for i in range(len(Ylist))]
|
||||
kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))]
|
||||
elif isinstance(kernel, Kern):
|
||||
self.kern = []
|
||||
kernels = []
|
||||
for i in range(len(Ylist)):
|
||||
k = kernel.copy()
|
||||
self.kern.append(k)
|
||||
kernels.append(k)
|
||||
else:
|
||||
assert len(kernel) == len(Ylist), "need one kernel per output"
|
||||
assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
|
||||
self.kern = kernel
|
||||
kernels = kernel
|
||||
|
||||
if X_variance is None:
|
||||
X_variance = np.random.uniform(0.1, 0.2, X.shape)
|
||||
|
|
@ -80,32 +124,28 @@ class MRD(Model):
|
|||
self.variational_prior = NormalPrior()
|
||||
self.X = NormalPosterior(X, X_variance)
|
||||
|
||||
if likelihood is None:
|
||||
self.likelihood = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
|
||||
else: self.likelihood = likelihood
|
||||
|
||||
if inference_method is None:
|
||||
self.inference_method= []
|
||||
for y in Ylist:
|
||||
if np.any(np.isnan(y)):
|
||||
self.inference_method.append(VarDTCMissingData(limit=1))
|
||||
else:
|
||||
self.inference_method.append(VarDTC(limit=1))
|
||||
else:
|
||||
self.inference_method = inference_method
|
||||
self.inference_method.set_limit(len(Ylist))
|
||||
if likelihoods is None:
|
||||
likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
|
||||
else: likelihoods = likelihoods
|
||||
|
||||
self.logger.info("adding X and Z")
|
||||
self.add_parameters(self.X, self.Z)
|
||||
|
||||
if Ynames is None:
|
||||
Ynames = ['Y{}'.format(i) for i in range(len(Ylist))]
|
||||
self.bgplvms = []
|
||||
self.num_data = Ylist[0].shape[0]
|
||||
|
||||
for i, n, k, l in itertools.izip(itertools.count(), Ynames, self.kern, self.likelihood):
|
||||
for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist):
|
||||
assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"
|
||||
p = Parameterized(name=n)
|
||||
p.add_parameter(k)
|
||||
p.kern = k
|
||||
p.add_parameter(l)
|
||||
setattr(self, 'Y{}'.format(i), p)
|
||||
p.likelihood = l
|
||||
self.add_parameter(p)
|
||||
self.bgplvms.append(p)
|
||||
|
||||
self.posterior = None
|
||||
self.logger.info("init done")
|
||||
self._in_init_ = False
|
||||
|
||||
def parameters_changed(self):
|
||||
|
|
@ -113,14 +153,15 @@ class MRD(Model):
|
|||
self.posteriors = []
|
||||
self.Z.gradient[:] = 0.
|
||||
self.X.gradient[:] = 0.
|
||||
|
||||
for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method):
|
||||
for y, b, i in itertools.izip(self.Ylist, self.bgplvms, self.inference_method):
|
||||
self.logger.info('working on im <{}>'.format(hex(id(i))))
|
||||
k, l = b.kern, b.likelihood
|
||||
posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y)
|
||||
|
||||
self.posteriors.append(posterior)
|
||||
self._log_marginal_likelihood += lml
|
||||
|
||||
# likelihood gradients
|
||||
# likelihoods gradients
|
||||
l.update_gradients(grad_dict.pop('dL_dthetaL'))
|
||||
|
||||
#gradients wrt kernel
|
||||
|
|
@ -133,12 +174,19 @@ class MRD(Model):
|
|||
#gradients wrt Z
|
||||
self.Z.gradient += k.gradients_X(dL_dKmm, self.Z)
|
||||
self.Z.gradient += k.gradients_Z_expectations(
|
||||
grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
|
||||
grad_dict['dL_dpsi0'],
|
||||
grad_dict['dL_dpsi1'],
|
||||
grad_dict['dL_dpsi2'],
|
||||
Z=self.Z, variational_posterior=self.X)
|
||||
|
||||
dL_dmean, dL_dS = k.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **grad_dict)
|
||||
self.X.mean.gradient += dL_dmean
|
||||
self.X.variance.gradient += dL_dS
|
||||
|
||||
self.posterior = self.posteriors[0]
|
||||
self.kern = self.bgplvms[0].kern
|
||||
self.likelihood = self.bgplvms[0].likelihood
|
||||
|
||||
# update for the KL divergence
|
||||
self.variational_prior.update_gradients_KL(self.X)
|
||||
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
|
||||
|
|
@ -151,7 +199,7 @@ class MRD(Model):
|
|||
Ylist = self.Ylist
|
||||
if init in "PCA_concat":
|
||||
X, fracs = initialize_latent('PCA', self.input_dim, np.hstack(Ylist))
|
||||
fracs = [fracs]*self.input_dim
|
||||
fracs = [fracs]*len(Ylist)
|
||||
elif init in "PCA_single":
|
||||
X = np.zeros((Ylist[0].shape[0], self.input_dim))
|
||||
fracs = []
|
||||
|
|
@ -162,7 +210,7 @@ class MRD(Model):
|
|||
else: # init == 'random':
|
||||
X = np.random.randn(Ylist[0].shape[0], self.input_dim)
|
||||
fracs = X.var(0)
|
||||
fracs = [fracs]*self.input_dim
|
||||
fracs = [fracs]*len(Ylist)
|
||||
X -= X.mean()
|
||||
X /= X.std()
|
||||
return X, fracs
|
||||
|
|
@ -177,10 +225,12 @@ class MRD(Model):
|
|||
return Z
|
||||
|
||||
def _handle_plotting(self, fignum, axes, plotf, sharex=False, sharey=False):
|
||||
import matplotlib.pyplot as plt
|
||||
if axes is None:
|
||||
fig = pylab.figure(num=fignum)
|
||||
fig = plt.figure(num=fignum)
|
||||
sharex_ax = None
|
||||
sharey_ax = None
|
||||
plots = []
|
||||
for i, g in enumerate(self.bgplvms):
|
||||
try:
|
||||
if sharex:
|
||||
|
|
@ -197,26 +247,36 @@ class MRD(Model):
|
|||
ax = axes[i]
|
||||
else:
|
||||
raise ValueError("Need one axes per latent dimension input_dim")
|
||||
plotf(i, g, ax)
|
||||
plots.append(plotf(i, g, ax))
|
||||
if sharey_ax is not None:
|
||||
pylab.setp(ax.get_yticklabels(), visible=False)
|
||||
pylab.draw()
|
||||
plt.setp(ax.get_yticklabels(), visible=False)
|
||||
plt.draw()
|
||||
if axes is None:
|
||||
fig.tight_layout()
|
||||
return fig
|
||||
else:
|
||||
return pylab.gcf()
|
||||
try:
|
||||
fig.tight_layout()
|
||||
except:
|
||||
pass
|
||||
return plots
|
||||
|
||||
def plot_X(self, fignum=None, ax=None):
|
||||
fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g.X))
|
||||
return fig
|
||||
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, Yindex=0):
|
||||
"""
|
||||
Prediction for data set Yindex[default=0].
|
||||
This predicts the output mean and variance for the dataset given in Ylist[Yindex]
|
||||
"""
|
||||
self.posterior = self.posteriors[Yindex]
|
||||
self.kern = self.bgplvms[0].kern
|
||||
self.likelihood = self.bgplvms[0].likelihood
|
||||
return super(MRD, self).predict(Xnew, full_cov, Y_metadata, kern)
|
||||
|
||||
def plot_predict(self, fignum=None, ax=None, sharex=False, sharey=False, **kwargs):
|
||||
fig = self._handle_plotting(fignum,
|
||||
ax,
|
||||
lambda i, g, ax: ax.imshow(g. predict(g.X)[0], **kwargs),
|
||||
sharex=sharex, sharey=sharey)
|
||||
return fig
|
||||
#===============================================================================
|
||||
# TODO: Predict! Maybe even change to several bgplvms, which share an X?
|
||||
#===============================================================================
|
||||
# def plot_predict(self, fignum=None, ax=None, sharex=False, sharey=False, **kwargs):
|
||||
# fig = self._handle_plotting(fignum,
|
||||
# ax,
|
||||
# lambda i, g, ax: ax.imshow(g.predict(g.X)[0], **kwargs),
|
||||
# sharex=sharex, sharey=sharey)
|
||||
# return fig
|
||||
|
||||
def plot_scales(self, fignum=None, ax=None, titles=None, sharex=False, sharey=True, *args, **kwargs):
|
||||
"""
|
||||
|
|
@ -228,28 +288,55 @@ class MRD(Model):
|
|||
"""
|
||||
if titles is None:
|
||||
titles = [r'${}$'.format(name) for name in self.names]
|
||||
ymax = reduce(max, [np.ceil(max(g.input_sensitivity())) for g in self.bgplvms])
|
||||
ymax = reduce(max, [np.ceil(max(g.kern.input_sensitivity())) for g in self.bgplvms])
|
||||
def plotf(i, g, ax):
|
||||
ax.set_ylim([0,ymax])
|
||||
g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs)
|
||||
return g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs)
|
||||
fig = self._handle_plotting(fignum, ax, plotf, sharex=sharex, sharey=sharey)
|
||||
return fig
|
||||
|
||||
def plot_latent(self, fignum=None, ax=None, *args, **kwargs):
|
||||
fig = self.gref.plot_latent(fignum=fignum, ax=ax, *args, **kwargs) # self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs))
|
||||
return fig
|
||||
def plot_latent(self, labels=None, which_indices=None,
|
||||
resolution=50, ax=None, marker='o', s=40,
|
||||
fignum=None, plot_inducing=True, legend=True,
|
||||
plot_limits=None,
|
||||
aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}):
|
||||
"""
|
||||
see plotting.matplot_dep.dim_reduction_plots.plot_latent
|
||||
if predict_kwargs is None, will plot latent spaces for 0th dataset (and kernel), otherwise give
|
||||
predict_kwargs=dict(Yindex='index') for plotting only the latent space of dataset with 'index'.
|
||||
"""
|
||||
import sys
|
||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||
from matplotlib import pyplot as plt
|
||||
from ..plotting.matplot_dep import dim_reduction_plots
|
||||
if "Yindex" not in predict_kwargs:
|
||||
predict_kwargs['Yindex'] = 0
|
||||
if ax is None:
|
||||
fig = plt.figure(num=fignum)
|
||||
ax = fig.add_subplot(111)
|
||||
else:
|
||||
fig = ax.figure
|
||||
plot = dim_reduction_plots.plot_latent(self, labels, which_indices,
|
||||
resolution, ax, marker, s,
|
||||
fignum, plot_inducing, legend,
|
||||
plot_limits, aspect, updates, predict_kwargs, imshow_kwargs)
|
||||
ax.set_title(self.bgplvms[predict_kwargs['Yindex']].name)
|
||||
try:
|
||||
fig.tight_layout()
|
||||
except:
|
||||
pass
|
||||
|
||||
def _debug_plot(self):
|
||||
self.plot_X_1d()
|
||||
fig = pylab.figure("MRD DEBUG PLOT", figsize=(4 * len(self.bgplvms), 9))
|
||||
fig.clf()
|
||||
axes = [fig.add_subplot(3, len(self.bgplvms), i + 1) for i in range(len(self.bgplvms))]
|
||||
self.plot_X(ax=axes)
|
||||
axes = [fig.add_subplot(3, len(self.bgplvms), i + len(self.bgplvms) + 1) for i in range(len(self.bgplvms))]
|
||||
self.plot_latent(ax=axes)
|
||||
axes = [fig.add_subplot(3, len(self.bgplvms), i + 2 * len(self.bgplvms) + 1) for i in range(len(self.bgplvms))]
|
||||
self.plot_scales(ax=axes)
|
||||
pylab.draw()
|
||||
fig.tight_layout()
|
||||
return plot
|
||||
|
||||
def __getstate__(self):
|
||||
state = super(MRD, self).__getstate__()
|
||||
del state['kern']
|
||||
del state['likelihood']
|
||||
return state
|
||||
|
||||
def __setstate__(self, state):
|
||||
# TODO:
|
||||
super(MRD, self).__setstate__(state)
|
||||
self.kern = self.bgplvms[0].kern
|
||||
self.likelihood = self.bgplvms[0].likelihood
|
||||
self.parameters_changed()
|
||||
|
|
@ -11,7 +11,7 @@ from ..likelihoods import Gaussian
|
|||
from ..inference.optimization import SCG
|
||||
from ..util import linalg
|
||||
from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior
|
||||
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients
|
||||
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
|
||||
from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
|
||||
|
||||
|
||||
|
|
@ -41,7 +41,7 @@ class SSGPLVM(SparseGP):
|
|||
if X_variance is None: # The variance of the variational approximation (S)
|
||||
X_variance = np.random.uniform(0,.1,X.shape)
|
||||
|
||||
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
|
||||
gamma = np.empty_like(X, order='F') # The posterior probabilities of the binary variable in the variational approximation
|
||||
gamma[:] = 0.5 + 0.01 * np.random.randn(X.shape[0], input_dim)
|
||||
|
||||
if group_spike:
|
||||
|
|
@ -60,12 +60,15 @@ class SSGPLVM(SparseGP):
|
|||
pi = np.empty((input_dim))
|
||||
pi[:] = 0.5
|
||||
self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b
|
||||
|
||||
X = np.asfortranarray(X)
|
||||
X_variance = np.asfortranarray(X_variance)
|
||||
gamma = np.asfortranarray(gamma)
|
||||
X = SpikeAndSlabPosterior(X, X_variance, gamma)
|
||||
|
||||
if group_spike:
|
||||
kernel.group_spike_prob = True
|
||||
self.variational_prior.group_spike_prob = True
|
||||
|
||||
|
||||
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
|
||||
self.add_parameter(self.X, index=0)
|
||||
|
|
@ -76,7 +79,7 @@ class SSGPLVM(SparseGP):
|
|||
X.mean.gradient, X.variance.gradient, X.binary_prob.gradient = X_grad
|
||||
|
||||
def parameters_changed(self):
|
||||
if isinstance(self.inference_method, VarDTC_GPU):
|
||||
if isinstance(self.inference_method, VarDTC_GPU) or isinstance(self.inference_method, VarDTC_minibatch):
|
||||
update_gradients(self)
|
||||
return
|
||||
|
||||
|
|
|
|||
|
|
@@ -31,7 +31,7 @@ def plot_latent(model, labels=None, which_indices=None,
                resolution=50, ax=None, marker='o', s=40,
                fignum=None, plot_inducing=False, legend=True,
                plot_limits=None,
                aspect='auto', updates=False, **kwargs):
                aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}):
    """
    :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc)
    :param resolution: the resolution of the grid on which to evaluate the predictive variance

@@ -60,7 +60,7 @@ def plot_latent(model, labels=None, which_indices=None,
    def plot_function(x):
        Xtest_full = np.zeros((x.shape[0], model.X.shape[1]))
        Xtest_full[:, [input_1, input_2]] = x
        _, var = model.predict(Xtest_full)
        _, var = model.predict(Xtest_full, **predict_kwargs)
        var = var[:, :1]
        return np.log(var)

@@ -81,7 +81,7 @@ def plot_latent(model, labels=None, which_indices=None,
    view = ImshowController(ax, plot_function,
                            (xmin, ymin, xmax, ymax),
                            resolution, aspect=aspect, interpolation='bilinear',
                            cmap=pb.cm.binary, **kwargs)
                            cmap=pb.cm.binary, **imshow_kwargs)

    # make sure labels are in order of input:
    ulabels = []
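Note: the previous single **kwargs was forwarded to the imshow call only; with predict_kwargs and imshow_kwargs a caller can now steer the prediction and the background image independently. A hedged usage sketch follows (the example helper, its keyword arguments and the import path are assumptions, not part of this diff):

# Hedged sketch, not part of the diff: import path and example helper are assumed.
from GPy.examples.dimensionality_reduction import bgplvm_simulation
from GPy.plotting.matplot_dep.dim_reduction_plots import plot_latent

m = bgplvm_simulation(optimize=False, plot=False)   # any fitted latent variable model works here
plot_latent(m, which_indices=(0, 1),
            predict_kwargs={},                       # forwarded to model.predict(...)
            imshow_kwargs={'alpha': 0.8})            # forwarded to the ImshowController/imshow call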
@@ -68,7 +68,7 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):

    for i in range(ard_params.shape[0]):
        c = Tango.nextMedium()
        bars.append(plot_bars(fig, ax, x, ard_params[i,:], c, kernel._parameters_[i].name, bottom=bottom))
        bars.append(plot_bars(fig, ax, x, ard_params[i,:], c, kernel.parameters[i].name, bottom=bottom))
        bottom += ard_params[i,:]

    ax.set_xlim(-.5, kernel.input_dim - .5)
@@ -97,7 +97,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',

        for d in which_data_ycols:
            plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
            plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5)
            if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5)

        #optionally plot some samples
        if samples: #NOTE not tested with fixed_inputs

@@ -151,7 +151,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
        for d in which_data_ycols:
            m_d = m[:,d].reshape(resolution, resolution).T
            plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
            plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
            if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)

        #set the limits of the plot to some sensible values
        ax.set_xlim(xmin[0], xmax[0])
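The two new `if not plot_raw:` guards mean raw-function plots no longer scatter the data on top of the latent function, since the raw view can live in a different space than Y. A small sketch of the visible effect (standard GPy calls, assuming a working matplotlib backend):

# Sketch of the behaviour change (assumes a working matplotlib backend).
import numpy as np, GPy

X = np.random.rand(20, 1)
Y = np.sin(6 * X) + 0.05 * np.random.randn(20, 1)
m = GPy.models.GPRegression(X, Y)
m.optimize()
m.plot()     # plot_raw=False: data symbols are drawn
m.plot_f()   # plot_raw=True: data symbols are now omitted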
@@ -88,7 +88,6 @@ class vector_show(matplotlib_show):


class lvm(matplotlib_show):

    def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1]):
        """Visualize a latent variable model

@@ -99,10 +98,11 @@ class lvm(matplotlib_show):
        """
        if vals is None:
            if isinstance(model.X, VariationalPosterior):
                vals = param_to_array(model.X.mean)
                vals = model.X.mean.values
            else:
                vals = param_to_array(model.X)
                vals = model.X.values
        if len(vals.shape)==1:
            vals = vals[None,:]
        matplotlib_show.__init__(self, vals, axes=latent_axes)

        if isinstance(latent_axes,mpl.axes.Axes):

@@ -133,7 +133,7 @@ class lvm(matplotlib_show):

    def modify(self, vals):
        """When latent values are modified, update the latent representation and also update the output visualization."""
        self.vals = vals.copy()
        self.vals = vals.view(np.ndarray).copy()
        y = self.model.predict(self.vals)[0]
        self.data_visualize.modify(y)
        self.latent_handle.set_data(self.vals[0,self.latent_index[0]], self.vals[0,self.latent_index[1]])

@@ -146,7 +146,6 @@ class lvm(matplotlib_show):
        pass

    def on_click(self, event):
        print 'click!'
        if event.inaxes!=self.latent_axes: return
        self.move_on = not self.move_on
        self.called = True
@@ -219,11 +218,11 @@ class lvm_dimselect(lvm):
        self.labels = labels
        lvm.__init__(self,vals,model,data_visualize,latent_axes,sense_axes,latent_index)
        self.show_sensitivities()
        print "use left and right mouse butons to select dimensions"
        print self.latent_values
        print "use left and right mouse buttons to select dimensions"

    def on_click(self, event):

        if event.inaxes==self.sense_axes:
            new_index = max(0,min(int(np.round(event.xdata-0.5)),self.model.input_dim-1))
            if event.button == 1:

@@ -249,6 +248,7 @@ class lvm_dimselect(lvm):


    def on_leave(self,event):
        print type(self.latent_values)
        latent_values = self.latent_values.copy()
        y = self.model.predict(latent_values[None,:])[0]
        self.data_visualize.modify(y)
@@ -393,14 +393,13 @@ class mocap_data_show_vpython(vpython_show):
    def process_values(self):
        raise NotImplementedError, "this needs to be implemented to use the data_show class"


class mocap_data_show(matplotlib_show):
    """Base class for visualizing motion capture data."""

    def __init__(self, vals, axes=None, connect=None):
        if axes==None:
            fig = plt.figure()
            axes = fig.add_subplot(111, projection='3d')
            axes = fig.add_subplot(111, projection='3d',aspect='equal')
        matplotlib_show.__init__(self, vals, axes)

        self.connect = connect

@@ -445,11 +444,12 @@ class mocap_data_show(matplotlib_show):
    def process_values(self):
        raise NotImplementedError, "this needs to be implemented to use the data_show class"

    def initialize_axes(self):
    def initialize_axes(self, boundary=0.05):
        """Set up the axes with the right limits and scaling."""
        self.x_lim = np.array([self.vals[:, 0].min(), self.vals[:, 0].max()])
        self.y_lim = np.array([self.vals[:, 1].min(), self.vals[:, 1].max()])
        self.z_lim = np.array([self.vals[:, 2].min(), self.vals[:, 2].max()])
        bs = [(self.vals[:, i].max()-self.vals[:, i].min())*boundary for i in xrange(3)]
        self.x_lim = np.array([self.vals[:, 0].min()-bs[0], self.vals[:, 0].max()+bs[0]])
        self.y_lim = np.array([self.vals[:, 1].min()-bs[1], self.vals[:, 1].max()+bs[1]])
        self.z_lim = np.array([self.vals[:, 2].min()-bs[2], self.vals[:, 2].max()+bs[2]])

    def initialize_axes_modify(self):
        self.points_handle.remove()

@@ -472,6 +472,8 @@ class mocap_data_show(matplotlib_show):
class stick_show(mocap_data_show):
    """Show a three dimensional point cloud as a figure. Connect elements of the figure together using the matrix connect."""
    def __init__(self, vals, connect=None, axes=None):
        if len(vals.shape)==1:
            vals = vals[None,:]
        mocap_data_show.__init__(self, vals, axes=axes, connect=connect)

    def process_values(self):
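The new boundary argument pads every axis limit by a fraction of the data range so points do not sit exactly on the box. The same rule in isolation (standalone numpy sketch, not GPy code):

# Standalone sketch of the padding rule in initialize_axes(boundary=0.05).
import numpy as np

vals = np.random.randn(10, 3)                  # hypothetical 3D point cloud
boundary = 0.05
bs = [(vals[:, i].max() - vals[:, i].min()) * boundary for i in range(3)]
x_lim = np.array([vals[:, 0].min() - bs[0], vals[:, 0].max() + bs[0]])
print(x_lim)                                   # 5% wider than the raw data range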
@@ -94,22 +94,18 @@ class MiscTests(unittest.TestCase):
        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())

        m.kern.lengthscale.randomize()
        m._trigger_params_changed()
        m2.kern.lengthscale = m.kern.lengthscale
        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())

        m.kern.lengthscale.randomize()
        m._trigger_params_changed()
        m2['.*lengthscale'] = m.kern.lengthscale
        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())

        m.kern.lengthscale.randomize()
        m._trigger_params_changed()
        m2['.*lengthscale'] = m.kern['.*lengthscale']
        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())

        m.kern.lengthscale.randomize()
        m._trigger_params_changed()
        m2.kern.lengthscale = m.kern['.*lengthscale']
        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())

@@ -130,6 +126,23 @@ class MiscTests(unittest.TestCase):
        m2.kern[:] = m.kern[''].values()
        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())

    def test_big_model(self):
        m = GPy.examples.dimensionality_reduction.mrd_simulation(optimize=0, plot=0, plot_sim=0)
        m.X.fix()
        print m
        m.unfix()
        m.checkgrad()
        print m
        m.fix()
        print m
        m.inducing_inputs.unfix()
        print m
        m.checkgrad()
        m.unfix()
        m.checkgrad()
        m.checkgrad()
        print m

    def test_model_set_params(self):
        m = GPy.models.GPRegression(self.X, self.Y)
        lengthscale = np.random.uniform()
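These tests rely on two idioms worth noting: regex-style parameter assignment via m['.*lengthscale'] and fixing or unfixing parameters before a gradient check. A hedged sketch on a small regression model (the parameter name assumes the default RBF kernel; the calls mirror the tests above):

# Hedged sketch mirroring the tests above; '.*lengthscale' assumes an RBF kernel.
import numpy as np, GPy

X = np.random.rand(30, 1)
Y = np.sin(6 * X) + 0.05 * np.random.randn(30, 1)
m = GPy.models.GPRegression(X, Y)

m['.*lengthscale'] = 0.5    # regex indexing sets all matching parameters
m.kern.variance.fix()       # excluded from optimization and the gradient vector
assert m.checkgrad()        # numerical and analytic gradients should still agree
m.kern.variance.unfix()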
@@ -95,7 +95,7 @@ class ParameterizedTest(unittest.TestCase):
        self.assertListEqual(self.test1.kern.param_array.tolist(), val[:2].tolist())

    def test_add_parameter_already_in_hirarchy(self):
        self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0])
        self.assertRaises(HierarchyError, self.test1.add_parameter, self.white.parameters[0])

    def test_default_constraints(self):
        self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops)
@@ -4,7 +4,8 @@ Created on 13 Mar 2014
@author: maxz
'''
import unittest, itertools
import cPickle as pickle
#import cPickle as pickle
import pickle
import numpy as np
from GPy.core.parameterization.index_operations import ParameterIndexOperations,\
    ParameterIndexOperationsView

@@ -15,8 +16,7 @@ from GPy.core.parameterization.priors import Gaussian
from GPy.kern._src.rbf import RBF
from GPy.kern._src.linear import Linear
from GPy.kern._src.static import Bias, White
from GPy.examples.dimensionality_reduction import mrd_simulation,\
    bgplvm_simulation
from GPy.examples.dimensionality_reduction import mrd_simulation
from GPy.examples.regression import toy_rbf_1d_50
from GPy.core.parameterization.variational import NormalPosterior
from GPy.models.gp_regression import GPRegression
@@ -89,28 +89,29 @@ class Test(ListDictTestCase):
        self.assertIs(pcopy.constraints, pcopy.rbf.lengthscale.constraints._param_index_ops)
        self.assertIs(pcopy.constraints, pcopy.linear.constraints._param_index_ops)
        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
        self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
        pcopy.gradient = 10 # gradient does not get copied anymore
        self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
        self.assertSequenceEqual(str(par), str(pcopy))
        self.assertIsNot(par.param_array, pcopy.param_array)
        self.assertIsNot(par.full_gradient, pcopy.full_gradient)
        self.assertIsNot(par.gradient_full, pcopy.gradient_full)
        with tempfile.TemporaryFile('w+b') as f:
            par.pickle(f)
            f.seek(0)
            pcopy = pickle.load(f)
        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
        pcopy.gradient = 10
        np.testing.assert_allclose(par.linear.full_gradient, pcopy.linear.full_gradient)
        np.testing.assert_allclose(pcopy.linear.full_gradient, 10)
        np.testing.assert_allclose(par.linear.gradient_full, pcopy.linear.gradient_full)
        np.testing.assert_allclose(pcopy.linear.gradient_full, 10)
        self.assertSequenceEqual(str(par), str(pcopy))

    def test_model(self):
        par = toy_rbf_1d_50(optimize=0, plot=0)
        pcopy = par.copy()
        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
        self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
        self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
        self.assertSequenceEqual(str(par), str(pcopy))
        self.assertIsNot(par.param_array, pcopy.param_array)
        self.assertIsNot(par.full_gradient, pcopy.full_gradient)
        self.assertIsNot(par.gradient_full, pcopy.gradient_full)
        self.assertTrue(pcopy.checkgrad())
        self.assert_(np.any(pcopy.gradient!=0.0))
        with tempfile.TemporaryFile('w+b') as f:
@@ -118,18 +119,18 @@ class Test(ListDictTestCase):
            f.seek(0)
            pcopy = pickle.load(f)
        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
        np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
        np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
        self.assertSequenceEqual(str(par), str(pcopy))
        self.assert_(pcopy.checkgrad())

    def test_modelrecreation(self):
        par = toy_rbf_1d_50(optimize=0, plot=0)
        pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())
        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
        self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
        np.testing.assert_allclose(par.param_array, pcopy.param_array)
        np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
        self.assertSequenceEqual(str(par), str(pcopy))
        self.assertIsNot(par.param_array, pcopy.param_array)
        self.assertIsNot(par.full_gradient, pcopy.full_gradient)
        self.assertIsNot(par.gradient_full, pcopy.gradient_full)
        self.assertTrue(pcopy.checkgrad())
        self.assert_(np.any(pcopy.gradient!=0.0))
        pcopy.optimize('bfgs')

@@ -139,8 +140,8 @@ class Test(ListDictTestCase):
            par.pickle(f)
            f.seek(0)
            pcopy = pickle.load(f)
        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
        np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
        np.testing.assert_allclose(par.param_array, pcopy.param_array)
        np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
        self.assertSequenceEqual(str(par), str(pcopy))
        self.assert_(pcopy.checkgrad())
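The round trip used throughout these tests is par.pickle(f) to write and pickle.load(f) to read back. The same pattern outside the test harness (a hedged sketch; the random toy data is only for illustration):

# Hedged sketch of the pickle round trip exercised by the tests above.
import pickle, tempfile
import numpy as np, GPy

m = GPy.models.GPRegression(np.random.rand(10, 1), np.random.rand(10, 1))
with tempfile.TemporaryFile('w+b') as f:
    m.pickle(f)              # GPy's own pickling entry point, as used in the tests
    f.seek(0)
    m2 = pickle.load(f)
np.testing.assert_allclose(m.param_array, m2.param_array)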
@@ -150,19 +151,20 @@ class Test(ListDictTestCase):
        par = NormalPosterior(X,Xv)
        par.gradient = 10
        pcopy = par.copy()
        pcopy.gradient = 10
        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
        self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
        self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
        self.assertSequenceEqual(str(par), str(pcopy))
        self.assertIsNot(par.param_array, pcopy.param_array)
        self.assertIsNot(par.full_gradient, pcopy.full_gradient)
        self.assertIsNot(par.gradient_full, pcopy.gradient_full)
        with tempfile.TemporaryFile('w+b') as f:
            par.pickle(f)
            f.seek(0)
            pcopy = pickle.load(f)
        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
        pcopy.gradient = 10
        np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
        np.testing.assert_allclose(pcopy.mean.full_gradient, 10)
        np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
        np.testing.assert_allclose(pcopy.mean.gradient_full, 10)
        self.assertSequenceEqual(str(par), str(pcopy))

    def test_model_concat(self):

@@ -170,10 +172,11 @@ class Test(ListDictTestCase):
        par.randomize()
        pcopy = par.copy()
        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
        self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
        self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
        self.assertSequenceEqual(str(par), str(pcopy))
        self.assertIsNot(par.param_array, pcopy.param_array)
        self.assertIsNot(par.full_gradient, pcopy.full_gradient)
        self.assertIsNot(par.gradient_full, pcopy.gradient_full)
        self.assertTrue(par.checkgrad())
        self.assertTrue(pcopy.checkgrad())
        self.assert_(np.any(pcopy.gradient!=0.0))
        with tempfile.TemporaryFile('w+b') as f:

@@ -181,7 +184,7 @@ class Test(ListDictTestCase):
            f.seek(0)
            pcopy = pickle.load(f)
        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
        np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
        np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
        self.assertSequenceEqual(str(par), str(pcopy))
        self.assert_(pcopy.checkgrad())
@@ -1,9 +1,7 @@
from ..core.parameterization.parameter_core import Observable
import itertools, collections, weakref
import collections, weakref, logging

class Cacher(object):

    def __init__(self, operation, limit=5, ignore_args=(), force_kwargs=()):
        """
        Parameters:

@@ -12,13 +10,15 @@ class Cacher(object):
        :param int limit: depth of cacher
        :param [int] ignore_args: list of indices, pointing at arguments to ignore in *args of operation(*args). This includes self!
        :param [str] force_kwargs: list of kwarg names (strings). If a kwarg with that name is given, the cacher will force recompute and won't cache anything.
        :param int verbose: verbosity level. 0: no print outs, 1: casual print outs, 2: debug level print outs
        """
        self.limit = int(limit)
        self.ignore_args = ignore_args
        self.force_kwargs = force_kwargs
        self.operation=operation
        self.operation = operation
        self.order = collections.deque()
        self.cached_inputs = {} # point from cache_ids to a list of [ind_ids], which were used in cache cache_id
        self.cached_inputs = {} # point from cache_ids to a list of [ind_ids], which were used in cache cache_id
        self.logger = logging.getLogger("cache")

        #=======================================================================
        # point from each ind_id to [ref(obj), cache_ids]
@@ -27,53 +27,75 @@ class Cacher(object):
        self.cached_input_ids = {}
        #=======================================================================

        self.cached_outputs = {} # point from cache_ids to outputs
        self.inputs_changed = {} # point from cache_ids to bools
        self.cached_outputs = {} # point from cache_ids to outputs
        self.inputs_changed = {} # point from cache_ids to bools

    def combine_args_kw(self, args, kw):
    def id(self, obj):
        """returns the self.id of an object, to be used in caching individual self.ids"""
        return hex(id(obj))

    def combine_inputs(self, args, kw):
        "Combines the args and kw in a unique way, such that ordering of kwargs does not lead to recompute"
        self.logger.debug("combining args and kw")
        return args + tuple(c[1] for c in sorted(kw.items(), key=lambda x: x[0]))

    def preprocess(self, combined_args_kw, ignore_args):
        "get the cacheid (conc. string of argument ids in order) ignoring ignore_args"
        return "".join(str(id(a)) for i,a in enumerate(combined_args_kw) if i not in ignore_args)
    def prepare_cache_id(self, combined_args_kw, ignore_args):
        "get the cacheid (conc. string of argument self.ids in order) ignoring ignore_args"
        cache_id = "".join(self.id(a) for i, a in enumerate(combined_args_kw) if i not in ignore_args)
        self.logger.debug("cache_id={} was created".format(cache_id))
        return cache_id

    def ensure_cache_length(self, cache_id):
        "Ensures the cache is within its limits and has one place free"
        self.logger.debug("cache length gets ensured")
        if len(self.order) == self.limit:
            self.logger.debug("cache limit of l={} was reached".format(self.limit))
            # we have reached the limit, so let's release one element
            cache_id = self.order.popleft()
            self.logger.debug("cache_id '{}' gets removed".format(cache_id))
            combined_args_kw = self.cached_inputs[cache_id]
            for ind in combined_args_kw:
                ind_id = id(ind)
                ref, cache_ids = self.cached_input_ids[ind_id]
                if len(cache_ids) == 1 and ref() is not None:
                    ref().remove_observer(self, self.on_cache_changed)
                    del self.cached_input_ids[ind_id]
                else:
                    cache_ids.remove(cache_id)
                    self.cached_input_ids[ind_id] = [ref, cache_ids]
                if ind is not None:
                    ind_id = self.id(ind)
                    tmp = self.cached_input_ids.get(ind_id, None)
                    if tmp is not None:
                        ref, cache_ids = tmp
                        if len(cache_ids) == 1 and ref() is not None:
                            ref().remove_observer(self, self.on_cache_changed)
                            del self.cached_input_ids[ind_id]
                        else:
                            cache_ids.remove(cache_id)
                            self.cached_input_ids[ind_id] = [ref, cache_ids]
            self.logger.debug("removing caches")
            del self.cached_outputs[cache_id]
            del self.inputs_changed[cache_id]
            del self.cached_inputs[cache_id]

    def add_to_cache(self, cache_id, combined_args_kw, output):
    def add_to_cache(self, cache_id, inputs, output):
        """This adds cache_id to the cache, with inputs and output"""
        self.inputs_changed[cache_id] = False
        self.cached_outputs[cache_id] = output
        self.order.append(cache_id)
        self.cached_inputs[cache_id] = combined_args_kw
        for a in combined_args_kw:
            ind_id = id(a)
            v = self.cached_input_ids.get(ind_id, [weakref.ref(a), []])
            v[1].append(cache_id)
            if len(v[1]) == 1:
                a.add_observer(self, self.on_cache_changed)
            self.cached_input_ids[ind_id] = v
        self.cached_inputs[cache_id] = inputs
        for a in inputs:
            if a is not None:
                ind_id = self.id(a)
                v = self.cached_input_ids.get(ind_id, [weakref.ref(a), []])
                self.logger.debug("cache_id '{}' gets stored".format(cache_id))
                v[1].append(cache_id)
                if len(v[1]) == 1:
                    self.logger.debug("adding observer to object {}".format(repr(a)))
                    a.add_observer(self, self.on_cache_changed)
                self.cached_input_ids[ind_id] = v

    def __call__(self, *args, **kw):
        """
        A wrapper function for self.operation,
        """
        #=======================================================================
        # !WARNING CACHE OFFSWITCH!
        # return self.operation(*args, **kw)
        #=======================================================================

        # 1: Check whether we have forced recompute arguments:
        if len(self.force_kwargs) != 0:
@@ -81,27 +103,33 @@ class Cacher(object):
            if k in kw and kw[k] is not None:
                return self.operation(*args, **kw)

        # 2: preprocess and get the unique id string for this call
        combined_args_kw = self.combine_args_kw(args, kw)
        cache_id = self.preprocess(combined_args_kw, self.ignore_args)

        # 2: prepare_cache_id and get the unique self.id string for this call
        inputs = self.combine_inputs(args, kw)
        cache_id = self.prepare_cache_id(inputs, self.ignore_args)
        # 2: if anything is not cachable, we will just return the operation, without caching
        if reduce(lambda a,b: a or (not isinstance(b, Observable)), combined_args_kw, False):
        if reduce(lambda a, b: a or (not (isinstance(b, Observable) or b is None)), inputs, False):
            self.logger.info("some inputs are not observable: returning without caching")
            self.logger.debug(str(map(lambda x: isinstance(x, Observable) or x is None, inputs)))
            self.logger.debug(str(map(repr, inputs)))
            return self.operation(*args, **kw)
        # 3&4: check whether this cache_id has been cached, then has it changed?
        try:
            if(self.inputs_changed[cache_id]):
                # 4: This happens, when one element has changed for this cache id
                self.logger.debug("{} already seen, but inputs changed. refreshing cacher".format(cache_id))
                # 4: This happens, when elements have changed for this cache self.id
                self.inputs_changed[cache_id] = False
                self.cached_outputs[cache_id] = self.operation(*args, **kw)
        except KeyError:
            self.logger.info("{} never seen, creating cache entry".format(cache_id))
            # 3: This is when we never saw this cache_id:
            self.ensure_cache_length(cache_id)
            self.add_to_cache(cache_id, combined_args_kw, self.operation(*args, **kw))
            self.add_to_cache(cache_id, inputs, self.operation(*args, **kw))
        except:
            self.logger.error("an error occurred while trying to run caching for {}, resetting".format(cache_id))
            self.reset()
            raise
        # 5: We have seen this cache_id and it is cached:
        self.logger.info("returning cache {}".format(cache_id))
        return self.cached_outputs[cache_id]

    def on_cache_changed(self, direct, which=None):

@@ -110,10 +138,13 @@ class Cacher(object):

        this function gets 'hooked up' to the inputs when we cache them, and upon their elements being changed we update here.
        """
        for ind_id in [id(direct), id(which)]:
            _, cache_ids = self.cached_input_ids.get(ind_id, [None, []])
            for cache_id in cache_ids:
                self.inputs_changed[cache_id] = True
        for what in [direct, which]:
            if what is not None:
                ind_id = self.id(what)
                _, cache_ids = self.cached_input_ids.get(ind_id, [None, []])
                for cache_id in cache_ids:
                    self.logger.info("callback from {} changed inputs from {}".format(ind_id, self.inputs_changed[cache_id]))
                    self.inputs_changed[cache_id] = True

    def reset(self):
        """

@@ -150,7 +181,7 @@ class Cacher_wrap(object):
        return partial(self, obj)
    def __call__(self, *args, **kwargs):
        obj = args[0]
        #import ipdb;ipdb.set_trace()
        # import ipdb;ipdb.set_trace()
        try:
            caches = obj.__cachers
        except AttributeError:
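A cache entry's key is just the concatenation of the hex object identities of the observable inputs, so two calls with the same argument objects share an entry until one of those objects notifies the cacher. A standalone sketch of that key construction (hypothetical helper names mirroring combine_inputs and prepare_cache_id, not GPy API):

# Standalone sketch; hypothetical helpers mirroring combine_inputs/prepare_cache_id above.
def combine_inputs(args, kw):
    # keyword arguments are sorted by name, so keyword ordering cannot cause a cache miss
    return args + tuple(v for _, v in sorted(kw.items(), key=lambda x: x[0]))

def prepare_cache_id(inputs, ignore_args=()):
    # concatenate hex object ids, skipping ignored argument positions (e.g. index 0 for self)
    return "".join(hex(id(a)) for i, a in enumerate(inputs) if i not in ignore_args)

a, b = object(), object()
assert prepare_cache_id(combine_inputs((a,), {'x': b})) == hex(id(a)) + hex(id(b))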
@@ -671,7 +671,7 @@ def osu_run1(data_set='osu_run1', sample_every=4):
    return data_details_return({'Y': Y, 'connect' : connect}, data_set)

def swiss_roll_generated(num_samples=1000, sigma=0.0):
    with open(os.path.join(data_path, 'swiss_roll.pickle')) as f:
    with open(os.path.join(os.path.dirname(__file__), 'datasets', 'swiss_roll.pickle')) as f:
        data = pickle.load(f)
    Na = data['Y'].shape[0]
    perm = np.random.permutation(np.r_[:Na])[:num_samples]

@@ -723,14 +723,20 @@ def hapmap3(data_set='hapmap3'):
        import bz2
    except ImportError as i:
        raise i, "Need pandas for hapmap dataset, make sure to install pandas (http://pandas.pydata.org/) before loading the hapmap dataset"
    if not data_available(data_set):
        download_data(data_set)

    dirpath = os.path.join(data_path,'hapmap3')
    hapmap_file_name = 'hapmap3_r2_b36_fwd.consensus.qc.poly'
    unpacked_files = [os.path.join(dirpath, hapmap_file_name+ending) for ending in ['.ped', '.map']]
    unpacked_files_exist = reduce(lambda a, b:a and b, map(os.path.exists, unpacked_files))

    if not unpacked_files_exist and not data_available(data_set):
        download_data(data_set)

    preprocessed_data_paths = [os.path.join(dirpath,hapmap_file_name + file_name) for file_name in \
                               ['.snps.pickle',
                                '.info.pickle',
                                '.nan.pickle']]

    if not reduce(lambda a,b: a and b, map(os.path.exists, preprocessed_data_paths)):
        if not overide_manual_authorize and not prompt_user("Preprocessing requires ~25GB "
                                                            "of memory and can take a (very) long time, continue? [Y/n]"):

@@ -744,8 +750,7 @@ def hapmap3(data_set='hapmap3'):
                       perc="="*int(20.*progress/100.))
        stdout.write(status); stdout.flush()
        return status
    unpacked_files = [os.path.join(dirpath, hapmap_file_name+ending) for ending in ['.ped', '.map']]
    if not reduce(lambda a,b: a and b, map(os.path.exists, unpacked_files)):
    if not unpacked_files_exist:
        status=write_status('unpacking...', 0, '')
        curr = 0
        for newfilepath in unpacked_files:

@@ -762,6 +767,7 @@ def hapmap3(data_set='hapmap3'):
                status=write_status('unpacking...', curr+12.*file_processed/(file_size), status)
            curr += 12
            status=write_status('unpacking...', curr, status)
            os.remove(filepath)
        status=write_status('reading .ped...', 25, status)
        # Preprocess data:
        snpstrnp = np.loadtxt(unpacked_files[0], dtype=str)

@@ -832,7 +838,7 @@ def hapmap3(data_set='hapmap3'):
def singlecell(data_set='singlecell'):
    if not data_available(data_set):
        download_data(data_set)

    from pandas import read_csv
    dirpath = os.path.join(data_path, data_set)
    filename = os.path.join(dirpath, 'singlecell.csv')
75
GPy/util/datasets/swiss_roll.pickle
Normal file
75
GPy/util/datasets/swiss_roll.pickle
Normal file
File diff suppressed because one or more lines are too long
@@ -8,7 +8,7 @@ import numpy as np
from GPy.util.pca import pca

def initialize_latent(init, input_dim, Y):
    Xr = np.random.randn(Y.shape[0], input_dim)
    Xr = np.asfortranarray(np.random.randn(Y.shape[0], input_dim))
    if init == 'PCA':
        p = pca(Y)
        PC = p.project(Y, min(input_dim, Y.shape[1]))

@@ -20,4 +20,4 @@ def initialize_latent(init, input_dim, Y):
    Xr -= Xr.mean(0)
    Xr /= Xr.var(0)

    return Xr, var/var.max()
    return Xr, var/var.max()
@@ -123,7 +123,7 @@ def dtrtrs(A, B, lower=1, trans=0, unitdiag=0):
    :returns:

    """
    A = force_F_ordered(A)
    A = np.asfortranarray(A)
    #Note: B does not seem to need to be F ordered!
    return lapack.dtrtrs(A, B, lower=lower, trans=trans, unitdiag=unitdiag)
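The wrapper only forces A into Fortran (column-major) order before handing it to LAPACK. A hedged sketch of a direct call follows; it assumes, as the wrapper suggests, that scipy's lapack.dtrtrs returns the solution followed by an info flag:

# Hedged sketch; assumes lapack.dtrtrs returns (solution, info) as the wrapper above implies.
import numpy as np
from scipy.linalg import lapack

A = np.tril(np.random.rand(4, 4)) + 4 * np.eye(4)   # well-conditioned lower-triangular system
b = np.random.rand(4, 1)
x, info = lapack.dtrtrs(np.asfortranarray(A), b, lower=1, trans=0, unitdiag=0)
assert info == 0
np.testing.assert_allclose(A.dot(x), b)              # A x = b solved without forming an inverse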
@@ -106,12 +106,14 @@ class pca(object):
            ulabels.append(lab)
        nlabels = len(ulabels)
        if colors is None:
            colors = [cmap(float(i) / nlabels) for i in range(nlabels)]
            colors = iter([cmap(float(i) / nlabels) for i in range(nlabels)])
        else:
            colors = iter(colors)
        X_ = self.project(X, self.Q)[:,dimensions]
        kwargs.update(dict(s=s))
        plots = list()
        for i, l in enumerate(ulabels):
            kwargs.update(dict(color=colors[i], marker=marker[i % len(marker)]))
            kwargs.update(dict(color=colors.next(), marker=marker[i % len(marker)]))
            plots.append(ax.scatter(*X_[labels == l, :].T, label=str(l), **kwargs))
        ax.set_xlabel(r"PC$_1$")
        ax.set_ylabel(r"PC$_2$")
@@ -4,9 +4,9 @@
.. moduleauthor:: Max Zwiessele <ibinbei@gmail.com>

'''
__updated__ = '2013-12-02'
__updated__ = '2014-05-21'

import numpy as np
import numpy as np, logging

def common_subarrays(X, axis=0):
    """
@@ -14,11 +14,11 @@ def common_subarrays(X, axis=0):
    Common subarrays are returned as a dictionary of <subarray, [index]> pairs, where
    the subarray is a tuple representing the subarray and each index is its position
    along the remaining axis of X.

    :param :class:`np.ndarray` X: 2d array to check for common subarrays in
    :param int axis: axis to apply subarray detection over.
        When axis is 0, rows are compared; otherwise, columns are compared.

    Examples:
    =========
@@ -48,7 +48,17 @@ def common_subarrays(X, axis=0):
    assert X.ndim == 2 and axis in (0,1), "Only implemented for 2D arrays"
    subarrays = defaultdict(list)
    cnt = count()
    np.apply_along_axis(lambda x: iadd(subarrays[tuple(x)], [cnt.next()]), 1-axis, X)
    logger = logging.getLogger("common_subarrays")
    def accumulate(x, s, c):
        logger.debug("creating tuple")
        t = tuple(x)
        logger.debug("tuple done")
        col = c.next()
        iadd(s[t], [col])
        logger.debug("added col {}".format(col))
        return None
    if axis == 0: [accumulate(x, subarrays, cnt) for x in X]
    else: [accumulate(x, subarrays, cnt) for x in X.T]
    return subarrays

if __name__ == '__main__':
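For reference, a hedged usage sketch of what the function is documented to return (the import path is assumed; with axis=0 identical rows are grouped, as the list comprehension over X above suggests):

# Hypothetical usage sketch based on the docstring and code above; import path assumed.
import numpy as np
from GPy.util.subarray_and_sorting import common_subarrays

X = np.array([[1., 2.],
              [1., 2.],
              [3., 4.]])
# Expected grouping of identical rows: {(1.0, 2.0): [0, 1], (3.0, 4.0): [2]}
print(common_subarrays(X, axis=0))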