Mirror of https://github.com/SheffieldML/GPy.git
Commit 1110cc31e6 (parent 140354c02d): some progress for parameter tie
5 changed files with 208 additions and 70 deletions
```diff
@@ -21,6 +21,10 @@ class Model(Parameterized):
         self.optimization_runs = []
         self.sampling_runs = []
         self.preferred_optimizer = 'bfgs'
+        from .parameterization.ties_and_remappings import Tie
+        self.tie = Tie()
+        self.add_parameter(self.tie, -1)
+        self.add_observer(self.tie, self.tie._parameters_changed_notification, priority=-500)
 
     def log_likelihood(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"
```
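This hunk gives every Model a Tie object, registered both as a parameter (so the representative tied values live in the normal parameter array) and as an observer (so ties are re-synchronised whenever parameters change). A hypothetical usage sketch, assuming a GPy build containing this commit; the parameter path m.kern.lengthscale and the data are illustrative only:

```python
import numpy as np
import GPy

X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * 0.05
m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(1))

# tie_together() is added to Indexable in the next hunk; it registers the
# parameter with the model-level Tie object created above:
m.kern.lengthscale.tie_together()
```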
```diff
@@ -511,6 +511,22 @@ class Indexable(Nameable, Observable):
             [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
             return ret
         return 0.
 
+    #===========================================================================
+    # Tie parameters together
+    #===========================================================================
+
+    def _has_ties(self):
+        if self._highest_parent_.tie.tied_param is None:
+            return False
+        if self.has_parent():
+            return self._highest_parent_.tie.label_buf[self._highest_parent_._raveled_index_for(self)].sum()>0
+        return True
+
+    def tie_together(self):
+        self._highest_parent_.tie.add_tied_parameter(self)
+        self._highest_parent_._set_fixed(self,self._raveled_index())
+        self._trigger_params_changed()
+
     #===========================================================================
     # Constrain operations -> done
```
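_has_ties() asks whether any entry of the model-wide label buffer covering this parameter's raveled indices carries a tie label. A minimal sketch of that membership test with plain numpy (hypothetical data, no GPy parameter tree):

```python
import numpy as np

label_buf = np.array([0, 0, 1, 1, 0, 2])  # 0 = untied; equal positive labels = tied together
raveled_index = np.array([2, 3])          # global indices of one parameter
has_ties = label_buf[raveled_index].sum() > 0
print(has_ties)  # True: both entries carry tie label 1
```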
```diff
@@ -653,7 +669,7 @@ class OptimizationHandlable(Indexable):
         will be set accordingly. It has to be set with an array, retrieved from
         this method, as e.g. fixing will resize the array.
 
-        The optimizer should only interfere with this array, such that transofrmations
+        The optimizer should only interfere with this array, such that transformations
         are secured.
         """
         if self.__dict__.get('_optimizer_copy_', None) is None or self.size != self._optimizer_copy_.size:
```
```diff
@@ -662,12 +678,13 @@ class OptimizationHandlable(Indexable):
         if not self._optimizer_copy_transformed:
             self._optimizer_copy_.flat = self.param_array.flat
             [np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
-            if self.has_parent() and self.constraints[__fixed__].size != 0:
+            if self.has_parent() and (self.constraints[__fixed__].size != 0 or self._has_ties()):
                 fixes = np.ones(self.size).astype(bool)
                 fixes[self.constraints[__fixed__]] = FIXED
-                return self._optimizer_copy_[fixes]
+                return self._optimizer_copy_[np.logical_and(fixes, self._highest_parent_.tie.getTieFlag(self))]
             elif self._has_fixes():
                 return self._optimizer_copy_[self._fixes_]
 
             self._optimizer_copy_transformed = True
 
         return self._optimizer_copy_
```
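The optimizer's view of the parameters now hides tied entries as well as fixed ones: the fixes mask is AND-ed with the tie flag, so only the single representative 'tied' value stays visible. A standalone sketch with hypothetical arrays:

```python
import numpy as np

optimizer_copy = np.arange(6, dtype=float)
fixes = np.array([True, True, False, True, True, True])     # False = fixed away
tie_flag = np.array([True, False, True, True, True, True])  # False = hidden tied member
print(optimizer_copy[np.logical_and(fixes, tie_flag)])      # [0. 3. 4. 5.]
```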
```diff
@@ -694,6 +711,7 @@ class OptimizationHandlable(Indexable):
         self.param_array.flat[f] = p
         [np.put(self.param_array, ind[f[ind]], c.f(self.param_array.flat[ind[f[ind]]]))
             for c, ind in self.constraints.iteritems() if c != __fixed__]
+        self._highest_parent_.tie.propagate_val()
 
         self._optimizer_copy_transformed = False
         self._trigger_params_changed()
```
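When the optimizer writes new values back, propagate_val() (defined on Tie below) broadcasts each representative tied value to every member of its group. The effect, sketched with plain numpy and hypothetical data:

```python
import numpy as np

param_array = np.array([0.5, 0.5, 2.0, 0.7])  # last entry: the representative 'tied' param
label_buf = np.array([1, 1, 0, 1])            # label 1 marks the tied group
param_array[label_buf == 1] = param_array[3]  # propagate the representative value
print(param_array)  # [0.7 0.7 2.  0.7]
```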
```diff
@@ -726,6 +744,7 @@ class OptimizationHandlable(Indexable):
         Transform the gradients by multiplying the gradient factor for each
         constraint to it.
         """
+        self._highest_parent_.tie.collate_gradient()
         [np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
         if self._has_fixes(): return g[self._fixes_]
         return g
```
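Symmetrically, before gradients reach the optimizer, collate_gradient() sums the gradients of all group members into the representative tied parameter (whose own slot is zeroed first, so including it in the sum is harmless). A minimal numpy sketch with hypothetical values:

```python
import numpy as np

gradient = np.array([0.2, -0.1, 1.0, 0.3])  # index 3: the representative 'tied' param
label_buf = np.array([1, 1, 0, 1])
gradient[3] = 0.                              # tied_param.gradient = 0.
gradient[3] = gradient[label_buf == 1].sum()  # 0.2 + (-0.1) + 0.0
print(gradient)  # [ 0.2 -0.1  1.   0.1]
```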
```diff
@@ -31,59 +31,193 @@ class Fix(Remapping):
 
 
 
-class Tie(Remapping):
-    def __init__(self, value, name):
-        super(Tie, self).__init__(name)
-        self.tied_parameters = []
-        self.value = Param('val', value)
-        self.add_parameter(self.value)
-
-    def add_tied_parameter(self, p):
-        self.tied_parameters.append(p)
-        p.add_observer(self, self.callback)
-        self.parameters_changed()
-
-    def callback(self, param=None, which=None):
-        """
-        This gets called whenever any of the tied parameters changes. We spend
-        considerable effort working out what has changed and to what value.
-        Then we store that value in self.value, and broadcast it everywhere
-        with parameters_changed.
-        """
-        if which is self: return
-        index = self._highest_parent_.constraints[self]
-        if len(index)==0:
-            return # nothing to tie together, this tie exists without any tied parameters
-        self.collate_gradient()
-        vals = self._highest_parent_.param_array[index]
-        uvals = np.unique(vals)
-        if len(uvals)==1:
-            #all of the tied things are at the same value
-            if np.all(self.value==uvals[0]):
-                return # DO NOT DO ANY CHANGES IF THE TIED PART IS NOT CHANGED!
-            self.value[...] = uvals[0]
-        elif len(uvals)==2:
-            #only *one* of the tied things has changed. it must be different to self.value
-            newval = uvals[uvals != self.value*1]
-            self.value[...] = newval
-        else:
-            #more than one of the tied things changed. panic.
-            raise ValueError, "something is wrong with the tieing"
-
-    def parameters_changed(self):
-        #ensure all our parameters have the correct value, as specified by our mapping
-        index = self._highest_parent_.constraints[self]
-        if np.all(self._highest_parent_.param_array[index]==self.value):
-            return # STOP TRIGGERING THE UPDATE LOOP MULTIPLE TIMES!!!
-        self._highest_parent_.param_array[index] = self.mapping()
-        [p.notify_observers(which=self) for p in self.tied_parameters]
-        self.collate_gradient()
-
-    def mapping(self):
-        return self.value
-
-    def collate_gradient(self):
-        index = self._highest_parent_.constraints[self]
-        self.value.gradient = np.sum(self._highest_parent_.gradient[index])
+class Tie(Parameterized):
+    """
+    The new parameter tie framework. (under development)
+
+    All the parameters tied together get a new parameter inside the *Tie* object.
+    Its value should always be equal to all the tied parameters, and its gradient
+    is the sum of the gradients of all the tied parameters.
+
+    =====Implementation Details=====
+    The *Tie* object should only exist at the top of the param tree (the highest parent).
+
+    self.label_buf:
+    A label buffer with the same length as all the parameters (self._highest_parent_.param_array).
+    The buffer keeps track of all the tied parameters. Every tied parameter has a label (an integer)
+    higher than 0, and the parameters that have the same label are tied together.
+
+    self.buf_index:
+    An auxiliary index list for the global index of the tie parameter inside the *Tie* object.
+
+    ================================
+
+    TODO:
+    * EVERYTHING
+    """
+    def __init__(self, name='tie'):
+        super(Tie, self).__init__(name)
+        self.tied_param = None
+        # The buffer keeps track of tie status
+        self.label_buf = None
+        # The global indices of the 'tied' param
+        self.buf_idx = None
+        # A boolean array indicating non-tied parameters
+        self._tie_ = None
+
+    def getTieFlag(self, p=None):
+        if self.tied_param is None:
+            if self._tie_ is None or self._tie_.size != self._highest_parent_.param_array.size:
+                self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool)
+        if p is not None:
+            return self._tie_[p._highest_parent_._raveled_index_for(p)]
+        return self._tie_
+
+    def _init_labelBuf(self):
+        if self.label_buf is None:
+            self.label_buf = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int)
+        if self._tie_ is None or self._tie_.size != self._highest_parent_.param_array.size:
+            self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool)
+
+    def _updateTieFlag(self):
+        if self._tie_.size != self.label_buf.size:
+            self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool)
+        self._tie_[self.label_buf>0] = False
+        self._tie_[self.buf_idx] = True
+
+    def add_tied_parameter(self, p, p2=None):
+        """
+        Tie the list of parameters p together (p2==None) or
+        tie the list of parameters p with the list of parameters p2 (p2!=None).
+        """
+        self._init_labelBuf()
+        if p2 is None:
+            idx = self._highest_parent_._raveled_index_for(p)
+            val = self._sync_val_group(idx)
+            if np.all(self.label_buf[idx]==0):
+                # None of p has been tied before.
+                tie_idx = self._expandTieParam(1)
+                print tie_idx  # debug output
+                tie_id = self.label_buf.max()+1
+                self.label_buf[tie_idx] = tie_id
+            else:
+                b = self.label_buf[idx]
+                ids = np.unique(b[b>0])
+                tie_id, tie_idx = self._merge_tie_param(ids)
+            self._highest_parent_.param_array[tie_idx] = val
+            idx = self._highest_parent_._raveled_index_for(p)
+            self.label_buf[idx] = tie_id
+        else:
+            pass
+        self._updateTieFlag()
+
+    def _merge_tie_param(self, ids):
+        """Merge the tie parameters with ids in the list."""
+        if len(ids)==1:
+            id_final_idx = self.buf_idx[self.label_buf[self.buf_idx]==ids[0]][0]
+            return ids[0],id_final_idx
+        id_final = ids[0]
+        ids_rm = ids[1:]
+        label_buf_param = self.label_buf[self.buf_idx]
+        idx_param = [np.where(label_buf_param==i)[0][0] for i in ids_rm]
+        self._removeTieParam(idx_param)
+        [np.put(self.label_buf, np.where(self.label_buf==i), id_final) for i in ids_rm]
+        id_final_idx = self.buf_idx[self.label_buf[self.buf_idx]==id_final][0]
+        return id_final, id_final_idx
+
+    def _sync_val_group(self, idx):
+        self._highest_parent_.param_array[idx] = self._highest_parent_.param_array[idx].mean()
+        return self._highest_parent_.param_array[idx][0]
+
+    def _expandTieParam(self, num):
+        """Expand the tie param with the number of *num* parameters"""
+        if self.tied_param is None:
+            new_buf = np.empty((num,))
+        else:
+            new_buf = np.empty((self.tied_param.size+num,))
+            new_buf[:self.tied_param.size] = self.tied_param.param_array.copy()
+            self.remove_parameter(self.tied_param)
+        self.tied_param = Param('tied',new_buf)
+        self.add_parameter(self.tied_param)
+        buf_idx_new = self._highest_parent_._raveled_index_for(self.tied_param)
+        self._expand_label_buf(self.buf_idx, buf_idx_new)
+        self.buf_idx = buf_idx_new
+        return self.buf_idx[-num:]
+
+    def _removeTieParam(self, idx):
+        """idx within tied_param"""
+        new_buf = np.empty((self.tied_param.size-len(idx),))
+        bool_list = np.ones((self.tied_param.size,),dtype=np.bool)
+        bool_list[idx] = False
+        new_buf[:] = self.tied_param.param_array[bool_list]
+        self.remove_parameter(self.tied_param)
+        self.tied_param = Param('tied',new_buf)
+        self.add_parameter(self.tied_param)
+        buf_idx_new = self._highest_parent_._raveled_index_for(self.tied_param)
+        self._shrink_label_buf(self.buf_idx, buf_idx_new, bool_list)
+        self.buf_idx = buf_idx_new
+
+    def _expand_label_buf(self, idx_old, idx_new):
+        """Expand label buffer accordingly"""
+        if idx_old is None:
+            self.label_buf = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int)
+        else:
+            bool_old = np.zeros((self.label_buf.size,),dtype=np.bool)
+            bool_old[idx_old] = True
+            bool_new = np.zeros((self._highest_parent_.param_array.size,),dtype=np.bool)
+            bool_new[idx_new] = True
+            label_buf_new = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int)
+            label_buf_new[np.logical_not(bool_new)] = self.label_buf[np.logical_not(bool_old)]
+            label_buf_new[idx_new[:len(idx_old)]] = self.label_buf[idx_old]
+            self.label_buf = label_buf_new
+
+    def _shrink_label_buf(self, idx_old, idx_new, bool_list):
+        bool_old = np.zeros((self.label_buf.size,),dtype=np.bool)
+        bool_old[idx_old] = True
+        bool_new = np.zeros((self._highest_parent_.param_array.size,),dtype=np.bool)
+        bool_new[idx_new] = True
+        label_buf_new = np.empty(self._highest_parent_.param_array.shape, dtype=np.int)
+        label_buf_new[np.logical_not(bool_new)] = self.label_buf[np.logical_not(bool_old)]
+        label_buf_new[idx_new] = self.label_buf[idx_old[bool_list]]
+        self.label_buf = label_buf_new
+
+    def _check_change(self):
+        changed = False
+        if self.tied_param is not None:
+            for i in xrange(self.tied_param.size):
+                b0 = self.label_buf==self.label_buf[self.buf_idx[i]]
+                b = self._highest_parent_.param_array[b0]!=self.tied_param[i]
+                if b.sum()==0:
+                    print 'XXX'  # debug: this group is already in sync
+                    continue
+                elif b.sum()==1:
+                    print '!!!'  # debug: one member changed; spread its value
+                    val = self._highest_parent_.param_array[b0][b][0]
+                    self._highest_parent_.param_array[b0] = val
+                else:
+                    print '@@@'  # debug: several changed; reset to the tied value
+                    self._highest_parent_.param_array[b0] = self.tied_param[i]
+                changed = True
+        return changed
+
+    def parameters_changed(self):
+        #ensure all our parameters have the correct value, as specified by our mapping
+        changed = self._check_change()
+        if changed:
+            self._highest_parent_._trigger_params_changed()
+        self.collate_gradient()
+
+    def collate_gradient(self):
+        if self.tied_param is not None:
+            self.tied_param.gradient = 0.
+            [np.put(self.tied_param.gradient, i, self._highest_parent_.gradient[self.label_buf==self.label_buf[self.buf_idx[i]]].sum())
+                for i in xrange(self.tied_param.size)]
+
+    def propagate_val(self):
+        if self.tied_param is not None:
+            for i in xrange(self.tied_param.size):
+                self._highest_parent_.param_array[self.label_buf==self.label_buf[self.buf_idx[i]]] = self.tied_param[i]
```
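To make the bookkeeping concrete, here is a standalone sketch of the label-buffer scheme from the docstring, using plain numpy arrays and hypothetical values in place of GPy's Param machinery:

```python
import numpy as np

param_array = np.array([1.0, 3.0, 5.0, 7.0])
label_buf = np.zeros(4, dtype=int)   # 0 = untied

# Tie parameters 0 and 1 together:
group = np.array([0, 1])
param_array[group] = param_array[group].mean()  # what _sync_val_group() does
label_buf[group] = label_buf.max() + 1          # fresh tie id, as in add_tied_parameter()
print(param_array)  # [2. 2. 5. 7.]
print(label_buf)    # [1 1 0 0]
```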
```diff
@@ -387,7 +387,7 @@ def silhouette(max_iters=100, optimize=True, plot=True):
     print m
     return m
 
-def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True):
+def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True, checkgrad=True):
     """Run a 1D example of a sparse GP regression."""
     # sample inputs and outputs
     X = np.random.uniform(-3., 3., (num_samples, 1))
```
```diff
@@ -396,7 +396,9 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti
     rbf = GPy.kern.RBF(1)
     # create simple GP Model
     m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
-    m.checkgrad(verbose=1)
+
+    if checkgrad:
+        m.checkgrad(verbose=1)
 
     if optimize:
         m.optimize('tnc', messages=1, max_iters=max_iters)
```
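The example now only runs the (slow) numerical gradient check on request, e.g. (a hypothetical call; the example lives in GPy.examples.regression):

```python
import GPy

# Skip both the gradient check and optimization for a quick smoke test:
m = GPy.examples.regression.sparse_GP_regression_1D(
    num_samples=100, num_inducing=5, optimize=False, plot=False, checkgrad=False)
```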
```diff
@@ -68,26 +68,5 @@ try:
 except:
     pass
 
-def jitchol(A, L, cublas_handle, maxtries=5):
-    try:
-        cublas.cublasDcopy(cublas_handle, A.size, A.gpudata, 1, L.gpudata, 1)
-        culinalg.cho_factor(L,'L')
-    except culaExceptions:
-        diagA = np.diag(A)
-        if np.any(diagA <= 0.):
-            raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
-        jitter = diagA.mean() * 1e-6
-        while maxtries > 0 and np.isfinite(jitter):
-            print 'Warning: adding jitter of {:.10e}'.format(jitter)
-            try:
-                return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
-            except:
-                jitter *= 10
-            finally:
-                maxtries -= 1
-        raise linalg.LinAlgError, "not positive definite, even with jitter."
```
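For reference, the deleted helper implemented the usual jitter fallback for a failed Cholesky factorisation, on the GPU via CUBLAS/CULA. A minimal CPU-only sketch of the same idea (an illustration, not the removed code):

```python
import numpy as np
from scipy import linalg

def jitchol_cpu(A, maxtries=5):
    """Cholesky with growing diagonal jitter on failure (CPU sketch)."""
    try:
        return linalg.cholesky(A, lower=True)
    except linalg.LinAlgError:
        diagA = np.diag(A)
        if np.any(diagA <= 0.):
            raise linalg.LinAlgError("not pd: non-positive diagonal elements")
        jitter = diagA.mean() * 1e-6  # start small, relative to the diagonal scale
        for _ in range(maxtries):
            try:
                return linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
            except linalg.LinAlgError:
                jitter *= 10.  # grow the jitter and retry
        raise linalg.LinAlgError("not positive definite, even with jitter.")
```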