some progress for parameter tie

Zhenwen Dai 2014-08-29 18:52:53 +01:00
parent 140354c02d
commit 1110cc31e6
5 changed files with 208 additions and 70 deletions

@@ -21,6 +21,10 @@ class Model(Parameterized):
         self.optimization_runs = []
         self.sampling_runs = []
         self.preferred_optimizer = 'bfgs'
+        from .parameterization.ties_and_remappings import Tie
+        self.tie = Tie()
+        self.add_parameter(self.tie, -1)
+        self.add_observer(self.tie, self.tie._parameters_changed_notification, priority=-500)

     def log_likelihood(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"

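With this change every Model instance owns a Tie object as a child parameter and registers it as a low-priority observer, so tie synchronization runs after the model's own parameter updates. A minimal sketch of the resulting interface, assuming a plain GPy regression model (the data and kernel here are illustrative, not part of the commit):

    import numpy as np
    import GPy

    # Any model built after this commit carries the tie machinery at the
    # top of its parameter tree.
    X = np.random.uniform(-3., 3., (20, 1))
    Y = np.sin(X) + np.random.randn(20, 1) * 0.05
    m = GPy.models.GPRegression(X, Y)

    print m.tie             # the Tie object attached in Model.__init__
    print m.tie.tied_param  # None until something is tied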
@@ -512,6 +512,22 @@ class Indexable(Nameable, Observable):
             return ret
         return 0.

+    #===========================================================================
+    # Tie parameters together
+    #===========================================================================
+    def _has_ties(self):
+        if self._highest_parent_.tie.tied_param is None:
+            return False
+        if self.has_parent():
+            return self._highest_parent_.tie.label_buf[self._highest_parent_._raveled_index_for(self)].sum()>0
+        return True
+
+    def tie_together(self):
+        self._highest_parent_.tie.add_tied_parameter(self)
+        self._highest_parent_._set_fixed(self,self._raveled_index())
+        self._trigger_params_changed()
+
     #===========================================================================
     # Constrain operations -> done
     #===========================================================================
@@ -653,7 +669,7 @@ class OptimizationHandlable(Indexable):
         will be set accordingly. It has to be set with an array, retrieved from
         this method, as e.g. fixing will resize the array.
-        The optimizer should only interfere with this array, such that transofrmations
+        The optimizer should only interfere with this array, such that transformations
         are secured.
         """
         if self.__dict__.get('_optimizer_copy_', None) is None or self.size != self._optimizer_copy_.size:
@@ -662,12 +678,13 @@ class OptimizationHandlable(Indexable):
         if not self._optimizer_copy_transformed:
             self._optimizer_copy_.flat = self.param_array.flat
             [np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
-            if self.has_parent() and self.constraints[__fixed__].size != 0:
+            if self.has_parent() and (self.constraints[__fixed__].size != 0 or self._has_ties()):
                 fixes = np.ones(self.size).astype(bool)
                 fixes[self.constraints[__fixed__]] = FIXED
-                return self._optimizer_copy_[fixes]
+                return self._optimizer_copy_[np.logical_and(fixes, self._highest_parent_.tie.getTieFlag(self))]
             elif self._has_fixes():
                 return self._optimizer_copy_[self._fixes_]
             self._optimizer_copy_transformed = True
         return self._optimizer_copy_
@@ -694,6 +711,7 @@ class OptimizationHandlable(Indexable):
         self.param_array.flat[f] = p
         [np.put(self.param_array, ind[f[ind]], c.f(self.param_array.flat[ind[f[ind]]]))
             for c, ind in self.constraints.iteritems() if c != __fixed__]
+        self._highest_parent_.tie.propagate_val()
         self._optimizer_copy_transformed = False
         self._trigger_params_changed()
@@ -726,6 +744,7 @@ class OptimizationHandlable(Indexable):
         Transform the gradients by multiplying the gradient factor for each
         constraint to it.
         """
+        self._highest_parent_.tie.collate_gradient()
         [np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
         if self._has_fixes(): return g[self._fixes_]
         return g

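The user-facing entry point is the new tie_together() method on Indexable: calling it on any parameter registers that parameter with the top-level Tie object, fixes the original entries, and leaves a single shared tie parameter in the optimizer's view. A hedged usage sketch (the ARD kernel and parameter names are illustrative, not part of this commit):

    import numpy as np
    import GPy

    X = np.random.uniform(-3., 3., (40, 2))
    Y = np.sin(X[:, :1]) + np.random.randn(40, 1) * 0.05
    m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(2, ARD=True))

    # Tie the two ARD lengthscales to one shared value; the optimizer then
    # sees one 'tied' parameter instead of the two fixed originals.
    m.kern.lengthscale.tie_together()
    m.optimize()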
@@ -31,59 +31,193 @@ class Fix(Remapping):
-class Tie(Remapping):
-    def __init__(self, value, name):
-        super(Tie, self).__init__(name)
-        self.tied_parameters = []
-        self.value = Param('val', value)
-        self.add_parameter(self.value)
-
-    def add_tied_parameter(self, p):
-        self.tied_parameters.append(p)
-        p.add_observer(self, self.callback)
-        self.parameters_changed()
-
-    def callback(self, param=None, which=None):
-        """
-        This gets called whenever any of the tied parameters changes. we spend
-        considerable effort working out what has changed and to what value.
-        Then we store that value in self.value, and broadcast it everywhere
-        with parameters_changed.
-        """
-        if which is self:return
-        index = self._highest_parent_.constraints[self]
-        if len(index)==0:
-            return # nothing to tie together, this tie exists without any tied parameters
-        self.collate_gradient()
-        vals = self._highest_parent_.param_array[index]
-        uvals = np.unique(vals)
-        if len(uvals)==1:
-            #all of the tied things are at the same value
-            if np.all(self.value==uvals[0]):
-                return # DO NOT DO ANY CHANGES IF THE TIED PART IS NOT CHANGED!
-            self.value[...] = uvals[0]
-        elif len(uvals)==2:
-            #only *one* of the tied things has changed. it must be different to self.value
-            newval = uvals[uvals != self.value*1]
-            self.value[...] = newval
-        else:
-            #more than one of the tied things changed. panic.
-            raise ValueError, "something is wrong with the tieing"
-
-    def parameters_changed(self):
-        #ensure all out parameters have the correct value, as specified by our mapping
-        index = self._highest_parent_.constraints[self]
-        if np.all(self._highest_parent_.param_array[index]==self.value):
-            return # STOP TRIGGER THE UPDATE LOOP MULTIPLE TIMES!!!
-        self._highest_parent_.param_array[index] = self.mapping()
-        [p.notify_observers(which=self) for p in self.tied_parameters]
-        self.collate_gradient()
-
-    def mapping(self):
-        return self.value
-
-    def collate_gradient(self):
-        index = self._highest_parent_.constraints[self]
-        self.value.gradient = np.sum(self._highest_parent_.gradient[index])
+class Tie(Parameterized):
+    """
+    The new parameter tie framework. (under development)
+
+    All the parameters tied together get a new parameter inside the *Tie* object.
+    Its value should always be equal to all the tied parameters, and its
+    gradient is the sum of the gradients of all the tied parameters.
+
+    =====Implementation Details=====
+    The *Tie* object should only exist at the top of the param tree (the highest parent).
+
+    self.label_buf:
+        A label buffer with the same length as all the parameters
+        (self._highest_parent_.param_array). It keeps track of all the tied
+        parameters: every tied parameter has a label (an integer) greater than
+        0, and parameters sharing the same label are tied together.
+
+    self.buf_idx:
+        An auxiliary index list holding the global indices of the tie
+        parameters inside the *Tie* object.
+    ================================
+
+    TODO:
+    * EVERYTHING
+    """
+    def __init__(self, name='tie'):
+        super(Tie, self).__init__(name)
+        self.tied_param = None
+        # The buffer keeps track of tie status
+        self.label_buf = None
+        # The global indices of the 'tied' param
+        self.buf_idx = None
+        # A boolean array indicating non-tied parameters
+        self._tie_ = None
+
+    def getTieFlag(self, p=None):
+        if self.tied_param is None:
+            if self._tie_ is None or self._tie_.size != self._highest_parent_.param_array.size:
+                self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool)
+        if p is not None:
+            return self._tie_[p._highest_parent_._raveled_index_for(p)]
+        return self._tie_
+
+    def _init_labelBuf(self):
+        if self.label_buf is None:
+            self.label_buf = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int)
+        if self._tie_ is None or self._tie_.size != self._highest_parent_.param_array.size:
+            self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool)
+
+    def _updateTieFlag(self):
+        if self._tie_.size != self.label_buf.size:
+            self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool)
+        self._tie_[self.label_buf>0] = False
+        self._tie_[self.buf_idx] = True
+
+    def add_tied_parameter(self, p, p2=None):
+        """
+        Tie the list of parameters p together (p2==None) or
+        tie the list of parameters p with the list of parameters p2 (p2!=None).
+        """
+        self._init_labelBuf()
+        if p2 is None:
+            idx = self._highest_parent_._raveled_index_for(p)
+            val = self._sync_val_group(idx)
+            if np.all(self.label_buf[idx]==0):
+                # None of p has been tied before: create a fresh tie parameter.
+                tie_idx = self._expandTieParam(1)
+                tie_id = self.label_buf.max()+1
+                self.label_buf[tie_idx] = tie_id
+            else:
+                # Some of p is already tied: merge the existing tie groups.
+                b = self.label_buf[idx]
+                ids = np.unique(b[b>0])
+                tie_id, tie_idx = self._merge_tie_param(ids)
+            self._highest_parent_.param_array[tie_idx] = val
+            # Re-derive the indices, as expanding the tie param shifts them.
+            idx = self._highest_parent_._raveled_index_for(p)
+            self.label_buf[idx] = tie_id
+        else:
+            pass
+        self._updateTieFlag()
+
+    def _merge_tie_param(self, ids):
+        """Merge the tie parameters with ids in the list."""
+        if len(ids)==1:
+            id_final_idx = self.buf_idx[self.label_buf[self.buf_idx]==ids[0]][0]
+            return ids[0], id_final_idx
+        id_final = ids[0]
+        ids_rm = ids[1:]
+        label_buf_param = self.label_buf[self.buf_idx]
+        idx_param = [np.where(label_buf_param==i)[0][0] for i in ids_rm]
+        self._removeTieParam(idx_param)
+        [np.put(self.label_buf, np.where(self.label_buf==i), id_final) for i in ids_rm]
+        id_final_idx = self.buf_idx[self.label_buf[self.buf_idx]==id_final][0]
+        return id_final, id_final_idx
+
+    def _sync_val_group(self, idx):
+        self._highest_parent_.param_array[idx] = self._highest_parent_.param_array[idx].mean()
+        return self._highest_parent_.param_array[idx][0]
+
+    def _expandTieParam(self, num):
+        """Expand the tie param with the number of *num* parameters"""
+        if self.tied_param is None:
+            new_buf = np.empty((num,))
+        else:
+            new_buf = np.empty((self.tied_param.size+num,))
+            new_buf[:self.tied_param.size] = self.tied_param.param_array.copy()
+            self.remove_parameter(self.tied_param)
+        self.tied_param = Param('tied',new_buf)
+        self.add_parameter(self.tied_param)
+        buf_idx_new = self._highest_parent_._raveled_index_for(self.tied_param)
+        self._expand_label_buf(self.buf_idx, buf_idx_new)
+        self.buf_idx = buf_idx_new
+        return self.buf_idx[-num:]
+
+    def _removeTieParam(self, idx):
+        """idx within tied_param"""
+        new_buf = np.empty((self.tied_param.size-len(idx),))
+        bool_list = np.ones((self.tied_param.size,),dtype=np.bool)
+        bool_list[idx] = False
+        new_buf[:] = self.tied_param.param_array[bool_list]
+        self.remove_parameter(self.tied_param)
+        self.tied_param = Param('tied',new_buf)
+        self.add_parameter(self.tied_param)
+        buf_idx_new = self._highest_parent_._raveled_index_for(self.tied_param)
+        self._shrink_label_buf(self.buf_idx, buf_idx_new, bool_list)
+        self.buf_idx = buf_idx_new
+
+    def _expand_label_buf(self, idx_old, idx_new):
+        """Expand the label buffer accordingly"""
+        if idx_old is None:
+            self.label_buf = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int)
+        else:
+            bool_old = np.zeros((self.label_buf.size,),dtype=np.bool)
+            bool_old[idx_old] = True
+            bool_new = np.zeros((self._highest_parent_.param_array.size,),dtype=np.bool)
+            bool_new[idx_new] = True
+            label_buf_new = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int)
+            label_buf_new[np.logical_not(bool_new)] = self.label_buf[np.logical_not(bool_old)]
+            label_buf_new[idx_new[:len(idx_old)]] = self.label_buf[idx_old]
+            self.label_buf = label_buf_new
+
+    def _shrink_label_buf(self, idx_old, idx_new, bool_list):
+        bool_old = np.zeros((self.label_buf.size,),dtype=np.bool)
+        bool_old[idx_old] = True
+        bool_new = np.zeros((self._highest_parent_.param_array.size,),dtype=np.bool)
+        bool_new[idx_new] = True
+        label_buf_new = np.empty(self._highest_parent_.param_array.shape, dtype=np.int)
+        label_buf_new[np.logical_not(bool_new)] = self.label_buf[np.logical_not(bool_old)]
+        label_buf_new[idx_new] = self.label_buf[idx_old[bool_list]]
+        self.label_buf = label_buf_new
+
+    def _check_change(self):
+        changed = False
+        if self.tied_param is not None:
+            for i in xrange(self.tied_param.size):
+                b0 = self.label_buf==self.label_buf[self.buf_idx[i]]
+                b = self._highest_parent_.param_array[b0]!=self.tied_param[i]
+                if b.sum()==0:
+                    # All entries already agree with the tie parameter.
+                    continue
+                elif b.sum()==1:
+                    # Exactly one tied entry changed: broadcast its new value.
+                    val = self._highest_parent_.param_array[b0][b][0]
+                    self._highest_parent_.param_array[b0] = val
+                else:
+                    # Several entries changed: reset the group to the tie parameter.
+                    self._highest_parent_.param_array[b0] = self.tied_param[i]
+                changed = True
+        return changed
+
+    def parameters_changed(self):
+        #ensure all our parameters have the correct value, as specified by our mapping
+        changed = self._check_change()
+        if changed:
+            self._highest_parent_._trigger_params_changed()
+        self.collate_gradient()
+
+    def collate_gradient(self):
+        if self.tied_param is not None:
+            self.tied_param.gradient = 0.
+            [np.put(self.tied_param.gradient, i, self._highest_parent_.gradient[self.label_buf==self.label_buf[self.buf_idx[i]]].sum())
+                 for i in xrange(self.tied_param.size)]
+
+    def propagate_val(self):
+        if self.tied_param is not None:
+            for i in xrange(self.tied_param.size):
+                self._highest_parent_.param_array[self.label_buf==self.label_buf[self.buf_idx[i]]] = self.tied_param[i]

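To make the label_buf bookkeeping concrete: entries of the flattened parameter array that share a positive label form one tie group, buf_idx points at the tie parameters themselves (which carry their group's label too), and collate_gradient sums the gradient over each whole group. A self-contained numpy sketch of that invariant (toy numbers, not GPy code):

    import numpy as np

    # A flat view of all model parameter gradients; the last two entries
    # stand in for the tie parameters inside the Tie object.
    gradient  = np.array([0.1, 0.2, 0.0, 0.3, 0.1, 0.4, 0.5])
    label_buf = np.array([1,   1,   0,   2,   2,   1,   2])  # 0 = untied
    buf_idx   = np.array([5, 6])  # global indices of the tie parameters

    # Collate: each tie parameter's gradient is the sum over its group.
    tied_grad = [gradient[label_buf == label_buf[i]].sum() for i in buf_idx]
    print tied_grad  # ~[0.7, 0.9] -> sums over label-1 and label-2 groups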
@@ -387,7 +387,7 @@ def silhouette(max_iters=100, optimize=True, plot=True):
     print m
     return m

-def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True):
+def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True, checkgrad=True):
     """Run a 1D example of a sparse GP regression."""
     # sample inputs and outputs
     X = np.random.uniform(-3., 3., (num_samples, 1))
@@ -396,7 +396,9 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti
     rbf = GPy.kern.RBF(1)
     # create simple GP Model
     m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
-    m.checkgrad(verbose=1)
+
+    if checkgrad:
+        m.checkgrad(verbose=1)

     if optimize:
         m.optimize('tnc', messages=1, max_iters=max_iters)

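This example change just makes the numerical gradient check optional, so callers (e.g. a test suite) can skip the slow checkgrad pass:

    import GPy

    # Run the sparse GP example without the gradient check (a usage
    # sketch; plot=False avoids the plotting dependency).
    m = GPy.examples.regression.sparse_GP_regression_1D(checkgrad=False, plot=False)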
@@ -68,26 +68,5 @@ try:
 except:
     pass

-def jitchol(A, L, cublas_handle, maxtries=5):
-    try:
-        cublas.cublasDcopy(cublas_handle, A.size, A.gpudata, 1, L.gpudata, 1)
-        culinalg.cho_factor(L,'L')
-    except culaExceptions:
-        diagA = np.diag(A)
-        if np.any(diagA <= 0.):
-            raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
-        jitter = diagA.mean() * 1e-6
-        while maxtries > 0 and np.isfinite(jitter):
-            print 'Warning: adding jitter of {:.10e}'.format(jitter)
-            try:
-                return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
-            except:
-                jitter *= 10
-            finally:
-                maxtries -= 1
-        raise linalg.LinAlgError, "not positive definite, even with jitter."
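The deleted GPU helper tried a CUBLAS/CULA Cholesky first and fell back to retrying with growing diagonal jitter. For reference, the same retry strategy in plain numpy/scipy (an illustrative sketch, not the implementation GPy retains elsewhere):

    import numpy as np
    from scipy import linalg

    def jitchol_cpu(A, maxtries=5):
        # Attempt a plain Cholesky; on failure add an increasing multiple
        # of the identity until the factorization succeeds.
        try:
            return linalg.cholesky(A, lower=True)
        except linalg.LinAlgError:
            diagA = np.diag(A)
            if np.any(diagA <= 0.):
                raise linalg.LinAlgError("not pd: non-positive diagonal elements")
            jitter = diagA.mean() * 1e-6
            while maxtries > 0 and np.isfinite(jitter):
                try:
                    return linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
                except linalg.LinAlgError:
                    jitter *= 10
                finally:
                    maxtries -= 1
            raise linalg.LinAlgError("not positive definite, even with jitter.")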