Some progress on parameter tying

Zhenwen Dai 2014-08-29 18:52:53 +01:00
parent 140354c02d
commit 1110cc31e6
5 changed files with 208 additions and 70 deletions


@@ -21,6 +21,10 @@ class Model(Parameterized):
self.optimization_runs = []
self.sampling_runs = []
self.preferred_optimizer = 'bfgs'
from .parameterization.ties_and_remappings import Tie
self.tie = Tie()
self.add_parameter(self.tie, -1)
self.add_observer(self.tie, self.tie._parameters_changed_notification, priority=-500)
def log_likelihood(self):
raise NotImplementedError("this needs to be implemented to use the model class")
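For illustration, a minimal usage sketch of the new plumbing (the model and kernel are standard GPy; tie_together is the API this commit introduces and is still under development):

import numpy as np
import GPy

X = np.random.uniform(-3., 3., (50, 2))
Y = np.sin(X[:, :1]) + np.random.randn(50, 1) * 0.05
k = GPy.kern.RBF(2, ARD=True)          # two independent lengthscales
m = GPy.models.GPRegression(X, Y, k)   # every Model now owns a Tie object
m.kern.lengthscale.tie_together()      # both lengthscales now share one value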

View file

@@ -511,6 +511,22 @@ class Indexable(Nameable, Observable):
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
return ret
return 0.
#===========================================================================
# Tie parameters together
#===========================================================================
def _has_ties(self):
if self._highest_parent_.tie.tied_param is None:
return False
if self.has_parent():
return self._highest_parent_.tie.label_buf[self._highest_parent_._raveled_index_for(self)].sum()>0
return True
def tie_together(self):
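"""Tie all the elements of this parameter together so that they share a single value."""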
self._highest_parent_.tie.add_tied_parameter(self)
self._highest_parent_._set_fixed(self,self._raveled_index())
self._trigger_params_changed()
#===========================================================================
# Constrain operations -> done
@@ -653,7 +669,7 @@ class OptimizationHandlable(Indexable):
will be set accordingly. It has to be set with an array, retrieved from
this method, as e.g. fixing will resize the array.
The optimizer should only interfere with this array, such that transformations
are secured.
"""
if self.__dict__.get('_optimizer_copy_', None) is None or self.size != self._optimizer_copy_.size:
@@ -662,12 +678,13 @@ class OptimizationHandlable(Indexable):
if not self._optimizer_copy_transformed:
self._optimizer_copy_.flat = self.param_array.flat
[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
if self.has_parent() and (self.constraints[__fixed__].size != 0 or self._has_ties()):
fixes = np.ones(self.size).astype(bool)
fixes[self.constraints[__fixed__]] = FIXED
return self._optimizer_copy_[np.logical_and(fixes, self._highest_parent_.tie.getTieFlag(self))]
elif self._has_fixes():
return self._optimizer_copy_[self._fixes_]
self._optimizer_copy_transformed = True
return self._optimizer_copy_
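In spirit, the getter now hides both fixed and tied parameters behind a single boolean mask; a self-contained sketch with assumed values:

import numpy as np

params = np.array([1.0, 2.0, 2.0, 3.0])         # full parameter vector
fixes = np.array([True, True, True, False])     # False where a parameter is fixed
tie_flag = np.array([True, True, False, True])  # False for tied duplicates (getTieFlag)
optimizer_array = params[np.logical_and(fixes, tie_flag)]
# -> array([ 1.,  2.]): only free, representative parameters reach the optimizer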
@@ -694,6 +711,7 @@ class OptimizationHandlable(Indexable):
self.param_array.flat[f] = p
[np.put(self.param_array, ind[f[ind]], c.f(self.param_array.flat[ind[f[ind]]]))
for c, ind in self.constraints.iteritems() if c != __fixed__]
self._highest_parent_.tie.propagate_val()
self._optimizer_copy_transformed = False
self._trigger_params_changed()
@@ -726,6 +744,7 @@ class OptimizationHandlable(Indexable):
Transform the gradients by multiplying the gradient factor for each
constraint to it.
"""
self._highest_parent_.tie.collate_gradient()
[np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
if self._has_fixes(): return g[self._fixes_]
return g
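collate_gradient (defined below in ties_and_remappings) sums, for each tie parameter, the gradients of every parameter carrying its label; the idea in isolation, with assumed buffers:

import numpy as np

gradient = np.array([0.1, 0.2, 0.3, 0.0])      # full gradient; last slot is the tie param (zeroed first)
label_buf = np.array([1, 0, 1, 1])             # label 1 ties entries 0, 2 and the tie slot
tie_gradient = gradient[label_buf == 1].sum()  # 0.4 is what the optimizer sees for the tie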


@@ -31,59 +31,193 @@ class Fix(Remapping):
class Tie(Parameterized):
"""
The new parameter-tying framework (under development).
All the parameters tied together get a single new parameter inside the *Tie* object.
Its value is always equal to the tied parameters, and its gradient
is the sum of the tied parameters' gradients.
=====Implementation Details=====
The *Tie* object should only exist at the top of the parameter tree (the highest parent).
self.label_buf:
A label buffer with the same length as the full parameter array (self._highest_parent_.param_array).
It keeps track of all the tied parameters: every tied parameter carries a label (an integer) greater
than 0, and parameters with the same label are tied together.
self.buf_idx:
An auxiliary index list holding the global indices of the tie parameters inside the *Tie* object.
================================
TODO:
* EVERYTHING
"""
def __init__(self, name='tie'):
super(Tie, self).__init__(name)
self.tied_param = None
# The buffer keeps track of tie status
self.label_buf = None
# The global indices of the 'tied' param
self.buf_idx = None
# A boolean array indicating non-tied parameters
self._tie_ = None
def getTieFlag(self, p=None):
if self.tied_param is None:
if self._tie_ is None or self._tie_.size != self._highest_parent_.param_array.size:
self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool)
if p is not None:
return self._tie_[p._highest_parent_._raveled_index_for(p)]
return self._tie_
def _init_labelBuf(self):
if self.label_buf is None:
self.label_buf = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int)
if self._tie_ is None or self._tie_.size != self._highest_parent_.param_array.size:
self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool)
def _updateTieFlag(self):
if self._tie_.size != self.label_buf.size:
self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool)
self._tie_[self.label_buf>0] = False
self._tie_[self.buf_idx] = True
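# Worked illustration of the two buffers (values assumed): parameters a, b, c
# plus one tie slot t, with a and c tied together:
#   label_buf = [1, 0, 1, 1]  -> a, c and the tie slot t share label 1; b is untied
#   _tie_     = [False, True, False, True]
#   i.e. the tied originals a and c are hidden from the optimizer, while the
#   untied b and the representative tie slot t remain visible.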
def add_tied_parameter(self, p, p2=None):
"""
Tie the list of parameters p together (when p2 is None), or
tie the list of parameters p to the list of parameters p2 (when p2 is not None).
"""
self._init_labelBuf()
if p2 is None:
idx = self._highest_parent_._raveled_index_for(p)
val = self._sync_val_group(idx)
if np.all(self.label_buf[idx]==0):
# None of p has been tied before.
tie_idx = self._expandTieParam(1)
print tie_idx
tie_id = self.label_buf.max()+1
self.label_buf[tie_idx] = tie_id
else:
b = self.label_buf[idx]
ids = np.unique(b[b>0])
tie_id, tie_idx = self._merge_tie_param(ids)
self._highest_parent_.param_array[tie_idx] = val
idx = self._highest_parent_._raveled_index_for(p)
self.label_buf[idx] = tie_id
else:
pass
self._updateTieFlag()
def _merge_tie_param(self, ids):
"""Merge the tie parameters with ids in the list."""
if len(ids)==1:
id_final_idx = self.buf_idx[self.label_buf[self.buf_idx]==ids[0]][0]
return ids[0],id_final_idx
id_final = ids[0]
ids_rm = ids[1:]
label_buf_param = self.label_buf[self.buf_idx]
idx_param = [np.where(label_buf_param==i)[0][0] for i in ids_rm]
self._removeTieParam(idx_param)
[np.put(self.label_buf, np.where(self.label_buf==i), id_final) for i in ids_rm]
id_final_idx = self.buf_idx[self.label_buf[self.buf_idx]==id_final][0]
return id_final, id_final_idx
def _sync_val_group(self, idx):
self._highest_parent_.param_array[idx] = self._highest_parent_.param_array[idx].mean()
return self._highest_parent_.param_array[idx][0]
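# In spirit (values assumed): tying [1., 2., 3.] first syncs them to their
# mean, so all three become 2., and 2. seeds the new tie parameter's value.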
def _expandTieParam(self, num):
"""Expand the tie param with the number of *num* parameters"""
if self.tied_param is None:
new_buf = np.empty((num,))
else:
new_buf = np.empty((self.tied_param.size+num,))
new_buf[:self.tied_param.size] = self.tied_param.param_array.copy()
self.remove_parameter(self.tied_param)
self.tied_param = Param('tied',new_buf)
self.add_parameter(self.tied_param)
buf_idx_new = self._highest_parent_._raveled_index_for(self.tied_param)
self._expand_label_buf(self.buf_idx, buf_idx_new)
self.buf_idx = buf_idx_new
return self.buf_idx[-num:]
def _removeTieParam(self, idx):
"""idx within tied_param"""
new_buf = np.empty((self.tied_param.size-len(idx),))
bool_list = np.ones((self.tied_param.size,),dtype=np.bool)
bool_list[idx] = False
new_buf[:] = self.tied_param.param_array[bool_list]
self.remove_parameter(self.tied_param)
self.tied_param = Param('tied',new_buf)
self.add_parameter(self.tied_param)
buf_idx_new = self._highest_parent_._raveled_index_for(self.tied_param)
self._shrink_label_buf(self.buf_idx, buf_idx_new, bool_list)
self.buf_idx = buf_idx_new
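# Note: Param buffers are not resized in place; both _expandTieParam and
# _removeTieParam therefore rebuild 'tied' as a fresh Param, re-add it, and
# recompute buf_idx from its new global position.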
def _expand_label_buf(self, idx_old, idx_new):
"""Expand label buffer accordingly"""
if idx_old is None:
self.label_buf = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int)
else:
bool_old = np.zeros((self.label_buf.size,),dtype=np.bool)
bool_old[idx_old] = True
bool_new = np.zeros((self._highest_parent_.param_array.size,),dtype=np.bool)
bool_new[idx_new] = True
label_buf_new = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int)
label_buf_new[np.logical_not(bool_new)] = self.label_buf[np.logical_not(bool_old)]
label_buf_new[idx_new[:len(idx_old)]] = self.label_buf[idx_old]
self.label_buf = label_buf_new
def _shrink_label_buf(self, idx_old, idx_new, bool_list):
bool_old = np.zeros((self.label_buf.size,),dtype=np.bool)
bool_old[idx_old] = True
bool_new = np.zeros((self._highest_parent_.param_array.size,),dtype=np.bool)
bool_new[idx_new] = True
label_buf_new = np.empty(self._highest_parent_.param_array.shape, dtype=np.int)
label_buf_new[np.logical_not(bool_new)] = self.label_buf[np.logical_not(bool_old)]
label_buf_new[idx_new] = self.label_buf[idx_old[bool_list]]
self.label_buf = label_buf_new
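# Illustration (values assumed): with three tie slots and
# bool_list = [True, False, True], the labels at the two surviving global
# positions are copied across and the middle slot's label entry is dropped.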
def _check_change(self):
changed = False
if self.tied_param is not None:
for i in xrange(self.tied_param.size):
b0 = self.label_buf==self.label_buf[self.buf_idx[i]]
b = self._highest_parent_.param_array[b0]!=self.tied_param[i]
if b.sum()==0:
# nothing in this tied group has changed
continue
elif b.sum()==1:
# exactly one entry of the group changed: its value wins
val = self._highest_parent_.param_array[b0][b][0]
self._highest_parent_.param_array[b0] = val
else:
# the tie parameter itself changed: broadcast it over the group
self._highest_parent_.param_array[b0] = self.tied_param[i]
changed = True
return changed
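# Example (values assumed): a group holds [2., 2., 5.] with tie value 2.
# Exactly one entry differs (b.sum()==1), so 5. is taken as the user's new
# value and the whole group, including the tie parameter, becomes 5.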
def parameters_changed(self):
changed = self._check_change()
if changed:
self._highest_parent_._trigger_params_changed()
self.collate_gradient()
def collate_gradient(self):
if self.tied_param is not None:
self.tied_param.gradient = 0.
[np.put(self.tied_param.gradient, i, self._highest_parent_.gradient[self.label_buf==self.label_buf[self.buf_idx[i]]].sum())
for i in xrange(self.tied_param.size)]
def propagate_val(self):
if self.tied_param is not None:
for i in xrange(self.tied_param.size):
self._highest_parent_.param_array[self.label_buf==self.label_buf[self.buf_idx[i]]] = self.tied_param[i]
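Putting the pieces together, a hypothetical trace of tying two parameters (all values and positions assumed for illustration):

# before: param_array = [1., 5., 3.], label_buf = [0, 0, 0]
# tie_together() on the parameters at global indices 0 and 2:
#   _sync_val_group  -> param_array = [2., 5., 2.]   (0 and 2 set to their mean)
#   _expandTieParam  -> a 'tied' slot is appended:   [2., 5., 2., 2.]
#   label_buf        -> [1, 0, 1, 1], _tie_ -> [F, T, F, T]
# the optimizer now sees [5., 2.]; after each step propagate_val copies the
# tie slot's value back onto positions 0 and 2.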


@@ -387,7 +387,7 @@ def silhouette(max_iters=100, optimize=True, plot=True):
print m
return m
def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True, checkgrad=True):
"""Run a 1D example of a sparse GP regression."""
# sample inputs and outputs
X = np.random.uniform(-3., 3., (num_samples, 1))
@@ -396,7 +396,9 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti
rbf = GPy.kern.RBF(1)
# create simple GP Model
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
if checkgrad:
m.checkgrad(verbose=1)
if optimize:
m.optimize('tnc', messages=1, max_iters=max_iters)
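With the new flag, the (slow) gradient check can be skipped when running the example; assuming it returns the model like the other examples here:

import GPy
m = GPy.examples.regression.sparse_GP_regression_1D(optimize=True, plot=False, checkgrad=False)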


@@ -68,26 +68,5 @@ try:
except:
pass
def jitchol(A, L, cublas_handle, maxtries=5):
try:
cublas.cublasDcopy(cublas_handle, A.size, A.gpudata, 1, L.gpudata, 1)
culinalg.cho_factor(L,'L')
except culaExceptions:
diagA = np.diag(A)
if np.any(diagA <= 0.):
raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
jitter = diagA.mean() * 1e-6
while maxtries > 0 and np.isfinite(jitter):
print 'Warning: adding jitter of {:.10e}'.format(jitter)
try:
return linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
except:
jitter *= 10
finally:
maxtries -= 1
raise linalg.LinAlgError, "not positive definite, even with jitter."