Mirror of https://github.com/SheffieldML/GPy.git
Commit 1110cc31e6 (parent 140354c02d): some progress for parameter tie
5 changed files with 208 additions and 70 deletions
```diff
@@ -21,6 +21,10 @@ class Model(Parameterized):
         self.optimization_runs = []
         self.sampling_runs = []
         self.preferred_optimizer = 'bfgs'
+        from .parameterization.ties_and_remappings import Tie
+        self.tie = Tie()
+        self.add_parameter(self.tie, -1)
+        self.add_observer(self.tie, self.tie._parameters_changed_notification, priority=-500)
 
     def log_likelihood(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"
```
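This hunk gives every Model a Tie object, registered both as a parameter (so the representative tied values live in the normal parameter array) and as an observer (so ties are re-synchronised whenever parameters change). A hypothetical usage sketch, assuming a GPy build containing this commit; the parameter path m.kern.lengthscale and the data are illustrative only:

```python
import numpy as np
import GPy

X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * 0.05
m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(1))

# tie_together() is added to Indexable in the next hunk; it registers the
# parameter with the model-level Tie object created above:
m.kern.lengthscale.tie_together()
```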
```diff
@@ -511,6 +511,22 @@ class Indexable(Nameable, Observable):
             [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
             return ret
         return 0.
 
+    #===========================================================================
+    # Tie parameters together
+    #===========================================================================
+
+    def _has_ties(self):
+        if self._highest_parent_.tie.tied_param is None:
+            return False
+        if self.has_parent():
+            return self._highest_parent_.tie.label_buf[self._highest_parent_._raveled_index_for(self)].sum()>0
+        return True
+
+    def tie_together(self):
+        self._highest_parent_.tie.add_tied_parameter(self)
+        self._highest_parent_._set_fixed(self,self._raveled_index())
+        self._trigger_params_changed()
+
     #===========================================================================
     # Constrain operations -> done
```
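_has_ties() asks whether any entry of the model-wide label buffer covering this parameter's raveled indices carries a tie label. A minimal sketch of that membership test with plain numpy (hypothetical data, no GPy parameter tree):

```python
import numpy as np

label_buf = np.array([0, 0, 1, 1, 0, 2])  # 0 = untied; equal positive labels = tied together
raveled_index = np.array([2, 3])          # global indices of one parameter
has_ties = label_buf[raveled_index].sum() > 0
print(has_ties)  # True: both entries carry tie label 1
```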
```diff
@@ -653,7 +669,7 @@ class OptimizationHandlable(Indexable):
         will be set accordingly. It has to be set with an array, retrieved from
         this method, as e.g. fixing will resize the array.
 
-        The optimizer should only interfere with this array, such that transofrmations
+        The optimizer should only interfere with this array, such that transformations
         are secured.
         """
         if self.__dict__.get('_optimizer_copy_', None) is None or self.size != self._optimizer_copy_.size:
```
```diff
@@ -662,12 +678,13 @@ class OptimizationHandlable(Indexable):
         if not self._optimizer_copy_transformed:
             self._optimizer_copy_.flat = self.param_array.flat
             [np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
-            if self.has_parent() and self.constraints[__fixed__].size != 0:
+            if self.has_parent() and (self.constraints[__fixed__].size != 0 or self._has_ties()):
                 fixes = np.ones(self.size).astype(bool)
                 fixes[self.constraints[__fixed__]] = FIXED
-                return self._optimizer_copy_[fixes]
+                return self._optimizer_copy_[np.logical_and(fixes, self._highest_parent_.tie.getTieFlag(self))]
             elif self._has_fixes():
                 return self._optimizer_copy_[self._fixes_]
 
             self._optimizer_copy_transformed = True
 
         return self._optimizer_copy_
```
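The optimizer's view of the parameters now hides tied entries as well as fixed ones: the fixes mask is AND-ed with the tie flag, so only the single representative 'tied' value stays visible. A standalone sketch with hypothetical arrays:

```python
import numpy as np

optimizer_copy = np.arange(6, dtype=float)
fixes = np.array([True, True, False, True, True, True])     # False = fixed away
tie_flag = np.array([True, False, True, True, True, True])  # False = hidden tied member
print(optimizer_copy[np.logical_and(fixes, tie_flag)])      # [0. 3. 4. 5.]
```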
```diff
@@ -694,6 +711,7 @@ class OptimizationHandlable(Indexable):
         self.param_array.flat[f] = p
         [np.put(self.param_array, ind[f[ind]], c.f(self.param_array.flat[ind[f[ind]]]))
             for c, ind in self.constraints.iteritems() if c != __fixed__]
+        self._highest_parent_.tie.propagate_val()
 
         self._optimizer_copy_transformed = False
         self._trigger_params_changed()
```
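When the optimizer writes new values back, propagate_val() (defined on Tie below) broadcasts each representative tied value to every member of its group. The effect, sketched with plain numpy and hypothetical data:

```python
import numpy as np

param_array = np.array([0.5, 0.5, 2.0, 0.7])  # last entry: the representative 'tied' param
label_buf = np.array([1, 1, 0, 1])            # label 1 marks the tied group
param_array[label_buf == 1] = param_array[3]  # propagate the representative value
print(param_array)  # [0.7 0.7 2.  0.7]
```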
```diff
@@ -726,6 +744,7 @@ class OptimizationHandlable(Indexable):
         Transform the gradients by multiplying the gradient factor for each
         constraint to it.
         """
+        self._highest_parent_.tie.collate_gradient()
         [np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
         if self._has_fixes(): return g[self._fixes_]
         return g
```
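Symmetrically, before gradients reach the optimizer, collate_gradient() sums the gradients of all group members into the representative tied parameter (whose own slot is zeroed first, so including it in the sum is harmless). A minimal numpy sketch with hypothetical values:

```python
import numpy as np

gradient = np.array([0.2, -0.1, 1.0, 0.3])  # index 3: the representative 'tied' param
label_buf = np.array([1, 1, 0, 1])
gradient[3] = 0.                              # tied_param.gradient = 0.
gradient[3] = gradient[label_buf == 1].sum()  # 0.2 + (-0.1) + 0.0
print(gradient)  # [ 0.2 -0.1  1.   0.1]
```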
```diff
@@ -31,59 +31,193 @@ class Fix(Remapping):
 
 
 
-class Tie(Remapping):
-    def __init__(self, value, name):
-        super(Tie, self).__init__(name)
-        self.tied_parameters = []
-        self.value = Param('val', value)
-        self.add_parameter(self.value)
-
-    def add_tied_parameter(self, p):
-        self.tied_parameters.append(p)
-        p.add_observer(self, self.callback)
-        self.parameters_changed()
-
-    def callback(self, param=None, which=None):
-        """
-        This gets called whenever any of the tied parameters changes. We spend
-        considerable effort working out what has changed and to what value.
-        Then we store that value in self.value, and broadcast it everywhere
-        with parameters_changed.
-        """
-        if which is self: return
-        index = self._highest_parent_.constraints[self]
-        if len(index)==0:
-            return # nothing to tie together, this tie exists without any tied parameters
-        self.collate_gradient()
-        vals = self._highest_parent_.param_array[index]
-        uvals = np.unique(vals)
-        if len(uvals)==1:
-            #all of the tied things are at the same value
-            if np.all(self.value==uvals[0]):
-                return # DO NOT DO ANY CHANGES IF THE TIED PART IS NOT CHANGED!
-            self.value[...] = uvals[0]
-        elif len(uvals)==2:
-            #only *one* of the tied things has changed. it must be different to self.value
-            newval = uvals[uvals != self.value*1]
-            self.value[...] = newval
-        else:
-            #more than one of the tied things changed. panic.
-            raise ValueError, "something is wrong with the tieing"
-
-    def parameters_changed(self):
-        #ensure all our parameters have the correct value, as specified by our mapping
-        index = self._highest_parent_.constraints[self]
-        if np.all(self._highest_parent_.param_array[index]==self.value):
-            return # STOP TRIGGERING THE UPDATE LOOP MULTIPLE TIMES!!!
-        self._highest_parent_.param_array[index] = self.mapping()
-        [p.notify_observers(which=self) for p in self.tied_parameters]
-        self.collate_gradient()
-
-    def mapping(self):
-        return self.value
-
-    def collate_gradient(self):
-        index = self._highest_parent_.constraints[self]
-        self.value.gradient = np.sum(self._highest_parent_.gradient[index])
+class Tie(Parameterized):
+    """
+    The new parameter tie framework. (under development)
+
+    All the parameters tied together get a new parameter inside the *Tie* object.
+    Its value should always be equal to all the tied parameters, and its gradient
+    is the sum of the gradients of all the tied parameters.
+
+    =====Implementation Details=====
+    The *Tie* object should only exist at the top of the param tree (the highest parent).
+
+    self.label_buf:
+    A label buffer with the same length as all the parameters (self._highest_parent_.param_array).
+    The buffer keeps track of all the tied parameters. Every tied parameter has a label (an integer)
+    higher than 0, and the parameters that have the same label are tied together.
+
+    self.buf_index:
+    An auxiliary index list for the global index of the tie parameter inside the *Tie* object.
+
+    ================================
+
+    TODO:
+    * EVERYTHING
+    """
+    def __init__(self, name='tie'):
+        super(Tie, self).__init__(name)
+        self.tied_param = None
+        # The buffer keeps track of tie status
+        self.label_buf = None
+        # The global indices of the 'tied' param
+        self.buf_idx = None
+        # A boolean array indicating non-tied parameters
+        self._tie_ = None
+
+    def getTieFlag(self, p=None):
+        if self.tied_param is None:
+            if self._tie_ is None or self._tie_.size != self._highest_parent_.param_array.size:
+                self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool)
+        if p is not None:
+            return self._tie_[p._highest_parent_._raveled_index_for(p)]
+        return self._tie_
+
+    def _init_labelBuf(self):
+        if self.label_buf is None:
+            self.label_buf = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int)
+        if self._tie_ is None or self._tie_.size != self._highest_parent_.param_array.size:
+            self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool)
+
+    def _updateTieFlag(self):
+        if self._tie_.size != self.label_buf.size:
+            self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool)
+        self._tie_[self.label_buf>0] = False
+        self._tie_[self.buf_idx] = True
+
+    def add_tied_parameter(self, p, p2=None):
+        """
+        Tie the list of parameters p together (p2==None) or
+        tie the list of parameters p with the list of parameters p2 (p2!=None).
+        """
+        self._init_labelBuf()
+        if p2 is None:
+            idx = self._highest_parent_._raveled_index_for(p)
+            val = self._sync_val_group(idx)
+            if np.all(self.label_buf[idx]==0):
+                # None of p has been tied before.
+                tie_idx = self._expandTieParam(1)
+                print tie_idx  # debug output
+                tie_id = self.label_buf.max()+1
+                self.label_buf[tie_idx] = tie_id
+            else:
+                b = self.label_buf[idx]
+                ids = np.unique(b[b>0])
+                tie_id, tie_idx = self._merge_tie_param(ids)
+            self._highest_parent_.param_array[tie_idx] = val
+            idx = self._highest_parent_._raveled_index_for(p)
+            self.label_buf[idx] = tie_id
+        else:
+            pass
+        self._updateTieFlag()
+
+    def _merge_tie_param(self, ids):
+        """Merge the tie parameters with ids in the list."""
+        if len(ids)==1:
+            id_final_idx = self.buf_idx[self.label_buf[self.buf_idx]==ids[0]][0]
+            return ids[0],id_final_idx
+        id_final = ids[0]
+        ids_rm = ids[1:]
+        label_buf_param = self.label_buf[self.buf_idx]
+        idx_param = [np.where(label_buf_param==i)[0][0] for i in ids_rm]
+        self._removeTieParam(idx_param)
+        [np.put(self.label_buf, np.where(self.label_buf==i), id_final) for i in ids_rm]
+        id_final_idx = self.buf_idx[self.label_buf[self.buf_idx]==id_final][0]
+        return id_final, id_final_idx
+
+    def _sync_val_group(self, idx):
+        self._highest_parent_.param_array[idx] = self._highest_parent_.param_array[idx].mean()
+        return self._highest_parent_.param_array[idx][0]
+
+    def _expandTieParam(self, num):
+        """Expand the tie param with the number of *num* parameters"""
+        if self.tied_param is None:
+            new_buf = np.empty((num,))
+        else:
+            new_buf = np.empty((self.tied_param.size+num,))
+            new_buf[:self.tied_param.size] = self.tied_param.param_array.copy()
+            self.remove_parameter(self.tied_param)
+        self.tied_param = Param('tied',new_buf)
+        self.add_parameter(self.tied_param)
+        buf_idx_new = self._highest_parent_._raveled_index_for(self.tied_param)
+        self._expand_label_buf(self.buf_idx, buf_idx_new)
+        self.buf_idx = buf_idx_new
+        return self.buf_idx[-num:]
+
+    def _removeTieParam(self, idx):
+        """idx within tied_param"""
+        new_buf = np.empty((self.tied_param.size-len(idx),))
+        bool_list = np.ones((self.tied_param.size,),dtype=np.bool)
+        bool_list[idx] = False
+        new_buf[:] = self.tied_param.param_array[bool_list]
+        self.remove_parameter(self.tied_param)
+        self.tied_param = Param('tied',new_buf)
+        self.add_parameter(self.tied_param)
+        buf_idx_new = self._highest_parent_._raveled_index_for(self.tied_param)
+        self._shrink_label_buf(self.buf_idx, buf_idx_new, bool_list)
+        self.buf_idx = buf_idx_new
+
+    def _expand_label_buf(self, idx_old, idx_new):
+        """Expand label buffer accordingly"""
+        if idx_old is None:
+            self.label_buf = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int)
+        else:
+            bool_old = np.zeros((self.label_buf.size,),dtype=np.bool)
+            bool_old[idx_old] = True
+            bool_new = np.zeros((self._highest_parent_.param_array.size,),dtype=np.bool)
+            bool_new[idx_new] = True
+            label_buf_new = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int)
+            label_buf_new[np.logical_not(bool_new)] = self.label_buf[np.logical_not(bool_old)]
+            label_buf_new[idx_new[:len(idx_old)]] = self.label_buf[idx_old]
+            self.label_buf = label_buf_new
+
+    def _shrink_label_buf(self, idx_old, idx_new, bool_list):
+        bool_old = np.zeros((self.label_buf.size,),dtype=np.bool)
+        bool_old[idx_old] = True
+        bool_new = np.zeros((self._highest_parent_.param_array.size,),dtype=np.bool)
+        bool_new[idx_new] = True
+        label_buf_new = np.empty(self._highest_parent_.param_array.shape, dtype=np.int)
+        label_buf_new[np.logical_not(bool_new)] = self.label_buf[np.logical_not(bool_old)]
+        label_buf_new[idx_new] = self.label_buf[idx_old[bool_list]]
+        self.label_buf = label_buf_new
+
+    def _check_change(self):
+        changed = False
+        if self.tied_param is not None:
+            for i in xrange(self.tied_param.size):
+                b0 = self.label_buf==self.label_buf[self.buf_idx[i]]
+                b = self._highest_parent_.param_array[b0]!=self.tied_param[i]
+                if b.sum()==0:
+                    print 'XXX'  # debug: this group is already in sync
+                    continue
+                elif b.sum()==1:
+                    print '!!!'  # debug: one member changed; spread its value
+                    val = self._highest_parent_.param_array[b0][b][0]
+                    self._highest_parent_.param_array[b0] = val
+                else:
+                    print '@@@'  # debug: several changed; reset to the tied value
+                    self._highest_parent_.param_array[b0] = self.tied_param[i]
+                changed = True
+        return changed
+
+    def parameters_changed(self):
+        #ensure all our parameters have the correct value, as specified by our mapping
+        changed = self._check_change()
+        if changed:
+            self._highest_parent_._trigger_params_changed()
+        self.collate_gradient()
+
+    def collate_gradient(self):
+        if self.tied_param is not None:
+            self.tied_param.gradient = 0.
+            [np.put(self.tied_param.gradient, i, self._highest_parent_.gradient[self.label_buf==self.label_buf[self.buf_idx[i]]].sum())
+                for i in xrange(self.tied_param.size)]
+
+    def propagate_val(self):
+        if self.tied_param is not None:
+            for i in xrange(self.tied_param.size):
+                self._highest_parent_.param_array[self.label_buf==self.label_buf[self.buf_idx[i]]] = self.tied_param[i]
```
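To make the bookkeeping concrete, here is a standalone sketch of the label-buffer scheme from the docstring, using plain numpy arrays and hypothetical values in place of GPy's Param machinery:

```python
import numpy as np

param_array = np.array([1.0, 3.0, 5.0, 7.0])
label_buf = np.zeros(4, dtype=int)   # 0 = untied

# Tie parameters 0 and 1 together:
group = np.array([0, 1])
param_array[group] = param_array[group].mean()  # what _sync_val_group() does
label_buf[group] = label_buf.max() + 1          # fresh tie id, as in add_tied_parameter()
print(param_array)  # [2. 2. 5. 7.]
print(label_buf)    # [1 1 0 0]
```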
```diff
@@ -387,7 +387,7 @@ def silhouette(max_iters=100, optimize=True, plot=True):
     print m
     return m
 
-def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True):
+def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True, checkgrad=True):
     """Run a 1D example of a sparse GP regression."""
     # sample inputs and outputs
     X = np.random.uniform(-3., 3., (num_samples, 1))
```
```diff
@@ -396,7 +396,9 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti
     rbf = GPy.kern.RBF(1)
     # create simple GP Model
     m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
-    m.checkgrad(verbose=1)
+
+    if checkgrad:
+        m.checkgrad(verbose=1)
 
     if optimize:
         m.optimize('tnc', messages=1, max_iters=max_iters)
```
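The example now only runs the (slow) numerical gradient check on request, e.g. (a hypothetical call; the example lives in GPy.examples.regression):

```python
import GPy

# Skip both the gradient check and optimization for a quick smoke test:
m = GPy.examples.regression.sparse_GP_regression_1D(
    num_samples=100, num_inducing=5, optimize=False, plot=False, checkgrad=False)
```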
```diff
@@ -68,26 +68,5 @@ try:
 except:
     pass
 
-def jitchol(A, L, cublas_handle, maxtries=5):
-    try:
-        cublas.cublasDcopy(cublas_handle, A.size, A.gpudata, 1, L.gpudata, 1)
-        culinalg.cho_factor(L,'L')
-    except culaExceptions:
-        diagA = np.diag(A)
-        if np.any(diagA <= 0.):
-            raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
-        jitter = diagA.mean() * 1e-6
-        while maxtries > 0 and np.isfinite(jitter):
-            print 'Warning: adding jitter of {:.10e}'.format(jitter)
-            try:
-                return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
-            except:
-                jitter *= 10
-            finally:
-                maxtries -= 1
-        raise linalg.LinAlgError, "not positive definite, even with jitter."
```
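For reference, the deleted helper implemented the usual jitter fallback for a failed Cholesky factorisation, on the GPU via CUBLAS/CULA. A minimal CPU-only sketch of the same idea (an illustration, not the removed code):

```python
import numpy as np
from scipy import linalg

def jitchol_cpu(A, maxtries=5):
    """Cholesky with growing diagonal jitter on failure (CPU sketch)."""
    try:
        return linalg.cholesky(A, lower=True)
    except linalg.LinAlgError:
        diagA = np.diag(A)
        if np.any(diagA <= 0.):
            raise linalg.LinAlgError("not pd: non-positive diagonal elements")
        jitter = diagA.mean() * 1e-6  # start small, relative to the diagonal scale
        for _ in range(maxtries):
            try:
                return linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
            except linalg.LinAlgError:
                jitter *= 10.  # grow the jitter and retry
        raise linalg.LinAlgError("not positive definite, even with jitter.")
```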