diff --git a/GPy/core/model.py b/GPy/core/model.py
index 7feb72b2..00c6d5ff 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -21,6 +21,10 @@ class Model(Parameterized):
         self.optimization_runs = []
         self.sampling_runs = []
         self.preferred_optimizer = 'bfgs'
+        from .parameterization.ties_and_remappings import Tie
+        self.tie = Tie()
+        self.add_parameter(self.tie, -1)
+        self.add_observer(self.tie, self.tie._parameters_changed_notification, priority=-500)
 
     def log_likelihood(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"
diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py
index 2adc318e..815f069b 100644
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@@ -511,6 +511,22 @@ class Indexable(Nameable, Observable):
             [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
             return ret
         return 0.
+
+    #===========================================================================
+    # Tie parameters together
+    #===========================================================================
+
+    def _has_ties(self):
+        if self._highest_parent_.tie.tied_param is None:
+            return False
+        if self.has_parent():
+            return self._highest_parent_.tie.label_buf[self._highest_parent_._raveled_index_for(self)].sum()>0
+        return True
+
+    def tie_together(self):
+        self._highest_parent_.tie.add_tied_parameter(self)
+        self._highest_parent_._set_fixed(self, self._raveled_index())
+        self._trigger_params_changed()
 
     #===========================================================================
     # Constrain operations -> done
@@ -653,7 +669,7 @@ class OptimizationHandlable(Indexable):
         will be set accordingly. It has to be set with an array, retrieved from
         this method, as e.g. fixing will resize the array.
 
-        The optimizer should only interfere with this array, such that transofrmations
+        The optimizer should only interfere with this array, such that transformations
         are secured.
         """
         if self.__dict__.get('_optimizer_copy_', None) is None or self.size != self._optimizer_copy_.size:
@@ -662,12 +678,13 @@
         if not self._optimizer_copy_transformed:
             self._optimizer_copy_.flat = self.param_array.flat
             [np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
 
-        if self.has_parent() and self.constraints[__fixed__].size != 0:
+        if self.has_parent() and (self.constraints[__fixed__].size != 0 or self._has_ties()):
             fixes = np.ones(self.size).astype(bool)
             fixes[self.constraints[__fixed__]] = FIXED
-            return self._optimizer_copy_[fixes]
+            return self._optimizer_copy_[np.logical_and(fixes, self._highest_parent_.tie.getTieFlag(self))]
         elif self._has_fixes():
-            return self._optimizer_copy_[self._fixes_]
+            return self._optimizer_copy_[self._fixes_]
+
         self._optimizer_copy_transformed = True
         return self._optimizer_copy_
@@ -694,6 +711,7 @@
             self.param_array.flat[f] = p
             [np.put(self.param_array, ind[f[ind]], c.f(self.param_array.flat[ind[f[ind]]]))
              for c, ind in self.constraints.iteritems() if c != __fixed__]
+        self._highest_parent_.tie.propagate_val()
         self._optimizer_copy_transformed = False
         self._trigger_params_changed()
 
@@ -726,6 +744,7 @@
         Transform the gradients by multiplying the gradient factor for each constraint to it.
""" + self._highest_parent_.tie.collate_gradient() [np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__] if self._has_fixes(): return g[self._fixes_] return g diff --git a/GPy/core/parameterization/ties_and_remappings.py b/GPy/core/parameterization/ties_and_remappings.py index da46acaa..a81b8d61 100644 --- a/GPy/core/parameterization/ties_and_remappings.py +++ b/GPy/core/parameterization/ties_and_remappings.py @@ -31,59 +31,193 @@ class Fix(Remapping): -class Tie(Remapping): - def __init__(self, value, name): +class Tie(Parameterized): + """ + The new parameter tie framework. (under development) + + All the parameters tied together get a new parameter inside the *Tie* object. + Its value should always be equal to all the tied parameters, and its gradient + is the sum of all the tied parameters. + + =====Implementation Details===== + The *Tie* object should only exist on the top of param tree (the highest parent). + + self.label_buf: + It uses a label buffer that has the same length as all the parameters (self._highest_parent_.param_array). + The buffer keeps track of all the tied parameters. All the tied parameters have a label (an interger) higher + than 0, and the parameters that have the same label are tied together. + + self.buf_index: + An auxiliary index list for the global index of the tie parameter inside the *Tie* object. + + ================================ + + TODO: + * EVERYTHING + + """ + def __init__(self, name='tie'): super(Tie, self).__init__(name) - self.tied_parameters = [] - self.value = Param('val', value) - self.add_parameter(self.value) + self.tied_param = None + # The buffer keeps track of tie status + self.label_buf = None + # The global indices of the 'tied' param + self.buf_idx = None + # A boolean array indicating non-tied parameters + self._tie_ = None + + def getTieFlag(self, p=None): + if self.tied_param is None: + if self._tie_ is None or self._tie_.size != self._highest_parent_.param_array.size: + self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool) + if p is not None: + return self._tie_[p._highest_parent_._raveled_index_for(p)] + return self._tie_ + + def _init_labelBuf(self): + if self.label_buf is None: + self.label_buf = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int) + if self._tie_ is None or self._tie_.size != self._highest_parent_.param_array.size: + self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool) + + def _updateTieFlag(self): + if self._tie_.size != self.label_buf.size: + self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool) + self._tie_[self.label_buf>0] = False + self._tie_[self.buf_idx] = True - def add_tied_parameter(self, p): - self.tied_parameters.append(p) - p.add_observer(self, self.callback) - self.parameters_changed() - - def callback(self, param=None, which=None): + def add_tied_parameter(self, p, p2=None): """ - This gets called whenever any of the tied parameters changes. we spend - considerable effort working out what has changed and to what value. - Then we store that value in self.value, and broadcast it everywhere - with parameters_changed. 
+        Tie the list of parameters p together (p2==None) or
+        tie the list of parameters p with the list of parameters p2 (p2!=None).
         """
-        if which is self:return
-        index = self._highest_parent_.constraints[self]
-        if len(index)==0:
-            return # nothing to tie together, this tie exists without any tied parameters
-        self.collate_gradient()
-        vals = self._highest_parent_.param_array[index]
-        uvals = np.unique(vals)
-        if len(uvals)==1:
-            #all of the tied things are at the same value
-            if np.all(self.value==uvals[0]):
-                return # DO NOT DO ANY CHANGES IF THE TIED PART IS NOT CHANGED!
-            self.value[...] = uvals[0]
-        elif len(uvals)==2:
-            #only *one* of the tied things has changed. it must be different to self.value
-            newval = uvals[uvals != self.value*1]
-            self.value[...] = newval
+        self._init_labelBuf()
+        if p2 is None:
+            idx = self._highest_parent_._raveled_index_for(p)
+            val = self._sync_val_group(idx)
+            if np.all(self.label_buf[idx]==0):
+                # None of p has been tied before.
+                tie_idx = self._expandTieParam(1)
+                tie_id = self.label_buf.max()+1
+                self.label_buf[tie_idx] = tie_id
+            else:
+                b = self.label_buf[idx]
+                ids = np.unique(b[b>0])
+                tie_id, tie_idx = self._merge_tie_param(ids)
+            self._highest_parent_.param_array[tie_idx] = val
+            idx = self._highest_parent_._raveled_index_for(p)
+            self.label_buf[idx] = tie_id
         else:
-            #more than one of the tied things changed. panic.
-            raise ValueError, "something is wrong with the tieing"
+            pass # TODO: tying p with a second list of parameters p2 is not implemented yet
+        self._updateTieFlag()
+
+    def _merge_tie_param(self, ids):
+        """Merge the tie parameters whose labels are in the list *ids*."""
+        if len(ids)==1:
+            id_final_idx = self.buf_idx[self.label_buf[self.buf_idx]==ids[0]][0]
+            return ids[0], id_final_idx
+        id_final = ids[0]
+        ids_rm = ids[1:]
+        label_buf_param = self.label_buf[self.buf_idx]
+        idx_param = [np.where(label_buf_param==i)[0][0] for i in ids_rm]
+        self._removeTieParam(idx_param)
+        [np.put(self.label_buf, np.where(self.label_buf==i), id_final) for i in ids_rm]
+        id_final_idx = self.buf_idx[self.label_buf[self.buf_idx]==id_final][0]
+        return id_final, id_final_idx
+
+    def _sync_val_group(self, idx):
+        self._highest_parent_.param_array[idx] = self._highest_parent_.param_array[idx].mean()
+        return self._highest_parent_.param_array[idx][0]
+
+    def _expandTieParam(self, num):
+        """Expand the tie param by *num* parameters."""
+        if self.tied_param is None:
+            new_buf = np.empty((num,))
+        else:
+            new_buf = np.empty((self.tied_param.size+num,))
+            new_buf[:self.tied_param.size] = self.tied_param.param_array.copy()
+            self.remove_parameter(self.tied_param)
+        self.tied_param = Param('tied',new_buf)
+        self.add_parameter(self.tied_param)
+        buf_idx_new = self._highest_parent_._raveled_index_for(self.tied_param)
+        self._expand_label_buf(self.buf_idx, buf_idx_new)
+        self.buf_idx = buf_idx_new
+        return self.buf_idx[-num:]
+
+    def _removeTieParam(self, idx):
+        """Remove the tie parameters given by *idx* (indices within tied_param)."""
+        new_buf = np.empty((self.tied_param.size-len(idx),))
+        bool_list = np.ones((self.tied_param.size,),dtype=np.bool)
+        bool_list[idx] = False
+        new_buf[:] = self.tied_param.param_array[bool_list]
+        self.remove_parameter(self.tied_param)
+        self.tied_param = Param('tied',new_buf)
+        self.add_parameter(self.tied_param)
+        buf_idx_new = self._highest_parent_._raveled_index_for(self.tied_param)
+        self._shrink_label_buf(self.buf_idx, buf_idx_new, bool_list)
+        self.buf_idx = buf_idx_new
+
+    def _expand_label_buf(self, idx_old, idx_new):
+        """Expand the label buffer accordingly."""
+        if idx_old is None:
+            self.label_buf = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int)
+        else:
+            bool_old = np.zeros((self.label_buf.size,),dtype=np.bool)
+            bool_old[idx_old] = True
+            bool_new = np.zeros((self._highest_parent_.param_array.size,),dtype=np.bool)
+            bool_new[idx_new] = True
+            label_buf_new = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int)
+            label_buf_new[np.logical_not(bool_new)] = self.label_buf[np.logical_not(bool_old)]
+            label_buf_new[idx_new[:len(idx_old)]] = self.label_buf[idx_old]
+            self.label_buf = label_buf_new
+
+    def _shrink_label_buf(self, idx_old, idx_new, bool_list):
+        bool_old = np.zeros((self.label_buf.size,),dtype=np.bool)
+        bool_old[idx_old] = True
+        bool_new = np.zeros((self._highest_parent_.param_array.size,),dtype=np.bool)
+        bool_new[idx_new] = True
+        label_buf_new = np.empty(self._highest_parent_.param_array.shape, dtype=np.int)
+        label_buf_new[np.logical_not(bool_new)] = self.label_buf[np.logical_not(bool_old)]
+        label_buf_new[idx_new] = self.label_buf[idx_old[bool_list]]
+        self.label_buf = label_buf_new
+
+    def _check_change(self):
+        changed = False
+        if self.tied_param is not None:
+            for i in xrange(self.tied_param.size):
+                b0 = self.label_buf==self.label_buf[self.buf_idx[i]]
+                b = self._highest_parent_.param_array[b0]!=self.tied_param[i]
+                if b.sum()==0:
+                    # The tied group is unchanged.
+                    continue
+                elif b.sum()==1:
+                    # Exactly one parameter of the group changed: propagate its value to the whole group.
+                    val = self._highest_parent_.param_array[b0][b][0]
+                    self._highest_parent_.param_array[b0] = val
+                else:
+                    # Several parameters changed at once: reset the group to the tie parameter's value.
+                    self._highest_parent_.param_array[b0] = self.tied_param[i]
+                changed = True
+        return changed
+
     def parameters_changed(self):
         #ensure all out parameters have the correct value, as specified by our mapping
-        index = self._highest_parent_.constraints[self]
-        if np.all(self._highest_parent_.param_array[index]==self.value):
-            return # STOP TRIGGER THE UPDATE LOOP MULTIPLE TIMES!!!
-        self._highest_parent_.param_array[index] = self.mapping()
-        [p.notify_observers(which=self) for p in self.tied_parameters]
+        changed = self._check_change()
+        if changed:
+            self._highest_parent_._trigger_params_changed()
         self.collate_gradient()
 
-    def mapping(self):
-        return self.value
-
     def collate_gradient(self):
-        index = self._highest_parent_.constraints[self]
-        self.value.gradient = np.sum(self._highest_parent_.gradient[index])
+        if self.tied_param is not None:
+            self.tied_param.gradient = 0.
+            [np.put(self.tied_param.gradient, i, self._highest_parent_.gradient[self.label_buf==self.label_buf[self.buf_idx[i]]].sum())
+             for i in xrange(self.tied_param.size)]
+
+    def propagate_val(self):
+        """Copy the value of each tie parameter to all the parameters tied to it."""
+        if self.tied_param is not None:
+            for i in xrange(self.tied_param.size):
+                self._highest_parent_.param_array[self.label_buf==self.label_buf[self.buf_idx[i]]] = self.tied_param[i]
diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index 2a4b91b3..c4465061 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -387,7 +387,7 @@ def silhouette(max_iters=100, optimize=True, plot=True):
     print m
     return m
 
-def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True):
+def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True, checkgrad=True):
     """Run a 1D example of a sparse GP regression."""
     # sample inputs and outputs
     X = np.random.uniform(-3., 3., (num_samples, 1))
@@ -396,7 +396,9 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti
     rbf = GPy.kern.RBF(1)
     # create simple GP Model
     m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
-    m.checkgrad(verbose=1)
+
+    if checkgrad:
+        m.checkgrad(verbose=1)
 
     if optimize:
         m.optimize('tnc', messages=1, max_iters=max_iters)
diff --git a/GPy/util/linalg_gpu.py b/GPy/util/linalg_gpu.py
index d969d14f..cba09dd3 100644
--- a/GPy/util/linalg_gpu.py
+++ b/GPy/util/linalg_gpu.py
@@ -68,26 +68,5 @@ try:
 except:
     pass
 
-def jitchol(A, L, cublas_handle, maxtries=5):
-    try:
-        cublas.cublasDcopy(cublas_handle, A.size, A.gpudata, 1, L.gpudata, 1)
-        culinalg.cho_factor(L,'L')
-    except culaExceptions:
-
-
-        diagA = np.diag(A)
-        if np.any(diagA <= 0.):
-            raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
-        jitter = diagA.mean() * 1e-6
-        while maxtries > 0 and np.isfinite(jitter):
-            print 'Warning: adding jitter of {:.10e}'.format(jitter)
-            try:
-                return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
-            except:
-                jitter *= 10
-            finally:
-                maxtries -= 1
-        raise linalg.LinAlgError, "not positive definite, even with jitter."
-
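
For illustration, a minimal sketch of how the tie framework added above is intended to be used. The model and kernel setup below are hypothetical (any standard GPy model would do); only tie_together() and the 'tied' parameter name come from this patch, and since the framework is explicitly under development, the comments describe the intent of the code above rather than a finished API.

    import numpy as np
    import GPy

    # A kernel with two independent (ARD) lengthscales that we want to share a value.
    X = np.random.uniform(-3., 3., (50, 2))
    Y = np.sin(X[:, :1]) + np.random.randn(50, 1) * 0.05
    m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(2, ARD=True))

    # Tie the two lengthscales together: they become a single 'tied' parameter
    # on the model's Tie object, and the individual lengthscales are fixed
    # from the optimizer's point of view (see tie_together in parameter_core.py).
    m.kern.lengthscale.tie_together()

    # Only the 'tied' parameter is optimized; its gradient is the sum of the
    # gradients of all members of the group (see Tie.collate_gradient).
    m.optimize()
    print(m.kern.lengthscale)  # both entries now carry the same value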
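The label_buf bookkeeping can also be illustrated standalone. The following toy NumPy sketch mirrors what add_tied_parameter does for a fresh group (the tie helper and the example values are hypothetical, not part of the patch): each group gets a common positive label, and its values are synchronised to the group mean, as in _sync_val_group.

    import numpy as np

    # Toy stand-in for the model's raveled parameter array (6 parameters).
    param_array = np.array([1., 5., 3., 10., 20., 30.])
    label_buf = np.zeros(param_array.size, dtype=int)  # 0 == not tied

    def tie(idx, label_buf, param_array):
        """Give the parameters at positions *idx* a common positive label
        and synchronise their values to the group mean."""
        tie_id = label_buf.max() + 1
        label_buf[idx] = tie_id
        param_array[idx] = param_array[idx].mean()
        return tie_id

    tie([0, 2], label_buf, param_array)  # first group gets label 1
    tie([3, 4], label_buf, param_array)  # second group gets label 2
    print(label_buf)    # [1 0 1 2 2 0]: equal labels == tied together
    print(param_array)  # [ 2.  5.  2. 15. 15. 30.]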
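Similarly, the change to optimizer_array in parameter_core.py combines the fixes mask with getTieFlag: the tie flag is True for untied entries and for the 'tied' parameter itself, and False for tied group members, so group members never reach the optimizer directly. A hypothetical sketch of that masking with plain NumPy:

    import numpy as np

    fixes = np.ones(6, dtype=bool)                               # nothing explicitly fixed
    tie_flag = np.array([False, True, False, True, True, True])  # entries 0 and 2 are tied members
    optimizer_copy = np.arange(6, dtype=float)
    # Only free, untied entries (plus the 'tied' parameter) are exposed.
    print(optimizer_copy[np.logical_and(fixes, tie_flag)])       # [1. 3. 4. 5.]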