diff --git a/GPy/core/model.py b/GPy/core/model.py index f70125fd..f4a79a28 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -7,12 +7,13 @@ from scipy import optimize import sys, pdb import multiprocessing as mp from GPy.util.misc import opt_wrapper -#import numdifftools as ndt +# import numdifftools as ndt from parameterised import parameterised, truncate_pad import priors from ..util.linalg import jitchol from ..inference import optimization from .. import likelihoods +import re class model(parameterised): def __init__(self): @@ -24,14 +25,14 @@ class model(parameterised): self.preferred_optimizer = 'tnc' def _get_params(self): raise NotImplementedError, "this needs to be implemented to use the model class" - def _set_params(self,x): + def _set_params(self, x): raise NotImplementedError, "this needs to be implemented to use the model class" def log_likelihood(self): raise NotImplementedError, "this needs to be implemented to use the model class" def _log_likelihood_gradients(self): raise NotImplementedError, "this needs to be implemented to use the model class" - def set_prior(self,which,what): + def set_prior(self, which, what): """ Sets priors on the model parameters. @@ -52,38 +53,44 @@ class model(parameterised): which = self.grep_param_names(which) - #check tied situation - tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie)==set(which))] + # check tied situation + tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))] if len(tie_partial_matches): raise ValueError, "cannot place prior across partial ties" - tie_matches = [tie for tie in self.tied_indices if set(which)==set(tie) ] - if len(tie_matches)>1: + tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie) ] + if len(tie_matches) > 1: raise ValueError, "cannot place prior across multiple ties" - elif len(tie_matches)==1: - which = which[:1]# just place a prior object on the first parameter + elif len(tie_matches) == 1: + which = which[:1] # just place a prior object on the first parameter - #check constraints are okay + # check constraints are okay if isinstance(what, (priors.gamma, priors.log_Gaussian)): - assert not np.any(which[:,None]==self.constrained_negative_indices), "constraint and prior incompatible" - assert not np.any(which[:,None]==self.constrained_bounded_indices), "constraint and prior incompatible" + assert not np.any(which[:, None] == self.constrained_negative_indices), "constraint and prior incompatible" + assert not np.any(which[:, None] == self.constrained_bounded_indices), "constraint and prior incompatible" unconst = np.setdiff1d(which, self.constrained_positive_indices) if len(unconst): print "Warning: constraining parameters to be positive:" - print '\n'.join([n for i,n in enumerate(self._get_param_names()) if i in unconst]) + print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst]) print '\n' self.constrain_positive(unconst) - elif isinstance(what,priors.Gaussian): - assert not np.any(which[:,None]==self.all_constrained_indices()), "constraint and prior incompatible" + elif isinstance(what, priors.Gaussian): + assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible" else: raise ValueError, "prior not recognised" - #store the prior in a local list + # store the prior in a local list for w in which: self.priors[w] = what - def get(self,name, return_names=False): + def __getitem__(self, name): + return self.get(name) + + def __setitem(self, name, val): + return self.set(name, val) + + def get(self, name, return_names=False): """ Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned. """ @@ -94,9 +101,9 @@ class model(parameterised): else: return self._get_params()[matches] else: - raise AttributeError, "no parameter matches %s"%name + raise AttributeError, "no parameter matches %s" % name - def set(self,name,val): + def set(self, name, val): """ Set model parameter(s) by name. The name is provided as a regular expression. All parameters matching that regular expression are set to ghe given value. """ @@ -106,30 +113,30 @@ class model(parameterised): x[matches] = val self._set_params(x) else: - raise AttributeError, "no parameter matches %s"%name + raise AttributeError, "no parameter matches %s" % name - def get_gradient(self,name, return_names=False): + def get_gradient(self, name, return_names=False): """ Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned. """ matches = self.grep_param_names(name) if len(matches): if return_names: - return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist() + return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist() else: return self._log_likelihood_gradients()[matches] else: - raise AttributeError, "no parameter matches %s"%name + raise AttributeError, "no parameter matches %s" % name def log_prior(self): """evaluate the prior""" - return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self._get_params()) if p is not None]) + return np.sum([p.lnpdf(x) for p, x in zip(self.priors, self._get_params()) if p is not None]) def _log_prior_gradients(self): """evaluate the gradients of the priors""" x = self._get_params() ret = np.zeros(x.size) - [np.put(ret,i,p.lnpdf_grad(xx)) for i,(p,xx) in enumerate(zip(self.priors,x)) if not p is None] + [np.put(ret, i, p.lnpdf_grad(xx)) for i, (p, xx) in enumerate(zip(self.priors, x)) if not p is None] return ret def _transform_gradients(self, g): @@ -138,13 +145,13 @@ class model(parameterised): """ x = self._get_params() - g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices] - g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices] - [np.put(g,i,g[i]*(x[i]-l)*(h-x[i])/(h-l)) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)] - [np.put(g,i,v) for i,v in [(t[0],np.sum(g[t])) for t in self.tied_indices]] + g[self.constrained_positive_indices] = g[self.constrained_positive_indices] * x[self.constrained_positive_indices] + g[self.constrained_negative_indices] = g[self.constrained_negative_indices] * x[self.constrained_negative_indices] + [np.put(g, i, g[i] * (x[i] - l) * (h - x[i]) / (h - l)) for i, l, h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)] + [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]] if len(self.tied_indices) or len(self.constrained_fixed_indices): - to_remove = np.hstack((self.constrained_fixed_indices+[t[1:] for t in self.tied_indices])) - return np.delete(g,to_remove) + to_remove = np.hstack((self.constrained_fixed_indices + [t[1:] for t in self.tied_indices])) + return np.delete(g, to_remove) else: return g @@ -154,15 +161,15 @@ class model(parameterised): Randomize the model. Make this draw from the prior if one exists, else draw from N(0,1) """ - #first take care of all parameters (from N(0,1)) + # first take care of all parameters (from N(0,1)) x = self._get_params_transformed() x = np.random.randn(x.size) self._set_params_transformed(x) - #now draw from prior where possible + # now draw from prior where possible x = self._get_params() - [np.put(x,i,p.rvs(1)) for i,p in enumerate(self.priors) if not p is None] + [np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None] self._set_params(x) - self._set_params_transformed(self._get_params_transformed())#makes sure all of the tied parameters get the same init (since there's only one prior object...) + self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...) def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs): @@ -196,10 +203,10 @@ class model(parameterised): pool = mp.Pool(processes=num_processes) for i in range(Nrestarts): self.randomize() - job = pool.apply_async(opt_wrapper, args = (self,), kwds = kwargs) + job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs) jobs.append(job) - pool.close() # signal that no more data coming in + pool.close() # signal that no more data coming in pool.join() # wait for all the tasks to complete except KeyboardInterrupt: print "Ctrl+c received, terminating and joining pool." @@ -215,10 +222,10 @@ class model(parameterised): self.optimization_runs.append(jobs[i].get()) if verbose: - print("Optimization restart {0}/{1}, f = {2}".format(i+1, Nrestarts, self.optimization_runs[-1].f_opt)) + print("Optimization restart {0}/{1}, f = {2}".format(i + 1, Nrestarts, self.optimization_runs[-1].f_opt)) except Exception as e: if robust: - print("Warning - optimization restart {0}/{1} failed".format(i+1, Nrestarts)) + print("Warning - optimization restart {0}/{1} failed".format(i + 1, Nrestarts)) else: raise e @@ -228,22 +235,22 @@ class model(parameterised): else: self._set_params_transformed(initial_parameters) - def ensure_default_constraints(self,warn=False): + def ensure_default_constraints(self, warn=False): """ Ensure that any variables which should clearly be positive have been constrained somehow. """ - positive_strings = ['variance','lengthscale', 'precision'] + positive_strings = ['variance', 'lengthscale', 'precision'] param_names = self._get_param_names() currently_constrained = self.all_constrained_indices() to_make_positive = [] for s in positive_strings: for i in self.grep_param_names(s): if not (i in currently_constrained): - to_make_positive.append(param_names[i]) + to_make_positive.append(re.escape(param_names[i])) if warn: - print "Warning! constraining %s postive"%name + print "Warning! constraining %s postive" % name if len(to_make_positive): - self.constrain_positive('('+'|'.join(to_make_positive)+')') + self.constrain_positive('(' + '|'.join(to_make_positive) + ')') @@ -261,14 +268,14 @@ class model(parameterised): self._set_params_transformed(x) LL_gradients = self._transform_gradients(self._log_likelihood_gradients()) prior_gradients = self._transform_gradients(self._log_prior_gradients()) - return - LL_gradients - prior_gradients + return -LL_gradients - prior_gradients def objective_and_gradients(self, x): self._set_params_transformed(x) - obj_f = -self.log_likelihood() - self.log_prior() + obj_f = -self.log_likelihood() - self.log_prior() LL_gradients = self._transform_gradients(self._log_likelihood_gradients()) prior_gradients = self._transform_gradients(self._log_prior_gradients()) - obj_grads = - LL_gradients - prior_gradients + obj_grads = -LL_gradients - prior_gradients return obj_f, obj_grads def optimize(self, optimizer=None, start=None, **kwargs): @@ -288,13 +295,13 @@ class model(parameterised): start = self._get_params_transformed() optimizer = optimization.get_optimizer(optimizer) - opt = optimizer(start, model = self, **kwargs) + opt = optimizer(start, model=self, **kwargs) opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients) self.optimization_runs.append(opt) self._set_params_transformed(opt.x_opt) - def optimize_SGD(self, momentum = 0.1, learning_rate = 0.01, iterations = 20, **kwargs): + def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs): # assert self.Y.shape[1] > 1, "SGD only works with D > 1" sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs) sgd.run() @@ -302,8 +309,8 @@ class model(parameterised): def Laplace_covariance(self): """return the covariance matric of a Laplace approximatino at the current (stationary) point""" - #TODO add in the prior contributions for MAP estimation - #TODO fix the hessian for tied, constrained and fixed components + # TODO add in the prior contributions for MAP estimation + # TODO fix the hessian for tied, constrained and fixed components if hasattr(self, 'log_likelihood_hessian'): A = -self.log_likelihood_hessian() @@ -317,8 +324,8 @@ class model(parameterised): A = -h(x) self._set_params(x) # check for almost zero components on the diagonal which screw up the cholesky - aa = np.nonzero((np.diag(A)<1e-6) & (np.diag(A)>0.))[0] - A[aa,aa] = 0. + aa = np.nonzero((np.diag(A) < 1e-6) & (np.diag(A) > 0.))[0] + A[aa, aa] = 0. return A def Laplace_evidence(self): @@ -329,11 +336,11 @@ class model(parameterised): hld = np.sum(np.log(np.diag(jitchol(A)[0]))) except: return np.nan - return 0.5*self._get_params().size*np.log(2*np.pi) + self.log_likelihood() - hld + return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld def __str__(self): s = parameterised.__str__(self).split('\n') - #add priors to the string + # add priors to the string strs = [str(p) if p is not None else '' for p in self.priors] width = np.array(max([len(p) for p in strs] + [5])) + 4 @@ -344,16 +351,16 @@ class model(parameterised): obj_funct += ', Log prior: {0:.3e}, LL+prior = {0:.3e}'.format(log_prior, log_like + log_prior) obj_funct += '\n\n' s[0] = obj_funct + s[0] - s[0] += "|{h:^{col}}".format(h = 'Prior', col = width) - s[1] += '-'*(width + 1) + s[0] += "|{h:^{col}}".format(h='Prior', col=width) + s[1] += '-' * (width + 1) - for p in range(2, len(strs)+2): - s[p] += '|{prior:^{width}}'.format(prior = strs[p-2], width = width) + for p in range(2, len(strs) + 2): + s[p] += '|{prior:^{width}}'.format(prior=strs[p - 2], width=width) return '\n'.join(s) - def checkgrad(self, target_param = None, verbose=False, step=1e-6, tolerance = 1e-3): + def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3): """ Check the gradient of the model by comparing to a numerical estimate. If the verbose flag is passed, invividual components are tested (and printed) @@ -373,27 +380,27 @@ class model(parameterised): x = self._get_params_transformed().copy() if not verbose: - #just check the global ratio - dx = step*np.sign(np.random.uniform(-1,1,x.size)) + # just check the global ratio + dx = step * np.sign(np.random.uniform(-1, 1, x.size)) - #evaulate around the point x - f1, g1 = self.objective_and_gradients(x+dx) - f2, g2 = self.objective_and_gradients(x-dx) + # evaulate around the point x + f1, g1 = self.objective_and_gradients(x + dx) + f2, g2 = self.objective_and_gradients(x - dx) gradient = self.objective_function_gradients(x) - numerical_gradient = (f1-f2)/(2*dx) - global_ratio = (f1-f2)/(2*np.dot(dx,gradient)) + numerical_gradient = (f1 - f2) / (2 * dx) + global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient)) - if (np.abs(1.-global_ratio)>> index = np.asarray([0,0,0,1,1,1,2,2,2]) + returns + >>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]] + + or, a more complicated example + >>> index = np.asarray([0,0,1,1,0,2,2,2,1,1]) + returns + >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]] + """ + + #contruct the return structure + ind = np.asarray(index,dtype=np.int64) + ret = [[] for i in range(ind.max()+1)] + + #find the switchpoints + ind_ = np.hstack((ind,ind[0]+ind[-1]+1)) + switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0] + + [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))] + return ret + +class independent_outputs(kernpart): + """ + A kernel part shich can reopresent several independent functions. + this kernel 'switches off' parts of the matrix where the output indexes are different. + + The index of the functions is given by the last column in the input X + the rest of the columns of X are passed to the kernel for computation (in blocks). + + """ + def __init__(self,k): + self.D = k.D + 1 + self.Nparam = k.Nparam + self.name = 'iops('+ k.name + ')' + self.k = k + + def _get_params(self): + return self.k._get_params() + + def _set_params(self,x): + self.k._set_params(x) + self.params = x + + def _get_param_names(self): + return self.k._get_param_names() + + def K(self,X,X2,target): + #Sort out the slices from the input data + X,slices = X[:,:-1],index_to_slices(X[:,-1]) + if X2 is None: + X2,slices2 = X,slices + else: + X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) + + [[[self.k.K(X[s],X2[s2],target[s,s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] + + def Kdiag(self,X,target): + X,slices = X[:,:-1],index_to_slices(X[:,-1]) + [[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices] + + def dK_dtheta(self,dL_dK,X,X2,target): + X,slices = X[:,:-1],index_to_slices(X[:,-1]) + if X2 is None: + X2,slices2 = X,slices + else: + X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) + [[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] + + + def dK_dX(self,dL_dK,X,X2,target): + X,slices = X[:,:-1],index_to_slices(X[:,-1]) + if X2 is None: + X2,slices2 = X,slices + else: + X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) + [[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] + + def dKdiag_dX(self,dL_dKdiag,X,target): + X,slices = X[:,:-1],index_to_slices(X[:,-1]) + [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target[s,:-1]) for s in slices_i] for slices_i in slices] + + + def dKdiag_dtheta(self,dL_dKdiag,X,target): + X,slices = X[:,:-1],index_to_slices(X[:,-1]) + [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target) for s in slices_i] for slices_i in slices]