diff --git a/GPy/core/model.py b/GPy/core/model.py
index eb60d465..28bb4ff5 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -233,13 +233,19 @@ class model(parameterised):
         Ensure that any variables which should clearly be positive have been constrained somehow.
         """
         positive_strings = ['variance','lengthscale', 'precision']
+        param_names = self._get_param_names()
+        currently_constrained = self.all_constrained_indices()
+        to_make_positive = []
         for s in positive_strings:
             for i in self.grep_param_names(s):
-                if not (i in self.all_constrained_indices()):
-                    name = self._get_param_names()[i]
-                    self.constrain_positive(name)
+                if not (i in currently_constrained):
+                    name = param_names[i]
+                    to_make_positive.append(name)
                     if warn:
-                        print "Warning! constraining %s postive"%name
+                        print "Warning! constraining %s positive" % name
+        if len(to_make_positive):
+            self.constrain_positive('('+'|'.join(to_make_positive)+')')
+
 
     def objective_function(self, x):
diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index bfdc9bb6..c4e064a5 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -61,7 +61,7 @@ def GPLVM_oil_100(optimize=True, M=15):
         m.plot_latent(labels=m.data_labels)
     return m
 
-def BGPLVM_oil(optimize=True, N=100, Q=10, M=15):
+def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300):
     data = GPy.util.datasets.oil()
 
     # create simple GP model
@@ -73,10 +73,10 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=15):
     if optimize:
         m.constrain_fixed('noise', 0.05)
         m.ensure_default_constraints()
-        m.optimize('scg', messages=1)
+        m.optimize('scg', messages=1, max_f_eval=max(80, max_f_eval))
         m.unconstrain('noise')
         m.constrain_positive('noise')
-        m.optimize('scg', messages=1)
+        m.optimize('scg', messages=1, max_f_eval=max(0, max_f_eval - 80))
     else:
         m.ensure_default_constraints()
 
@@ -171,31 +171,31 @@ def stick():
     return m
 
+# def BGPLVM_oil():
+#     data = GPy.util.datasets.oil()
+#     Y, X = data['Y'], data['X']
+#     X -= X.mean(axis=0)
+#     X /= X.std(axis=0)
+#
+#     Q = 10
+#     M = 30
+#
+#     kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
+#     m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M)
+#     # m.scale_factor = 100.0
+#     m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)')
+#     from sklearn import cluster
+#     km = cluster.KMeans(M, verbose=10)
+#     Z = km.fit(m.X).cluster_centers_
+#     # Z = GPy.util.misc.kmm_init(m.X, M)
+#     m.set('iip', Z)
+#     m.set('bias', 1e-4)
+#     # optimize
+#     # m.ensure_default_constraints()
+#
+#     import pdb; pdb.set_trace()
+#     m.optimize('tnc', messages=1)
+#     print m
+#     m.plot_latent(labels=data['Y'].argmax(axis=1))
+#     return m
 
-def BGPLVM_oil():
-    data = GPy.util.datasets.oil()
-    Y, X = data['Y'], data['X']
-    X -= X.mean(axis=0)
-    X /= X.std(axis=0)
-
-    Q = 10
-    M = 30
-
-    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
-    m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M)
-    # m.scale_factor = 100.0
-    m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)')
-    from sklearn import cluster
-    km = cluster.KMeans(M, verbose=10)
-    Z = km.fit(m.X).cluster_centers_
-    # Z = GPy.util.misc.kmm_init(m.X, M)
-    m.set('iip', Z)
-    m.set('bias', 1e-4)
-    # optimize
-    # m.ensure_default_constraints()
-
-    import pdb; pdb.set_trace()
-    m.optimize('tnc', messages=1)
-    print m
-    m.plot_latent(labels=data['Y'].argmax(axis=1))
-    return m
diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py
index 84f7d68d..84521cf9 100644
--- a/GPy/kern/rbf.py
+++ b/GPy/kern/rbf.py
@@ -5,6 +5,7 @@
 from kernpart import kernpart
 import numpy as np
 import hashlib
+from scipy import weave
 
 class rbf(kernpart):
     """
@@ -172,7 +173,7 @@ class rbf(kernpart):
         """Think N,M,M,Q """
         self._psi_computations(Z,mu,S)
         tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom
-        target_mu += (dL_dpsi2[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
+        target_mu += -2.*(dL_dpsi2[:,:,:,None]*tmp*self._psi2_mudist).sum(1).sum(1)
         target_S += (dL_dpsi2[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
 
@@ -206,7 +207,6 @@ class rbf(kernpart):
         if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
             #something's changed. recompute EVERYTHING
-            #TODO: make more efficient for large Q (using NDL's dot product trick)
 
             #psi1
             self._psi1_denom = S[:,None,:]/self.lengthscale2 + 1.
             self._psi1_dist = Z[None,:,:]-mu[:,None,:]
@@ -216,9 +216,78 @@ class rbf(kernpart):
 
             #psi2
             self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscale2+1. # N,M,M,Q
-            self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
-            self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
-            self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M
+            self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu,self._psi2_Zhat)
+            #self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
+            #self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
+            #self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M
             self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M
 
+            #store matrices for caching
             self._Z, self._mu, self._S = Z, mu, S
+
+    def weave_psi2(self,mu,Zhat):
+        weave_options = {'headers'           : ['<omp.h>'],
+                         'extra_compile_args': ['-fopenmp -march=native'],
+                         'extra_link_args'   : ['-lgomp'],
+                         'compiler'          : 'gcc'}
+
+        N,Q = mu.shape
+        M = Zhat.shape[0]
+
+        mudist = np.empty((N,M,M,Q))
+        mudist_sq = np.empty((N,M,M,Q))
+        psi2_exponent = np.zeros((N,M,M))
+        psi2 = np.empty((N,M,M))
+
+        psi2_Zdist_sq = self._psi2_Zdist_sq
+        half_log_psi2_denom = 0.5*np.log(self._psi2_denom).squeeze()
+        variance_sq = float(np.square(self.variance))
+        if self.ARD:
+            lengthscale2 = self.lengthscale2
+        else:
+            lengthscale2 = np.ones(Q)*self.lengthscale2
+        _psi2_denom = self._psi2_denom.squeeze()
+        code = """
+        double tmp;
+
+        #pragma omp parallel for private(tmp)
+        for (int n=0; n<N; n++){
+            for (int m=0; m<M; m++){
+                for (int mm=0; mm<=m; mm++){
+                    for (int q=0; q<Q; q++){
+                        // differences between mu and the pseudo-input midpoints Zhat
+                        tmp = mu(n,q) - Zhat(m,mm,q);
+                        mudist(n,m,mm,q) = tmp;
+                        mudist(n,mm,m,q) = tmp;
+
+                        // scaled squared distances
+                        tmp = tmp*tmp/lengthscale2(q)/_psi2_denom(n,q);
+                        mudist_sq(n,m,mm,q) = tmp;
+                        mudist_sq(n,mm,m,q) = tmp;
+
+                        // accumulate the psi2 exponent over the dimensions q
+                        psi2_exponent(n,m,mm) += -psi2_Zdist_sq(m,mm,q)/4.0 - tmp - half_log_psi2_denom(n,q);
+                    }
+                    // the psi2 quantities are symmetric in (m,mm)
+                    psi2_exponent(n,mm,m) = psi2_exponent(n,m,mm);
+                    psi2(n,m,mm) = variance_sq*exp(psi2_exponent(n,m,mm));
+                    psi2(n,mm,m) = psi2(n,m,mm);
+                }
+            }
+        }
+        """
+        support_code = """
+        #include <omp.h>
+        #include <math.h>
+        """
+        weave.inline(code, support_code=support_code, libraries=['gomp'],
+                     arg_names=['N','M','Q','mu','Zhat','mudist_sq','mudist','lengthscale2','_psi2_denom','psi2_Zdist_sq','psi2_exponent','half_log_psi2_denom','psi2','variance_sq'],
+                     type_converters=weave.converters.blitz,**weave_options)
+
+        return mudist, mudist_sq, psi2_exponent, psi2
diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index ba9603bb..ee485e76 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -95,3 +95,4 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         input_1, input_2 = which_indices
         ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2],*args, **kwargs)
         ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
+        return ax
diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py
index 32594594..ca380a77 100644
--- a/GPy/models/GPLVM.py
+++ b/GPy/models/GPLVM.py
@@ -24,7 +24,7 @@ class GPLVM(GP):
     :type init: 'PCA'|'random'
     """
 
-    def __init__(self, Y, Q, init='PCA', X = None, kernel=None, **kwargs):
+    def __init__(self, Y, Q, init='PCA', X=None, kernel=None, **kwargs):
         if X is None:
             X = self.initialise_latent(init, Q, Y)
         if kernel is None:
@@ -39,87 +39,89 @@ class GPLVM(GP):
         return np.random.randn(Y.shape[0], Q)
 
     def _get_param_names(self):
-        return sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[]) + GP._get_param_names(self)
+        return sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], []) + GP._get_param_names(self)
 
     def _get_params(self):
         return np.hstack((self.X.flatten(), GP._get_params(self)))
 
-    def _set_params(self,x):
-        self.X = x[:self.X.size].reshape(self.N,self.Q).copy()
+    def _set_params(self, x):
+        self.X = x[:self.X.size].reshape(self.N, self.Q).copy()
         GP._set_params(self, x[self.X.size:])
 
     def _log_likelihood_gradients(self):
-        dL_dX = 2.*self.kern.dK_dX(self.dL_dK,self.X)
+        dL_dX = 2.*self.kern.dK_dX(self.dL_dK, self.X)
 
-        return np.hstack((dL_dX.flatten(),GP._log_likelihood_gradients(self)))
+        return np.hstack((dL_dX.flatten(), GP._log_likelihood_gradients(self)))
 
     def plot(self):
-        assert self.likelihood.Y.shape[1]==2
-        pb.scatter(self.likelihood.Y[:,0],self.likelihood.Y[:,1],40,self.X[:,0].copy(),linewidth=0,cmap=pb.cm.jet)
-        Xnew = np.linspace(self.X.min(),self.X.max(),200)[:,None]
+        assert self.likelihood.Y.shape[1] == 2
+        pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet)
+        Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None]
         mu, var, upper, lower = self.predict(Xnew)
-        pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)
+        pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
 
-    def plot_latent(self,labels=None, which_indices=None, resolution=50):
+    def plot_latent(self, labels=None, which_indices=None, resolution=50):
         """
         :param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc)
         :param resolution: the resolution of the grid on which to evaluate the predictive variance
         """
         util.plot.Tango.reset()
-
+
         if labels is None:
             labels = np.ones(self.N)
         if which_indices is None:
-            if self.Q==1:
+            if self.Q == 1:
                 input_1 = 0
                 input_2 = None
-            if self.Q==2:
-                input_1, input_2 = 0,1
+            elif self.Q == 2:
+                input_1, input_2 = 0, 1
             else:
-                #try to find a linear of RBF kern in the kernel
-                k = [p for p in self.kern.parts if p.name in ['rbf','linear']]
-                if (not len(k)==1) or (not k[0].ARD):
-                    raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
+                # try to find a linear or RBF kern in the kernel
+                k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']]
+                if (not len(k) == 1) or (not k[0].ARD):
+                    raise ValueError, "cannot automatically determine which dimensions to plot, please pass 'which_indices'"
                 k = k[0]
-                if k.name=='rbf':
+                if k.name == 'rbf':
                     input_1, input_2 = np.argsort(k.lengthscale)[:2]
-                elif k.name=='linear':
+                elif k.name == 'linear':
                     input_1, input_2 = np.argsort(k.variances)[::-1][:2]
 
-        #first, plot the output variance as a function of the latent space
-        Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(self.X[:,[input_1, input_2]],resolution=resolution)
-        Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1]))
-        Xtest_full[:, :2] = Xtest
-        mu, var, low, up = self.predict(Xtest_full)
-        var = var[:, :2]
-        pb.imshow(var.reshape(resolution,resolution).T[::-1,:],extent=[xmin[0],xmax[0],xmin[1],xmax[1]],cmap=pb.cm.binary,interpolation='bilinear')
+        # first, plot the output variance as a function of the latent space
+        Xtest, xx, yy, xmin, xmax = util.plot.x_frame2D(self.X[:, [input_1, input_2]], resolution=resolution)
+        Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1]))
+        Xtest_full[:, :2] = Xtest
+        mu, var, low, up = self.predict(Xtest_full)
+        var = var[:, :1]
+        pb.imshow(var.reshape(resolution, resolution).T[::-1, :],
+                  extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary, interpolation='bilinear')
 
-
-        for i,ul in enumerate(np.unique(labels)):
+        for i, ul in enumerate(np.unique(labels)):
             if type(ul) is np.string_:
                 this_label = ul
             elif type(ul) is np.int64:
-                this_label = 'class %i'%ul
+                this_label = 'class %i' % ul
             else:
-                this_label = 'class %i'%i
+                this_label = 'class %i' % i
 
-            index = np.nonzero(labels==ul)[0]
-            if self.Q==1:
-                x = self.X[index,input_1]
+            index = np.nonzero(labels == ul)[0]
+            if self.Q == 1:
+                x = self.X[index, input_1]
                 y = np.zeros(index.size)
             else:
-                x = self.X[index,input_1]
-                y = self.X[index,input_2]
-            pb.plot(x,y,marker='o',color=util.plot.Tango.nextMedium(),mew=0,label=this_label,linewidth=0)
+                x = self.X[index, input_1]
+                y = self.X[index, input_2]
+            pb.plot(x, y, marker='o', color=util.plot.Tango.nextMedium(), mew=0, label=this_label, linewidth=0)
 
-        pb.xlabel('latent dimension %i'%input_1)
-        pb.ylabel('latent dimension %i'%input_2)
+        pb.xlabel('latent dimension %i' % input_1)
+        pb.ylabel('latent dimension %i' % input_2)
 
-        if not np.all(labels==1.):
-            pb.legend(loc=0,numpoints=1)
+        if not np.all(labels == 1.):
+            pb.legend(loc=0, numpoints=1)
 
-        pb.xlim(xmin[0],xmax[0])
-        pb.ylim(xmin[1],xmax[1])
-
-        return input_1, input_2
+        pb.xlim(xmin[0], xmax[0])
+        pb.ylim(xmin[1], xmax[1])
+        pb.grid(b=False) # remove the grid if present, it doesn't look good
+        ax = pb.gca()
+        ax.set_aspect('auto') # set a nice aspect ratio
+        return ax
diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 59f598f9..d82bb50f 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -51,6 +51,26 @@ def _mdot_r(a,b):
         return np.dot(a,b)
 
 def jitchol(A,maxtries=5):
+    A = np.asfortranarray(A)
+    L, info = linalg.lapack.flapack.dpotrf(A, lower=1)
+    if info == 0:
+        return L
+    else:
+        diagA = np.diag(A)
+        if np.any(diagA < 0.):
+            raise linalg.LinAlgError, "not pd: negative diagonal elements"
+        jitter = diagA.mean()*1e-6
+        for i in range(1, maxtries+1):
+            print 'Warning: adding jitter of '+str(jitter)
+            try:
+                return linalg.cholesky(A+np.eye(A.shape[0]).T*jitter, lower = True)
+            except:
+                jitter *= 10
+        raise linalg.LinAlgError, "not positive definite, even with jitter."
+
+
+
+def jitchol_old(A,maxtries=5):
     """
     :param A : An almost pd square matrix
 
@@ -71,7 +91,7 @@ def jitchol(A,maxtries=5):
     for i in range(1,maxtries+1):
         print 'Warning: adding jitter of '+str(jitter)
         try:
-            return linalg.cholesky(A+np.eye(A.shape[0])*jitter, lower = True)
+            return linalg.cholesky(A+np.eye(A.shape[0]).T*jitter, lower = True)
         except:
             jitter *= 10
 
@@ -93,7 +113,8 @@ def pdinv(A):
     L = jitchol(A)
     logdet = 2.*np.sum(np.log(np.diag(L)))
     Li = chol_inv(L)
-    Ai = np.dot(Li.T,Li) #TODO: get the flapack routine form multiplying triangular matrices
+    Ai = linalg.lapack.flapack.dpotri(L, lower=1)[0]
+    Ai = np.tril(Ai) + np.tril(Ai,-1).T
 
     return Ai, L, Li, logdet
diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py
index dde9cd32..f9538942 100644
--- a/GPy/util/visualize.py
+++ b/GPy/util/visualize.py
@@ -4,7 +4,7 @@ import GPy
 import numpy as np
 
 class lvm:
-    def __init__(self, model, data_visualize, latent_axis):
+    def __init__(self, model, data_visualize, latent_axis, latent_index=[0,1], latent_dim=2):
         self.cid = latent_axis.figure.canvas.mpl_connect('button_press_event', self.on_click)
         self.cid = latent_axis.figure.canvas.mpl_connect('motion_notify_event', self.on_move)
         self.data_visualize = data_visualize
@@ -12,6 +12,8 @@ class lvm:
         self.latent_axis = latent_axis
         self.called = False
         self.move_on = False
+        self.latent_index = latent_index
+        self.latent_dim = latent_dim
 
     def on_click(self, event):
         #print 'click', event.xdata, event.ydata
@@ -32,7 +34,8 @@ class lvm:
         if self.called and self.move_on:
             # Call modify code on move
             #print 'move', event.xdata, event.ydata
-            latent_values = np.array((event.xdata, event.ydata))
+            latent_values = np.zeros((1,self.latent_dim))
+            latent_values[0,self.latent_index] = np.array([event.xdata, event.ydata])
             y = self.model.predict(latent_values)[0]
             self.data_visualize.modify(y)
             #print 'y', y
@@ -45,7 +48,7 @@ class data_show:
         # If no axes are defined, create some.
         if axis==None:
             fig = plt.figure()
-            self.axis = fig.add_subplot(111)
+            self.axis = fig.add_subplot(111)
         else:
             self.axis = axis
 
@@ -57,7 +60,7 @@ class vector_show(data_show):
     def __init__(self, vals, axis=None):
         data_show.__init__(self, vals, axis)
         self.vals = vals.T
-        self.handle = plt.plot(np.arange(0, len(vals))[:, None], self.vals)[0]
+        self.handle = self.axis.plot(np.arange(0, len(vals))[:, None], self.vals)[0]
 
     def modify(self, vals):
         xdata, ydata = self.handle.get_data()
@@ -84,7 +87,7 @@ class image_show(data_show):
         self.handle.set_array(self.vals)
         #self.axis.figure.canvas.draw()
         plt.show()
-
+
     def set_image(self, vals):
         self.vals = np.reshape(vals, self.dimensions, order='F')
         if self.transpose:
@@ -94,7 +97,7 @@ class image_show(data_show):
         #if self.invert:
         #    self.vals = -self.vals
 
-class stick_show(data_show):
+class stick_show(data_show):
     """Show a three dimensional point cloud as a figure.
     Connect elements of the figure together using the matrix connect."""
     def __init__(self, vals, axis=None, connect=None):
@@ -159,6 +162,6 @@ class stick_show(data_show):
             self.line_handle = self.axis.plot(np.array(x), np.array(y), np.array(z), 'b-')
 
         self.axis.figure.canvas.draw()
-
+
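
A minimal usage sketch for the LAPACK-backed jitchol/pdinv introduced above (illustration only, not part of the patch; assumes a GPy checkout of this era, i.e. Python 2 and a scipy that still ships linalg.lapack.flapack):

    import numpy as np
    from GPy.util.linalg import jitchol, pdinv

    # build a well-conditioned positive definite matrix
    X = np.random.randn(50, 10)
    A = np.dot(X, X.T) + np.eye(50)

    # depending on the scipy version, dpotrf may leave entries of A in the
    # upper triangle, so take np.tril before checking the factorisation
    L = np.tril(jitchol(A))
    assert np.allclose(np.dot(L, L.T), A)

    # inverse, Cholesky factor, inverse factor and log-determinant
    Ai, L, Li, logdet = pdinv(A)
    assert np.allclose(np.dot(Ai, A), np.eye(50), atol=1e-8)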