MRD example added

This commit is contained in:
Max Zwiessele 2013-04-11 15:54:36 +01:00
commit 89a50e260a
7 changed files with 193 additions and 92 deletions

View file

@@ -233,13 +233,18 @@ class model(parameterised):
Ensure that any variables which should clearly be positive have been constrained somehow. Ensure that any variables which should clearly be positive have been constrained somehow.
""" """
positive_strings = ['variance','lengthscale', 'precision'] positive_strings = ['variance','lengthscale', 'precision']
param_names = self._get_param_names()
currently_constrained = self.all_constrained_indices()
to_make_positive = []
for s in positive_strings: for s in positive_strings:
for i in self.grep_param_names(s): for i in self.grep_param_names(s):
if not (i in self.all_constrained_indices()): if not (i in currently_constrained):
name = self._get_param_names()[i] to_make_positive.append(param_names[i])
self.constrain_positive(name)
if warn: if warn:
print "Warning! constraining %s postive"%name print "Warning! constraining %s postive"%name
if len(to_make_positive):
self.constrain_positive('('+'|'.join(to_make_positive)+')')
def objective_function(self, x): def objective_function(self, x):

View file

@@ -61,7 +61,7 @@ def GPLVM_oil_100(optimize=True, M=15):
m.plot_latent(labels=m.data_labels) m.plot_latent(labels=m.data_labels)
return m return m
def BGPLVM_oil(optimize=True, N=100, Q=10, M=15): def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300):
data = GPy.util.datasets.oil() data = GPy.util.datasets.oil()
# create simple GP model # create simple GP model
@@ -73,10 +73,10 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=15):
if optimize: if optimize:
m.constrain_fixed('noise', 0.05) m.constrain_fixed('noise', 0.05)
m.ensure_default_constraints() m.ensure_default_constraints()
m.optimize('scg', messages=1) m.optimize('scg', messages=1, max_f_eval=max(80, max_f_eval))
m.unconstrain('noise') m.unconstrain('noise')
m.constrain_positive('noise') m.constrain_positive('noise')
m.optimize('scg', messages=1) m.optimize('scg', messages=1, max_f_eval=max(0, max_f_eval - 80))
else: else:
m.ensure_default_constraints() m.ensure_default_constraints()
@@ -171,31 +171,31 @@ def stick():
return m return m
# def BGPLVM_oil():
# data = GPy.util.datasets.oil()
# Y, X = data['Y'], data['X']
# X -= X.mean(axis=0)
# X /= X.std(axis=0)
#
# Q = 10
# M = 30
#
# kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
# m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M)
# # m.scale_factor = 100.0
# m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)')
# from sklearn import cluster
# km = cluster.KMeans(M, verbose=10)
# Z = km.fit(m.X).cluster_centers_
# # Z = GPy.util.misc.kmm_init(m.X, M)
# m.set('iip', Z)
# m.set('bias', 1e-4)
# # optimize
# # m.ensure_default_constraints()
#
# import pdb; pdb.set_trace()
# m.optimize('tnc', messages=1)
# print m
# m.plot_latent(labels=data['Y'].argmax(axis=1))
# return m
def BGPLVM_oil():
data = GPy.util.datasets.oil()
Y, X = data['Y'], data['X']
X -= X.mean(axis=0)
X /= X.std(axis=0)
Q = 10
M = 30
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M)
# m.scale_factor = 100.0
m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)')
from sklearn import cluster
km = cluster.KMeans(M, verbose=10)
Z = km.fit(m.X).cluster_centers_
# Z = GPy.util.misc.kmm_init(m.X, M)
m.set('iip', Z)
m.set('bias', 1e-4)
# optimize
# m.ensure_default_constraints()
import pdb; pdb.set_trace()
m.optimize('tnc', messages=1)
print m
m.plot_latent(labels=data['Y'].argmax(axis=1))
return m

View file

@@ -5,6 +5,7 @@
from kernpart import kernpart from kernpart import kernpart
import numpy as np import numpy as np
import hashlib import hashlib
from scipy import weave
class rbf(kernpart): class rbf(kernpart):
""" """
@@ -172,7 +173,7 @@ class rbf(kernpart):
"""Think N,M,M,Q """ """Think N,M,M,Q """
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom
target_mu += (dL_dpsi2[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1) target_mu += -2.*(dL_dpsi2[:,:,:,None]*tmp*self._psi2_mudist).sum(1).sum(1)
target_S += (dL_dpsi2[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) target_S += (dL_dpsi2[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
@@ -206,7 +207,6 @@ class rbf(kernpart):
if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)): if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
#something's changed. recompute EVERYTHING #something's changed. recompute EVERYTHING
#TODO: make more efficient for large Q (using NDL's dot product trick)
#psi1 #psi1
self._psi1_denom = S[:,None,:]/self.lengthscale2 + 1. self._psi1_denom = S[:,None,:]/self.lengthscale2 + 1.
self._psi1_dist = Z[None,:,:]-mu[:,None,:] self._psi1_dist = Z[None,:,:]-mu[:,None,:]
@@ -216,9 +216,78 @@
#psi2 #psi2
self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscale2+1. # N,M,M,Q self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscale2+1. # N,M,M,Q
self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu,self._psi2_Zhat)
self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom) #self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M #self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
#self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M
self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M
#store matrices for caching
self._Z, self._mu, self._S = Z, mu,S self._Z, self._mu, self._S = Z, mu,S
def weave_psi2(self,mu,Zhat):
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -march=native'],
'extra_link_args' : ['-lgomp'],
'compiler' : 'gcc'}
N,Q = mu.shape
M = Zhat.shape[0]
mudist = np.empty((N,M,M,Q))
mudist_sq = np.empty((N,M,M,Q))
psi2_exponent = np.zeros((N,M,M))
psi2 = np.empty((N,M,M))
psi2_Zdist_sq = self._psi2_Zdist_sq
half_log_psi2_denom = 0.5*np.log(self._psi2_denom).squeeze()
variance_sq = float(np.square(self.variance))
if self.ARD:
lengthscale2 = self.lengthscale2
else:
lengthscale2 = np.ones(Q)*self.lengthscale2
_psi2_denom = self._psi2_denom.squeeze()
code = """
double tmp;
#pragma omp parallel for private(tmp)
for (int n=0; n<N; n++){
for (int m=0; m<M; m++){
for (int mm=0; mm<(m+1); mm++){
for (int q=0; q<Q; q++){
//compute mudist
tmp = mu(n,q) - Zhat(m,mm,q);
mudist(n,m,mm,q) = tmp;
mudist(n,mm,m,q) = tmp;
//now mudist_sq
tmp = tmp*tmp/lengthscale2(q)/_psi2_denom(n,q);
mudist_sq(n,m,mm,q) = tmp;
mudist_sq(n,mm,m,q) = tmp;
//now psi2_exponent
tmp = -psi2_Zdist_sq(m,mm,q)/4.0 - tmp - half_log_psi2_denom(n,q);
psi2_exponent(n,mm,m) += tmp;
if (m !=mm){
psi2_exponent(n,m,mm) += tmp;
}
//psi2 would be computed like this, but np is faster
//tmp = variance_sq*exp(psi2_exponent(n,m,mm));
//psi2(n,m,mm) = tmp;
//psi2(n,mm,m) = tmp;
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','M','Q','mu','Zhat','mudist_sq','mudist','lengthscale2','_psi2_denom','psi2_Zdist_sq','psi2_exponent','half_log_psi2_denom','psi2','variance_sq'],
type_converters=weave.converters.blitz,**weave_options)
return mudist,mudist_sq, psi2_exponent, psi2

View file

@@ -95,3 +95,4 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
input_1, input_2 = which_indices input_1, input_2 = which_indices
ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2],*args, **kwargs) ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2],*args, **kwargs)
ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w') ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
return ax

View file

@@ -24,7 +24,7 @@ class GPLVM(GP):
:type init: 'PCA'|'random' :type init: 'PCA'|'random'
""" """
def __init__(self, Y, Q, init='PCA', X = None, kernel=None, **kwargs): def __init__(self, Y, Q, init='PCA', X=None, kernel=None, **kwargs):
if X is None: if X is None:
X = self.initialise_latent(init, Q, Y) X = self.initialise_latent(init, Q, Y)
if kernel is None: if kernel is None:
@@ -39,28 +39,28 @@ class GPLVM(GP):
return np.random.randn(Y.shape[0], Q) return np.random.randn(Y.shape[0], Q)
def _get_param_names(self): def _get_param_names(self):
return sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[]) + GP._get_param_names(self) return sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], []) + GP._get_param_names(self)
def _get_params(self): def _get_params(self):
return np.hstack((self.X.flatten(), GP._get_params(self))) return np.hstack((self.X.flatten(), GP._get_params(self)))
def _set_params(self,x): def _set_params(self, x):
self.X = x[:self.X.size].reshape(self.N,self.Q).copy() self.X = x[:self.X.size].reshape(self.N, self.Q).copy()
GP._set_params(self, x[self.X.size:]) GP._set_params(self, x[self.X.size:])
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
dL_dX = 2.*self.kern.dK_dX(self.dL_dK,self.X) dL_dX = 2.*self.kern.dK_dX(self.dL_dK, self.X)
return np.hstack((dL_dX.flatten(),GP._log_likelihood_gradients(self))) return np.hstack((dL_dX.flatten(), GP._log_likelihood_gradients(self)))
def plot(self): def plot(self):
assert self.likelihood.Y.shape[1]==2 assert self.likelihood.Y.shape[1] == 2
pb.scatter(self.likelihood.Y[:,0],self.likelihood.Y[:,1],40,self.X[:,0].copy(),linewidth=0,cmap=pb.cm.jet) pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet)
Xnew = np.linspace(self.X.min(),self.X.max(),200)[:,None] Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None]
mu, var, upper, lower = self.predict(Xnew) mu, var, upper, lower = self.predict(Xnew)
pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5) pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
def plot_latent(self,labels=None, which_indices=None, resolution=50): def plot_latent(self, labels=None, which_indices=None, resolution=50):
""" """
:param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc) :param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc)
:param resolution: the resolution of the grid on which to evaluate the predictive variance :param resolution: the resolution of the grid on which to evaluate the predictive variance
@@ -71,55 +71,57 @@ class GPLVM(GP):
if labels is None: if labels is None:
labels = np.ones(self.N) labels = np.ones(self.N)
if which_indices is None: if which_indices is None:
if self.Q==1: if self.Q == 1:
input_1 = 0 input_1 = 0
input_2 = None input_2 = None
if self.Q==2: if self.Q == 2:
input_1, input_2 = 0,1 input_1, input_2 = 0, 1
else: else:
#try to find a linear of RBF kern in the kernel # try to find a linear of RBF kern in the kernel
k = [p for p in self.kern.parts if p.name in ['rbf','linear']] k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']]
if (not len(k)==1) or (not k[0].ARD): if (not len(k) == 1) or (not k[0].ARD):
raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'" raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
k = k[0] k = k[0]
if k.name=='rbf': if k.name == 'rbf':
input_1, input_2 = np.argsort(k.lengthscale)[:2] input_1, input_2 = np.argsort(k.lengthscale)[:2]
elif k.name=='linear': elif k.name == 'linear':
input_1, input_2 = np.argsort(k.variances)[::-1][:2] input_1, input_2 = np.argsort(k.variances)[::-1][:2]
#first, plot the output variance as a function of the latent space # first, plot the output variance as a function of the latent space
Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(self.X[:,[input_1, input_2]],resolution=resolution) Xtest, xx, yy, xmin, xmax = util.plot.x_frame2D(self.X[:, [input_1, input_2]], resolution=resolution)
Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1])) Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1]))
Xtest_full[:, :2] = Xtest Xtest_full[:, :2] = Xtest
mu, var, low, up = self.predict(Xtest_full) mu, var, low, up = self.predict(Xtest_full)
var = var[:, :2] var = var[:, :1]
pb.imshow(var.reshape(resolution,resolution).T[::-1,:],extent=[xmin[0],xmax[0],xmin[1],xmax[1]],cmap=pb.cm.binary,interpolation='bilinear') pb.imshow(var.reshape(resolution, resolution).T[::-1, :],
extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary, interpolation='bilinear')
for i, ul in enumerate(np.unique(labels)):
for i,ul in enumerate(np.unique(labels)):
if type(ul) is np.string_: if type(ul) is np.string_:
this_label = ul this_label = ul
elif type(ul) is np.int64: elif type(ul) is np.int64:
this_label = 'class %i'%ul this_label = 'class %i' % ul
else: else:
this_label = 'class %i'%i this_label = 'class %i' % i
index = np.nonzero(labels==ul)[0] index = np.nonzero(labels == ul)[0]
if self.Q==1: if self.Q == 1:
x = self.X[index,input_1] x = self.X[index, input_1]
y = np.zeros(index.size) y = np.zeros(index.size)
else: else:
x = self.X[index,input_1] x = self.X[index, input_1]
y = self.X[index,input_2] y = self.X[index, input_2]
pb.plot(x,y,marker='o',color=util.plot.Tango.nextMedium(),mew=0,label=this_label,linewidth=0) pb.plot(x, y, marker='o', color=util.plot.Tango.nextMedium(), mew=0, label=this_label, linewidth=0)
pb.xlabel('latent dimension %i'%input_1) pb.xlabel('latent dimension %i' % input_1)
pb.ylabel('latent dimension %i'%input_2) pb.ylabel('latent dimension %i' % input_2)
if not np.all(labels==1.): if not np.all(labels == 1.):
pb.legend(loc=0,numpoints=1) pb.legend(loc=0, numpoints=1)
pb.xlim(xmin[0],xmax[0]) pb.xlim(xmin[0], xmax[0])
pb.ylim(xmin[1],xmax[1]) pb.ylim(xmin[1], xmax[1])
pb.grid(b=False) # remove the grid if present, it doesn't look good
return input_1, input_2 ax = pb.gca()
ax.set_aspect('auto') # set a nice aspect ratio
return ax

View file

@@ -51,6 +51,26 @@ def _mdot_r(a,b):
return np.dot(a,b) return np.dot(a,b)
def jitchol(A,maxtries=5): def jitchol(A,maxtries=5):
A = np.asfortranarray(A)
L,info = linalg.lapack.flapack.dpotrf(A,lower=1)
if info ==0:
return L
else:
diagA = np.diag(A)
if np.any(diagA<0.):
raise linalg.LinAlgError, "not pd: negative diagonal elements"
jitter= diagA.mean()*1e-6
for i in range(1,maxtries+1):
print 'Warning: adding jitter of '+str(jitter)
try:
return linalg.cholesky(A+np.eye(A.shape[0]).T*jitter, lower = True)
except:
jitter *= 10
raise linalg.LinAlgError,"not positive definite, even with jitter."
def jitchol_old(A,maxtries=5):
""" """
:param A : An almost pd square matrix :param A : An almost pd square matrix
@@ -71,7 +91,7 @@ def jitchol(A,maxtries=5):
for i in range(1,maxtries+1): for i in range(1,maxtries+1):
print 'Warning: adding jitter of '+str(jitter) print 'Warning: adding jitter of '+str(jitter)
try: try:
return linalg.cholesky(A+np.eye(A.shape[0])*jitter, lower = True) return linalg.cholesky(A+np.eye(A.shape[0]).T*jitter, lower = True)
except: except:
jitter *= 10 jitter *= 10
@@ -93,7 +113,8 @@ def pdinv(A):
L = jitchol(A) L = jitchol(A)
logdet = 2.*np.sum(np.log(np.diag(L))) logdet = 2.*np.sum(np.log(np.diag(L)))
Li = chol_inv(L) Li = chol_inv(L)
Ai = np.dot(Li.T,Li) #TODO: get the flapack routine form multiplying triangular matrices Ai = linalg.lapack.flapack.dpotri(L)[0]
Ai = np.tril(Ai) + np.tril(Ai,-1).T
return Ai, L, Li, logdet return Ai, L, Li, logdet

View file

@@ -4,7 +4,7 @@ import GPy
import numpy as np import numpy as np
class lvm: class lvm:
def __init__(self, model, data_visualize, latent_axis): def __init__(self, model, data_visualize, latent_axis, latent_index=[0,1], latent_dim=2):
self.cid = latent_axis.figure.canvas.mpl_connect('button_press_event', self.on_click) self.cid = latent_axis.figure.canvas.mpl_connect('button_press_event', self.on_click)
self.cid = latent_axis.figure.canvas.mpl_connect('motion_notify_event', self.on_move) self.cid = latent_axis.figure.canvas.mpl_connect('motion_notify_event', self.on_move)
self.data_visualize = data_visualize self.data_visualize = data_visualize
@@ -12,6 +12,8 @@ class lvm:
self.latent_axis = latent_axis self.latent_axis = latent_axis
self.called = False self.called = False
self.move_on = False self.move_on = False
self.latent_index = latent_index
self.latent_dim = latent_dim
def on_click(self, event): def on_click(self, event):
#print 'click', event.xdata, event.ydata #print 'click', event.xdata, event.ydata
@@ -32,7 +34,8 @@ class lvm:
if self.called and self.move_on: if self.called and self.move_on:
# Call modify code on move # Call modify code on move
#print 'move', event.xdata, event.ydata #print 'move', event.xdata, event.ydata
latent_values = np.array((event.xdata, event.ydata)) latent_values = np.zeros((1,self.latent_dim))
latent_values[0,self.latent_index] = np.array([event.xdata, event.ydata])
y = self.model.predict(latent_values)[0] y = self.model.predict(latent_values)[0]
self.data_visualize.modify(y) self.data_visualize.modify(y)
#print 'y', y #print 'y', y
@@ -57,7 +60,7 @@ class vector_show(data_show):
def __init__(self, vals, axis=None): def __init__(self, vals, axis=None):
data_show.__init__(self, vals, axis) data_show.__init__(self, vals, axis)
self.vals = vals.T self.vals = vals.T
self.handle = plt.plot(np.arange(0, len(vals))[:, None], self.vals)[0] self.handle = self.axis.plot(np.arange(0, len(vals))[:, None], self.vals)[0]
def modify(self, vals): def modify(self, vals):
xdata, ydata = self.handle.get_data() xdata, ydata = self.handle.get_data()