diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 7d7d5fdd..6875c0b5 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -82,7 +82,7 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300): m.ensure_default_constraints() y = m.likelihood.Y[0, :] - fig,(latent_axes,hist_axes) = plt.subplots(1,2) + fig, (latent_axes, hist_axes) = plt.subplots(1, 2) plt.sca(latent_axes) m.plot_latent() data_show = GPy.util.visualize.vector_show(y) @@ -176,20 +176,34 @@ def bgplvm_simulation_matlab_compare(): Y = sim_data['Y'] S = sim_data['S'] mu = sim_data['mu'] - M, [_, Q] = 20, mu.shape + M, [_, Q] = 30, mu.shape + Q = 2 from GPy.models import mrd from GPy import kern reload(mrd); reload(kern) - k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) + k = kern.rbf(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, # X=mu, # X_variance=S, _debug=True) m.ensure_default_constraints() m.auto_scale_factor = True - m['noise'] = .01 # Y.var() / 100. - m['{}_variance'.format(k.parts[0].name)] = .01 + m['noise'] = Y.var() / 100. + + lscstr = '{}'.format(k.parts[0].name) +# m[lscstr] = .01 + m.unconstrain(lscstr); m.constrain_fixed(lscstr, 10) + + lscstr = 'X_variance' +# m[lscstr] = .01 + m.unconstrain(lscstr); m.constrain_fixed(lscstr, .1) + +# cstr = 'white' +# m.unconstrain(cstr); m.constrain_bounded(cstr, .01, 1.) + +# cstr = 'noise' +# m.unconstrain(cstr); m.constrain_bounded(cstr, .01, 1.) return m def bgplvm_simulation(burnin='scg', plot_sim=False, @@ -385,7 +399,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True): Y = data['Y'] if in_place: # Make figure move in place. 
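Note on the constraint handling in bgplvm_simulation_matlab_compare above: in this generation of GPy, parameters are addressed by regex against the names returned by m._get_param_names(), and ensure_default_constraints() has usually already tied them to a positivity constraint, so a parameter must be unconstrained before it can be re-fixed. A minimal sketch of the pattern, assuming that regex-based interface (the parameter name is illustrative; real names depend on the kernel parts):

    m.ensure_default_constraints()       # default positivity constraints
    name = k.parts[0].name               # e.g. 'rbf' for the kernel above
    m.unconstrain(name)                  # drop the existing constraint first
    m.constrain_fixed(name, 10)          # then pin it for the burn-in run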
- data['Y'][:, 0:3]=0.0 + data['Y'][:, 0:3] = 0.0 m = GPy.models.GPLVM(data['Y'], 2, normalize_Y=True) # optimize diff --git a/GPy/inference/conjugate_gradient_descent.py b/GPy/inference/conjugate_gradient_descent.py index ddd5cb85..93dac6df 100644 --- a/GPy/inference/conjugate_gradient_descent.py +++ b/GPy/inference/conjugate_gradient_descent.py @@ -4,14 +4,14 @@ Created on 24 Apr 2013 @author: maxz ''' from GPy.inference.gradient_descent_update_rules import FletcherReeves -import numpy -from multiprocessing import Value -from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2 -from multiprocessing.synchronize import Event -from multiprocessing.queues import Queue from Queue import Empty -import sys +from multiprocessing import Value +from multiprocessing.queues import Queue +from multiprocessing.synchronize import Event +from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2 from threading import Thread +import numpy +import sys RUNNING = "running" CONVERGED = "converged" @@ -20,10 +20,9 @@ MAX_F_EVAL = "maximum number of function calls reached" LINE_SEARCH = "line search failed" KBINTERRUPT = "interrupted" -SENTINEL = None - class _Async_Optimization(Thread): - def __init__(self, f, df, x0, update_rule, runsignal, + + def __init__(self, f, df, x0, update_rule, runsignal, SENTINEL, report_every=10, messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6, outqueue=None, *args, **kw): """ @@ -42,6 +41,7 @@ class _Async_Optimization(Thread): self.maxiter = maxiter self.max_f_eval = max_f_eval self.gtol = gtol + self.SENTINEL = SENTINEL self.runsignal = runsignal # self.parent = parent # self.result = None @@ -70,7 +70,7 @@ class _Async_Optimization(Thread): def callback_return(self, *a): self.callback(*a) - self.outq.put(SENTINEL) + self.outq.put(self.SENTINEL) self.runsignal.clear() def run(self, *args, **kwargs): @@ -136,7 +136,7 @@ class _CGDAsync(_Async_Optimization): if gfi is not None: gi = gfi - if fi_old > fi: + if numpy.isnan(fi) or fi_old < fi: gi, ur, si = self.reset(xi, *a, **kw) else: xi += numpy.dot(alphai, si) @@ -145,22 +145,23 @@ class _CGDAsync(_Async_Optimization): sys.stdout.flush() sys.stdout.write("iteration: {0:> 6g} f:{1:> 12e} |g|:{2:> 12e}".format(it, fi, numpy.dot(gi.T, gi))) - if it % self.report_every == 0: - self.callback(xi, fi, it, self.f_call.value, self.df_call.value, status) + if it % self.report_every == 0: + self.callback(xi, fi, gi, it, self.f_call.value, self.df_call.value, status) it += 1 else: status = MAXITER - # self.result = [xi, fi, it, self.f_call.value, self.df_call.value, status] - self.callback_return(xi, fi, it, self.f_call.value, self.df_call.value, status) + self.callback_return(xi, fi, gi, it, self.f_call.value, self.df_call.value, status) + self.result = [xi, fi, gi, it, self.f_call.value, self.df_call.value, status] class Async_Optimize(object): callback = lambda *x: None runsignal = Event() + SENTINEL = "SENTINEL" def async_callback_collect(self, q): while self.runsignal.is_set(): try: - for ret in iter(lambda: q.get(timeout=1), SENTINEL): + for ret in iter(lambda: q.get(timeout=1), self.SENTINEL): self.callback(*ret) except Empty: pass @@ -169,12 +170,12 @@ class Async_Optimize(object): messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6, report_every=10, *args, **kwargs): self.runsignal.set() - outqueue = Queue(5) + outqueue = Queue() if callback: self.callback = callback c = Thread(target=self.async_callback_collect, args=(outqueue,)) c.start() - p = _CGDAsync(f, df, x0, update_rule, 
self.runsignal, + p = _CGDAsync(f, df, x0, update_rule, self.runsignal, self.SENTINEL, report_every=report_every, messages=messages, maxiter=maxiter, max_f_eval=max_f_eval, gtol=gtol, outqueue=outqueue, *args, **kwargs) p.run() @@ -189,12 +190,14 @@ class Async_Optimize(object): while self.runsignal.is_set(): try: p.join(1) - c.join(1) + # c.join(1) except KeyboardInterrupt: # print "^C" self.runsignal.clear() p.join() - c.join() + if c.is_alive(): + print "WARNING: callback still running, optimisation done!" + return p.result class CGD(Async_Optimize): ''' @@ -215,7 +218,7 @@ class CGD(Async_Optimize): callback gets called every `report_every` iterations - callback(xi, fi, iteration, function_calls, gradient_calls, status_message) + callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message) if df returns tuple (grad, natgrad) it will optimize according to natural gradient rules @@ -233,7 +236,7 @@ class CGD(Async_Optimize): **calls** --------- - callback(x_opt, f_opt, iteration, function_calls, gradient_calls, status_message) + callback(x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message) at end of optimization! """ @@ -247,7 +250,7 @@ class CGD(Async_Optimize): Minimize f, calling callback every `report_every` iterations with following syntax: - callback(xi, fi, iteration, function_calls, gradient_calls, status_message) + callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message) if df returns tuple (grad, natgrad) it will optimize according to natural gradient rules @@ -260,7 +263,7 @@ class CGD(Async_Optimize): **returns** --------- - x_opt, f_opt, iteration, function_calls, gradient_calls, status_message + x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message at end of optimization """ diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py index 78dbdf01..4c85c6d5 100644 --- a/GPy/kern/linear.py +++ b/GPy/kern/linear.py @@ -5,6 +5,7 @@ from kernpart import kernpart import numpy as np from ..util.linalg import tdot +from GPy.util.linalg import mdot class linear(kernpart): """ @@ -23,7 +24,7 @@ class linear(kernpart): :rtype: kernel object """ - def __init__(self,D,variances=None,ARD=False): + def __init__(self, D, variances=None, ARD=False): self.D = D self.ARD = ARD if ARD == False: @@ -45,15 +46,15 @@ class linear(kernpart): variances = np.ones(self.D) self._set_params(variances.flatten()) - #initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3,1)) - self._X, self._X2, self._params = np.empty(shape=(3,1)) + # initialize cache + self._Z, self._mu, self._S = np.empty(shape=(3, 1)) + self._X, self._X2, self._params = np.empty(shape=(3, 1)) def _get_params(self): return self.variances - def _set_params(self,x): - assert x.size==(self.Nparam) + def _set_params(self, x): + assert x.size == (self.Nparam) self.variances = x self.variances2 = np.square(self.variances) @@ -61,115 +62,136 @@ class linear(kernpart): if self.Nparam == 1: return ['variance'] else: - return ['variance_%i'%i for i in range(self.variances.size)] + return ['variance_%i' % i for i in range(self.variances.size)] - def K(self,X,X2,target): + def K(self, X, X2, target): if self.ARD: - XX = X*np.sqrt(self.variances) + XX = X * np.sqrt(self.variances) if X2 is None: target += tdot(XX) else: - XX2 = X2*np.sqrt(self.variances) + XX2 = X2 * np.sqrt(self.variances) target += np.dot(XX, XX2.T) else: self._K_computations(X, X2) target += self.variances * self._dot_product - def Kdiag(self,X,target): - 
np.add(target,np.sum(self.variances*np.square(X),-1),target) + def Kdiag(self, X, target): + np.add(target, np.sum(self.variances * np.square(X), -1), target) - def dK_dtheta(self,dL_dK,X,X2,target): + def dK_dtheta(self, dL_dK, X, X2, target): if self.ARD: if X2 is None: - [np.add(target[i:i+1],np.sum(dL_dK*tdot(X[:,i:i+1])),target[i:i+1]) for i in range(self.D)] + [np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.D)] else: - product = X[:,None,:]*X2[None,:,:] - target += (dL_dK[:,:,None]*product).sum(0).sum(0) + product = X[:, None, :] * X2[None, :, :] + target += (dL_dK[:, :, None] * product).sum(0).sum(0) else: self._K_computations(X, X2) - target += np.sum(self._dot_product*dL_dK) + target += np.sum(self._dot_product * dL_dK) - def dKdiag_dtheta(self,dL_dKdiag, X, target): - tmp = dL_dKdiag[:,None]*X**2 + def dKdiag_dtheta(self, dL_dKdiag, X, target): + tmp = dL_dKdiag[:, None] * X ** 2 if self.ARD: target += tmp.sum(0) else: target += tmp.sum() - def dK_dX(self,dL_dK,X,X2,target): - target += (((X2[:, None, :] * self.variances)) * dL_dK[:,:, None]).sum(0) + def dK_dX(self, dL_dK, X, X2, target): + target += (((X2[:, None, :] * self.variances)) * dL_dK[:, :, None]).sum(0) #---------------------------------------# # PSI statistics # #---------------------------------------# - def psi0(self,Z,mu,S,target): - self._psi_computations(Z,mu,S) - target += np.sum(self.variances*self.mu2_S,1) + def psi0(self, Z, mu, S, target): + self._psi_computations(Z, mu, S) + target += np.sum(self.variances * self.mu2_S, 1) - def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target): - self._psi_computations(Z,mu,S) + def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target): + self._psi_computations(Z, mu, S) tmp = dL_dpsi0[:, None] * self.mu2_S if self.ARD: target += tmp.sum(0) else: target += tmp.sum() - def dpsi0_dmuS(self,dL_dpsi0, Z,mu,S,target_mu,target_S): - target_mu += dL_dpsi0[:, None] * (2.0*mu*self.variances) + def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S): + target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances) target_S += dL_dpsi0[:, None] * self.variances - def psi1(self,Z,mu,S,target): + def psi1(self, Z, mu, S, target): """the variance, it does nothing""" self._psi1 = self.K(mu, Z, target) - def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target): + def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target): """the variance, it does nothing""" - self.dK_dtheta(dL_dpsi1,mu,Z,target) + self.dK_dtheta(dL_dpsi1, mu, Z, target) - def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S): + def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S): """Do nothing for S, it does not affect psi1""" - self._psi_computations(Z,mu,S) - target_mu += (dL_dpsi1.T[:,:, None]*(Z*self.variances)).sum(1) + self._psi_computations(Z, mu, S) + target_mu += (dL_dpsi1.T[:, :, None] * (Z * self.variances)).sum(1) - def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target): - self.dK_dX(dL_dpsi1.T,Z,mu,target) + def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target): + self.dK_dX(dL_dpsi1.T, Z, mu, target) - def psi2(self,Z,mu,S,target): + def psi2(self, Z, mu, S, target): """ returns N,M,M matrix """ - self._psi_computations(Z,mu,S) - #psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :] - #target += psi2.sum(-1) - target += np.tensordot(self.ZZ[None,:,:,:]*np.square(self.variances),self.mu2_S[:, None, None, :],((3),(3))).squeeze().T + self._psi_computations(Z, mu, S) +# psi2_old = self.ZZ * np.square(self.variances) * self.mu2_S[:, None, None, :] +# target += psi2.sum(-1) + # slow 
way of doing it, but right + psi2_real = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) + for n in range(mu.shape[0]): + for m_prime in range(Z.shape[0]): + for m in range(Z.shape[0]): + tmp = self._Z[m:m + 1] * self.variances + tmp = np.dot(tmp, (tdot(self._mu[n:n + 1].T) + np.diag(S[n:n + 1]))) + psi2_real[n, m, m_prime] = np.dot(tmp, ( + self._Z[m_prime:m_prime + 1] * self.variances).T) - def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target): - self._psi_computations(Z,mu,S) - tmp = (dL_dpsi2[:,:,:,None]*(2.*self.ZZ*self.mu2_S[:,None,None,:]*self.variances)) + psi2_inner = mdot(self.ZA, self.inner, self.ZA.T) + mu2_S = (self._mu[:, None] * self._mu[:, :, None]) + self._S[:, :, None] + psi2 = (self.ZA[None, :, None, :] * mu2_S[:, None]).sum(-1) + psi2 = (psi2[:, :, None] * self.ZA[None, None]).sum(-1) +# psi2_tensor = np.tensordot(self.ZZ[None, :, :, :] * np.square(self.variances), self.mu2_S[:, None, None, :], ((3), (3))).squeeze().T +# import ipdb;ipdb.set_trace() + target += psi2_real + + def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target): + self._psi_computations(Z, mu, S) + tmp = (dL_dpsi2[:, :, :, None] * (2.*self.ZZ * self.mu2_S[:, None, None, :] * self.variances)) if self.ARD: target += tmp.sum(0).sum(0).sum(0) else: target += tmp.sum() - def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S): + def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): """Think N,M,M,Q """ - self._psi_computations(Z,mu,S) - tmp = self.ZZ*np.square(self.variances) # M,M,Q - target_mu += (dL_dpsi2[:,:,:,None]*tmp*2.*mu[:,None,None,:]).sum(1).sum(1) - target_S += (dL_dpsi2[:,:,:,None]*tmp).sum(1).sum(1) + self._psi_computations(Z, mu, S) + tmp = self.ZZ * np.square(self.variances) # M,M,Q +# import ipdb;ipdb.set_trace() + target_mu += (dL_dpsi2[:, :, :, None] * tmp * 2.*mu[:, None, None, :]).sum(1).sum(1) + target_S += (dL_dpsi2[:, :, :, None] * tmp).sum(1).sum(1) * S.shape[0] - def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target): - self._psi_computations(Z,mu,S) - mu2_S = np.sum(self.mu2_S,0)# Q, - target += (dL_dpsi2[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1) - #TODO: tensordot would gain some time here + def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): + self._psi_computations(Z, mu, S) +# mu2_S = np.sum(self.mu2_S, 0) # Q, +# import ipdb;ipdb.set_trace() +# prod = (np.eye(Z.shape[0])[:, None, :, None] * (np.dot(self.ZA, self.inner) * self.variances)[None, :, None]) +# psi2_dZ = prod.swapaxes(0, 1) + prod + psi2_dZ_old = (dL_dpsi2[:, :, :, None] * (self.mu2_S[:, None, None, :] * (Z * np.square(self.variances)[None, :])[None, None, :, :])).sum(0).sum(1) + target += psi2_dZ_old # .sum(0).sum(1) + # TODO: tensordot would gain some time here #---------------------------------------# # Precomputations # #---------------------------------------# - def _K_computations(self,X,X2): + def _K_computations(self, X, X2): if not (np.array_equal(X, self._Xcache) and np.array_equal(X2, self._X2cache)): self._Xcache = X.copy() if X2 is None: @@ -177,16 +199,18 @@ class linear(kernpart): self._X2cache = None else: self._X2cache = X2.copy() - self._dot_product = np.dot(X,X2.T) + self._dot_product = np.dot(X, X2.T) - def _psi_computations(self,Z,mu,S): - #here are the "statistics" for psi1 and psi2 - if not np.all(Z==self._Z): - #Z has changed, compute Z specific stuff - #self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q - self.ZZ = np.empty((Z.shape[0],Z.shape[0],Z.shape[1]),order='F') - [tdot(Z[:,i:i+1],self.ZZ[:,:,i].T) for i in xrange(Z.shape[1])] + def 
_psi_computations(self, Z, mu, S): + # here are the "statistics" for psi1 and psi2 + if not np.all(Z == self._Z): + # Z has changed, compute Z specific stuff + # self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q + self.ZZ = np.empty((Z.shape[0], Z.shape[0], Z.shape[1]), order='F') + [tdot(Z[:, i:i + 1], self.ZZ[:, :, i].T) for i in xrange(Z.shape[1])] self._Z = Z.copy() - if not (np.all(mu==self._mu) and np.all(S==self._S)): - self.mu2_S = np.square(mu)+S + self.ZA = Z * self.variances + if not (np.all(mu == self._mu) and np.all(S == self._S)): + self.mu2_S = np.square(mu) + S + self.inner = tdot(mu.T) + (np.diag(S.sum(0))) self._mu, self._S = mu.copy(), S.copy() diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py index 6333fb1c..793c2613 100644 --- a/GPy/models/Bayesian_GPLVM.py +++ b/GPy/models/Bayesian_GPLVM.py @@ -308,6 +308,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): Slatentgrads = ax3.quiver(xlatent, S, Ulatent, Sg, color=colors, units=quiver_units, scale_units=quiver_scale_units, scale=quiver_scale) + ax3.set_ylim(0, 1.) xZ = np.tile(np.arange(0, Z.shape[0])[:, None], Z.shape[1]) UZ = np.zeros_like(Z) @@ -427,11 +428,11 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): cbarkmmdl.update_normal(imkmmdl) ax2.relim() - ax3.relim() + # ax3.relim() ax4.relim() ax5.relim() ax2.autoscale() - ax3.autoscale() + # ax3.autoscale() ax4.autoscale() ax5.autoscale() diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py index aa55ecd3..58f02cca 100644 --- a/GPy/models/sparse_GP.py +++ b/GPy/models/sparse_GP.py @@ -30,22 +30,22 @@ class sparse_GP(GP): """ def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False): - self.scale_factor = 100.0 # a scaling factor to help keep the algorithm stable + self.scale_factor = 100.0# a scaling factor to help keep the algorithm stable self.auto_scale_factor = False self.Z = Z self.M = Z.shape[0] self.likelihood = likelihood if X_variance is None: - self.has_uncertain_inputs = False + self.has_uncertain_inputs=False else: - assert X_variance.shape == X.shape - self.has_uncertain_inputs = True + assert X_variance.shape==X.shape + self.has_uncertain_inputs=True self.X_variance = X_variance GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X) - # normalize X uncertainty also + #normalize X uncertainty also if self.has_uncertain_inputs: self.X_variance /= np.square(self._Xstd) @@ -54,155 +54,156 @@ class sparse_GP(GP): # kernel computations, using BGPLVM notation self.Kmm = self.kern.K(self.Z) if self.has_uncertain_inputs: - self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance) - self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance).T - self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance) + self.psi0 = self.kern.psi0(self.Z,self.X, self.X_variance) + self.psi1 = self.kern.psi1(self.Z,self.X, self.X_variance).T + self.psi2 = self.kern.psi2(self.Z,self.X, self.X_variance) else: self.psi0 = self.kern.Kdiag(self.X) - self.psi1 = self.kern.K(self.Z, self.X) + self.psi1 = self.kern.K(self.Z,self.X) self.psi2 = None def _computations(self): - # TODO: find routine to multiply triangular matrices + #TODO: find routine to multiply triangular matrices sf = self.scale_factor - sf2 = sf ** 2 + sf2 = sf**2 - # The rather complex computations of psi2_beta_scaled + #The rather complex computations of psi2_beta_scaled if self.likelihood.is_heteroscedastic: - assert self.likelihood.D == 1 # TODO: what if the likelihood is heterscedatic and there are multiple independent outputs? 
+ assert self.likelihood.D == 1 #TODO: what if the likelihood is heterscedatic and there are multiple independent outputs? if self.has_uncertain_inputs: - self.psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1) / sf2)).sum(0) + self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0) else: - tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)) / sf) - # self.psi2_beta_scaled = np.dot(tmp,tmp.T) + tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf) + #self.psi2_beta_scaled = np.dot(tmp,tmp.T) self.psi2_beta_scaled = tdot(tmp) else: if self.has_uncertain_inputs: - self.psi2_beta_scaled = (self.psi2 * (self.likelihood.precision / sf2)).sum(0) + self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0) else: - tmp = self.psi1 * (np.sqrt(self.likelihood.precision) / sf) - # self.psi2_beta_scaled = np.dot(tmp,tmp.T) + tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf) + #self.psi2_beta_scaled = np.dot(tmp,tmp.T) self.psi2_beta_scaled = tdot(tmp) self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) - self.V = (self.likelihood.precision / self.scale_factor) * self.likelihood.Y + self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y - # Compute A = L^-1 psi2 beta L^-T - # self. A = mdot(self.Lmi,self.psi2_beta_scaled,self.Lmi.T) - tmp = linalg.lapack.flapack.dtrtrs(self.Lm, self.psi2_beta_scaled.T, lower=1)[0] - self.A = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1)[0] + #Compute A = L^-1 psi2 beta L^-T + #self. A = mdot(self.Lmi,self.psi2_beta_scaled,self.Lmi.T) + tmp = linalg.lapack.flapack.dtrtrs(self.Lm,self.psi2_beta_scaled.T,lower=1)[0] + self.A = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0] - self.B = np.eye(self.M) / sf2 + self.A + self.B = np.eye(self.M)/sf2 + self.A self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) self.psi1V = np.dot(self.psi1, self.V) - tmp = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.Bi), lower=1, trans=1)[0] - self.C = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1, trans=1)[0] + tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0] + self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0] - # self.Cpsi1V = np.dot(self.C,self.psi1V) - # back substitute C into psi1V - tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0) - tmp, _ = linalg.lapack.flapack.dpotrs(self.LB, tmp, lower=1) - self.Cpsi1V, _ = linalg.lapack.flapack.dtrtrs(self.Lm, tmp, lower=1, trans=1) + #back substutue C into psi1V + tmp,info1 = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.psi1V),lower=1,trans=0) + tmp,info2 = linalg.lapack.flapack.dpotrs(self.LB,tmp,lower=1) + self.Cpsi1V,info3 = linalg.lapack.flapack.dtrtrs(self.Lm,tmp,lower=1,trans=1) + #self.Cpsi1V = np.dot(self.C,self.psi1V) - self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V, self.psi1V.T) # TODO: stabilize? 
- self.E = tdot(self.Cpsi1V / sf) + self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T) + + self.E = tdot(self.Cpsi1V/sf) # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case - self.dL_dpsi0 = -0.5 * self.D * (self.likelihood.precision * np.ones([self.N, 1])).flatten() - self.dL_dpsi1 = np.dot(self.Cpsi1V, self.V.T) + self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten() + self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T) if self.likelihood.is_heteroscedastic: if self.has_uncertain_inputs: - # self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB - # self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC - # self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD - self.dL_dpsi2 = 0.5 * self.likelihood.precision[:, None, None] * (self.D * (self.Kmmi - self.C / sf2) - self.E)[None, :, :] + #self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB + #self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC + #self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD + self.dL_dpsi2 = 0.5*self.likelihood.precision[:,None,None]*(self.D*(self.Kmmi - self.C/sf2) -self.E)[None,:,:] else: - # self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB - # self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC - # self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD - self.dL_dpsi1 += np.dot(self.Kmmi - self.C / sf2 - self.E, self.psi1 * self.likelihood.precision.reshape(1, self.N)) + #self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB + #self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC + #self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD + self.dL_dpsi1 += np.dot(self.Kmmi - self.C/sf2 -self.E,self.psi1*self.likelihood.precision.reshape(1,self.N)) self.dL_dpsi2 = None else: - # self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB - # self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC - # self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD - self.dL_dpsi2 = 0.5 * self.likelihood.precision * (self.D * (self.Kmmi - self.C / sf2) - self.E) + #self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB + #self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC + #self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD + self.dL_dpsi2 = 0.5*self.likelihood.precision*(self.D*(self.Kmmi - self.C/sf2) -self.E) if self.has_uncertain_inputs: - # repeat for each of the N psi_2 matrices - self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None, :, :], self.N, axis=0) + #repeat for each of the N psi_2 matrices + self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None,:,:],self.N,axis=0) else: - self.dL_dpsi1 += 2.*np.dot(self.dL_dpsi2, self.psi1) + self.dL_dpsi1 += 2.*np.dot(self.dL_dpsi2,self.psi1) self.dL_dpsi2 = None # Compute dL_dKmm - # self.dL_dKmm_old = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB - # self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC - # 
self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD - tmp = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.B), lower=1, trans=1)[0] - self.dL_dKmm = -0.5 * self.D * sf2 * linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1, trans=1)[0] # dA - tmp = np.dot(self.D * self.C + self.E * sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1 - tmp = linalg.lapack.flapack.dpotrs(self.Lm, np.asfortranarray(tmp.T), lower=1)[0].T - self.dL_dKmm += 0.5 * (self.D * self.C / sf2 + self.E) + tmp # d(C+D) + #self.dL_dKmm_old = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB + #self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC + #self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD + tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.B),lower=1,trans=1)[0] + self.dL_dKmm = -0.5*self.D*sf2*linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0] #dA + tmp = np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1 + tmp = linalg.lapack.flapack.dpotrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0].T + self.dL_dKmm += 0.5*(self.D*self.C/sf2 + self.E) +tmp # d(C+D) - # the partial derivative vector for the likelihood - if self.likelihood.Nparams == 0: - # save computation here. + #the partial derivative vector for the likelihood + if self.likelihood.Nparams ==0: + #save computation here. self.partial_for_likelihood = None elif self.likelihood.is_heteroscedastic: raise NotImplementedError, "heteroscedatic derivates not implemented" - # self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA - # self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB - # self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC - # self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD + #self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA + #self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB + #self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC + #self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD else: - # likelihood is not heterscedatic - self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2 - self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision * sf2) - self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi, self.A) * self.likelihood.precision - self.partial_for_likelihood += self.likelihood.precision * (0.5 * trace_dot(self.psi2_beta_scaled, self.E * sf2) - np.trace(self.Cpsi1VVpsi1)) + #likelihood is not heterscedatic + 
self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2 + self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2) + self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision + self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1)) def log_likelihood(self): """ Compute the (lower bound on the) log marginal likelihood """ - sf2 = self.scale_factor ** 2 + sf2 = self.scale_factor**2 if self.likelihood.is_heteroscedastic: - A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y) - B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2) + A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y) + B = -0.5*self.D*(np.sum(self.likelihood.precision.flatten()*self.psi0) - np.trace(self.A)*sf2) else: - A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT - B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2) - C = -0.5 * self.D * (self.B_logdet + self.M * np.log(sf2)) - D = 0.5 * np.trace(self.Cpsi1VVpsi1) - return A + B + C + D + A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT + B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2) + C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2)) + D = 0.5*np.trace(self.Cpsi1VVpsi1) + return A+B+C+D def _set_params(self, p): - self.Z = p[:self.M * self.Q].reshape(self.M, self.Q) - self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam]) - self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:]) + self.Z = p[:self.M*self.Q].reshape(self.M, self.Q) + self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam]) + self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:]) self._compute_kernel_matrices() if self.auto_scale_factor: - self.scale_factor = np.sqrt(self.psi2.sum(0).mean() * self.likelihood.precision) - # if self.auto_scale_factor: + self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision) + #if self.auto_scale_factor: # if self.likelihood.is_heteroscedastic: # self.scale_factor = max(1,np.sqrt(self.psi2_beta_scaled.sum(0).mean())) # else: # self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision) - # self.scale_factor = 1. + #self.scale_factor = 1. 
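Note on the scale factor handled here: writing beta for the noise precision, s for scale_factor and Kmm = Lm Lm^T, the homoscedastic branch of log_likelihood above assembles

    A = -(N*D/2) * log(2*pi*sigma^2) - (beta/2) * tr(Y Y^T)
    B = -(D/2) * (beta * sum_n psi0_n - s^2 * tr(A_mat))
    C = -(D/2) * (log det(B_mat) + M * log(s^2))
    D =  (1/2) * tr(Cpsi1V . (psi1 V)^T)

where A_mat = Lm^-1 (beta*psi2/s^2) Lm^-T, B_mat = I/s^2 + A_mat and V = (beta/s)*Y, as computed in _computations. As far as these expressions go, the powers of s cancel term by term, so the (auto-)scale factor only changes the numerical conditioning of the triangular solves, not the value of the bound.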
self._computations() def _get_params(self): - return np.hstack([self.Z.flatten(), GP._get_params(self)]) + return np.hstack([self.Z.flatten(),GP._get_params(self)]) def _get_param_names(self): - return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], []) + GP._get_param_names(self) + return sum([['iip_%i_%i'%(i,j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])],[]) + GP._get_param_names(self) def update_likelihood_approximation(self): """ @@ -214,9 +215,9 @@ class sparse_GP(GP): if self.has_uncertain_inputs: raise NotImplementedError, "EP approximation not implemented for uncertain inputs" else: - self.likelihood.fit_DTC(self.Kmm, self.psi1) - # self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) - self._set_params(self._get_params()) # update the GP + self.likelihood.fit_DTC(self.Kmm,self.psi1) + #self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) + self._set_params(self._get_params()) # update the GP def _log_likelihood_gradients(self): @@ -226,13 +227,13 @@ class sparse_GP(GP): """ Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel """ - dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z) + dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z) if self.has_uncertain_inputs: - dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance) - dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T, self.Z, self.X, self.X_variance) - dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance) + dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_variance) + dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_variance) + dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z,self.X, self.X_variance) else: - dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.Z, self.X) + dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X) dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X) return dL_dtheta @@ -243,22 +244,22 @@ class sparse_GP(GP): """ dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ if self.has_uncertain_inputs: - dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance) + dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_variance) dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance) else: - dL_dZ += self.kern.dK_dX(self.dL_dpsi1, self.Z, self.X) + dL_dZ += self.kern.dK_dX(self.dL_dpsi1,self.Z,self.X) return dL_dZ def _raw_predict(self, Xnew, which_parts='all', full_cov=False): """Internal helper function for making predictions, does not account for normalization""" Kx = self.kern.K(self.Z, Xnew) - mu = mdot(Kx.T, self.C / self.scale_factor, self.psi1V) + mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V) if full_cov: - Kxx = self.kern.K(Xnew, which_parts=which_parts) - var = Kxx - mdot(Kx.T, (self.Kmmi - self.C / self.scale_factor ** 2), Kx) # NOTE this won't work for plotting + Kxx = self.kern.K(Xnew,which_parts=which_parts) + var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting else: - Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts) - var = Kxx - np.sum(Kx * np.dot(self.Kmmi - self.C / self.scale_factor ** 2, Kx), 0) + Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts) + var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) - return mu, var[:, None] + return 
mu,var[:,None] diff --git a/GPy/testing/cgd_tests.py b/GPy/testing/cgd_tests.py index 8a0fa7a8..07c3d3aa 100644 --- a/GPy/testing/cgd_tests.py +++ b/GPy/testing/cgd_tests.py @@ -5,7 +5,7 @@ Created on 26 Apr 2013 ''' import unittest import numpy -from GPy.inference.conjugate_gradient_descent import CGD +from GPy.inference.conjugate_gradient_descent import CGD, RUNNING import pylab import time from scipy.optimize.optimize import rosen, rosen_der @@ -14,17 +14,62 @@ from scipy.optimize.optimize import rosen, rosen_der class Test(unittest.TestCase): def testMinimizeSquare(self): - f = lambda x: x ** 2 + 2 * x - 2 + N = 2 + A = numpy.random.rand(N) * numpy.eye(N) + b = numpy.random.rand(N) * 0 + f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b) + df = lambda x: numpy.dot(A, x) - b + + opt = CGD() + + restarts = 10 + for _ in range(restarts): + try: + x0 = numpy.random.randn(N) * .5 + res = opt.fmin(f, df, x0, messages=0, + maxiter=1000, gtol=1e-10) + assert numpy.allclose(res[0], 0, atol=1e-3) + break + except: + # RESTART + pass + else: + raise AssertionError("Test failed for {} restarts".format(restarts)) + + def testRosen(self): + N = 2 + f = rosen + df = rosen_der + x0 = numpy.random.randn(N) * .5 + + opt = CGD() + + restarts = 10 + for _ in range(restarts): + try: + x0 = numpy.random.randn(N) * .5 + res = opt.fmin(f, df, x0, messages=0, + maxiter=1000, gtol=1e-10) + assert numpy.allclose(res[0], 1, atol=1e-5) + break + except: + # RESTART + pass + else: + raise AssertionError("Test failed for {} restarts".format(restarts)) if __name__ == "__main__": - # import sys;sys.argv = ['', 'Test.testMinimizeSquare'] +# import sys;sys.argv = ['', +# 'Test.testMinimizeSquare', +# 'Test.testRosen', +# ] # unittest.main() + N = 2 A = numpy.random.rand(N) * numpy.eye(N) - b = numpy.random.rand(N) -# f = lambda x: numpy.dot(x.T.dot(A), x) + numpy.dot(x.T, b) + b = numpy.random.rand(N) * 0 +# f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b) # df = lambda x: numpy.dot(A, x) - b - f = rosen df = rosen_der x0 = numpy.random.randn(N) * .5 @@ -48,14 +93,21 @@ if __name__ == "__main__": optplts, = ax.plot3D([x0[0]], [x0[1]], zs=f(x0), marker='o', color='r') raw_input("enter to start optimize") + res = [0] - def callback(x, *a, **kw): - xopts.append(x.copy()) + def callback(*r): + xopts.append(r[0].copy()) # time.sleep(.3) optplts._verts3d = [numpy.array(xopts)[:, 0], numpy.array(xopts)[:, 1], [f(xs) for xs in xopts]] fig.canvas.draw() + if r[-1] != RUNNING: + res[0] = r + + p, c = opt.fmin_async(f, df, x0.copy(), callback, messages=True, maxiter=1000, + report_every=20, gtol=1e-12) - res = opt.fmin(f, df, x0, callback, messages=True, maxiter=1000, report_every=1) pylab.ion() pylab.show() + + pass diff --git a/GPy/testing/kern_psi_stat_tests.py b/GPy/testing/kern_psi_stat_tests.py index 581de9be..6166bb89 100644 --- a/GPy/testing/kern_psi_stat_tests.py +++ b/GPy/testing/kern_psi_stat_tests.py @@ -9,21 +9,30 @@ import numpy as np import pylab __test__ = False +np.random.seed(0) + +def ard(p): + try: + if p.ARD: + return "ARD" + except: + pass + return "" class Test(unittest.TestCase): D = 9 - M = 5 - Nsamples = 3e6 + M = 3 + Nsamples = 6e6 def setUp(self): self.kerns = ( - GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True), - GPy.kern.linear(self.D), GPy.kern.linear(self.D, ARD=True), +# GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True), + GPy.kern.linear(self.D, ARD=False), GPy.kern.linear(self.D, ARD=True), GPy.kern.linear(self.D) + GPy.kern.bias(self.D), - GPy.kern.rbf(self.D) + 
GPy.kern.bias(self.D), +# GPy.kern.rbf(self.D) + GPy.kern.bias(self.D), GPy.kern.linear(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D), - GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D), - GPy.kern.bias(self.D), GPy.kern.white(self.D), +# GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D), +# GPy.kern.bias(self.D), GPy.kern.white(self.D), ) self.q_x_mean = np.random.randn(self.D) self.q_x_variance = np.exp(np.random.randn(self.D)) @@ -66,18 +75,21 @@ class Test(unittest.TestCase): K_ += K diffs.append(((psi2 - (K_ / (i + 1))) ** 2).mean()) K_ /= self.Nsamples / Nsamples + msg = "psi2: {}".format("+".join([p.name + ard(p) for p in kern.parts])) try: -# pylab.figure("+".join([p.name for p in kern.parts]) + "psi2") -# pylab.plot(diffs) + pylab.figure(msg) + pylab.plot(diffs) self.assertTrue(np.allclose(psi2.squeeze(), K_, rtol=1e-1, atol=.1), - msg="{}: not matching".format("+".join([p.name for p in kern.parts]))) + msg=msg + ": not matching") except: - print "{}: not matching".format(kern.parts[0].name) + import ipdb;ipdb.set_trace() + kern.psi2(self.Z, self.q_x_mean, self.q_x_variance) + print msg + ": not matching" if __name__ == "__main__": import sys;sys.argv = ['', - 'Test.test_psi0', - 'Test.test_psi1', +# 'Test.test_psi0', +# 'Test.test_psi1', 'Test.test_psi2'] unittest.main() diff --git a/GPy/testing/psi_stat_tests.py b/GPy/testing/psi_stat_tests.py index 044f7fca..40c98619 100644 --- a/GPy/testing/psi_stat_tests.py +++ b/GPy/testing/psi_stat_tests.py @@ -106,18 +106,18 @@ if __name__ == "__main__": import sys interactive = 'i' in sys.argv if interactive: - N, M, Q, D = 30, 5, 4, 30 - X = numpy.random.rand(N, Q) - k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) - K = k.K(X) - Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T - Y -= Y.mean(axis=0) - k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M) - m.ensure_default_constraints() - m.randomize() -# self.assertTrue(m.checkgrad()) - +# N, M, Q, D = 30, 5, 4, 30 +# X = numpy.random.rand(N, Q) +# k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) +# K = k.K(X) +# Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T +# Y -= Y.mean(axis=0) +# k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) +# m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M) +# m.ensure_default_constraints() +# m.randomize() +# # self.assertTrue(m.checkgrad()) + numpy.random.seed(0) Q = 5 N = 50 M = 10 @@ -126,11 +126,11 @@ if __name__ == "__main__": X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1) Z = numpy.random.permutation(X)[:M] Y = X.dot(numpy.random.randn(Q, D)) - kernel = GPy.kern.bias(Q) - - kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q), - GPy.kern.linear(Q) + GPy.kern.bias(Q), - GPy.kern.rbf(Q) + GPy.kern.bias(Q)] +# kernel = GPy.kern.bias(Q) +# +# kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q), +# GPy.kern.linear(Q) + GPy.kern.bias(Q), +# GPy.kern.rbf(Q) + GPy.kern.bias(Q)] # for k in kernels: # m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, @@ -143,11 +143,13 @@ if __name__ == "__main__": # M=M, kernel=kernel) # m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, # M=M, kernel=kernel) - m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, - M=M, kernel=GPy.kern.rbf(Q)) +# m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, +# M=M, kernel=GPy.kern.rbf(Q)) m3 = 
PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, - M=M, kernel=GPy.kern.linear(Q) + GPy.kern.bias(Q)) - m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, - M=M, kernel=GPy.kern.rbf(Q) + GPy.kern.bias(Q)) + M=M, kernel=GPy.kern.linear(Q)) + m3.ensure_default_constraints() + # + GPy.kern.bias(Q)) +# m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, +# M=M, kernel=GPy.kern.rbf(Q) + GPy.kern.bias(Q)) else: unittest.main()
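Closing note on the psi2 rewrite in GPy/kern/linear.py: the triple loop marked "slow way of doing it, but right" spells out, for q(x_n) = N(mu_n, diag(S_n)) and ARD variances v,

    psi2[n, m, m'] = E_q[ k(x_n, z_m) * k(x_n, z_m') ]
                   = (v * z_m)^T (mu_n mu_n^T + diag(S_n)) (v * z_m')

One caveat: np.diag(S[n:n + 1]) in that loop appears to extract the diagonal of a (1, Q) slice (a single entry, broadcast over the sum) rather than build the (Q, Q) matrix diag(S_n), so the sketch below writes diag(S_n) out explicitly. psi2_linear is a hypothetical helper for illustration, not code from this patch, and the Monte Carlo check mirrors kern_psi_stat_tests:

    import numpy as np

    def psi2_linear(Z, mu, S, variances):
        # psi2[n, m, mp] = (v*z_m) . (mu_n mu_n^T + diag(S_n)) . (v*z_mp)
        ZA = Z * variances                                  # (M, Q)
        inner = (mu[:, :, None] * mu[:, None, :]
                 + S[:, :, None] * np.eye(mu.shape[1]))     # (N, Q, Q)
        return np.einsum('mq,nqr,pr->nmp', ZA, inner, ZA)

    np.random.seed(0)
    N, M, Q, n_samples = 4, 3, 2, 100000
    mu = np.random.randn(N, Q); S = np.exp(np.random.randn(N, Q))
    Z = np.random.randn(M, Q); v = np.exp(np.random.randn(Q))
    # draw x_n ~ N(mu_n, diag(S_n)) and average k(x, z_m) k(x, z_mp)
    X = mu[:, None] + np.sqrt(S)[:, None] * np.random.randn(N, n_samples, Q)
    K = np.einsum('nsq,q,mq->nsm', X, v, Z)  # k(x, z) = x . diag(v) . z^T
    mc = (K[:, :, :, None] * K[:, :, None, :]).mean(1)
    err = np.max(np.abs(mc - psi2_linear(Z, mu, S, v)))
    print "max abs Monte Carlo deviation:", err  # shrinks as n_samples grows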