From 06336bf0d461bd90c3ca845e24803abeb4950f18 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Tue, 6 May 2014 16:30:17 +0100 Subject: [PATCH 1/8] switch psi2 statistics design --- GPy/core/sparse_gp.py | 2 +- .../var_dtc_parallel.py | 28 +-- GPy/kern/_src/add.py | 2 +- GPy/kern/_src/kern.py | 2 +- GPy/kern/_src/kernel_slice_operations.py | 4 +- GPy/kern/_src/linear.py | 2 +- GPy/kern/_src/psi_comp/ssrbf_psi_comp.py | 192 ++++++++++++++---- GPy/kern/_src/rbf.py | 120 ++++++----- GPy/kern/_src/static.py | 2 +- GPy/models/ss_gplvm.py | 11 +- GPy/util/initialization.py | 2 +- GPy/util/linalg.py | 2 +- 12 files changed, 245 insertions(+), 124 deletions(-) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 7552b8ac..333fd526 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -66,7 +66,7 @@ class SparseGP(GP): #gradients wrt Z self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z) self.Z.gradient += self.kern.gradients_Z_expectations( - self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X) + self.grad_dict['dL_dpsi0'], self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X) else: #gradients wrt kernel self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X) diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py index 87236e2a..7c347213 100644 --- a/GPy/inference/latent_function_inference/var_dtc_parallel.py +++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py @@ -70,12 +70,13 @@ class VarDTC_minibatch(object): #see whether we've got a different noise variance for each datum beta = 1./np.fmax(likelihood.variance, 1e-6) het_noise = beta.size > 1 + if het_noise: + self.batchsize = 1 # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency! #self.YYTfactor = beta*self.get_YYTfactor(Y) YYT_factor = Y trYYT = self.get_trYYT(Y) - psi2_full = np.zeros((num_inducing,num_inducing)) psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM psi0_full = 0 @@ -104,19 +105,18 @@ class VarDTC_minibatch(object): YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum() else: psi0_full += psi0.sum() - psi1Y_full += np.dot(Y_slice.T,psi1) # DxM - + psi1Y_full += np.dot(Y_slice.T,psi1) # DxM if uncertain_inputs: if het_noise: - psi2_full += np.einsum('n,nmo->mo',beta_slice,psi2) + psi2_full += beta_slice*psi2 else: - psi2_full += psi2.sum(axis=0) + psi2_full += psi2 else: if het_noise: - psi2_full += np.einsum('n,nm,no->mo',beta_slice,psi1,psi1) + psi2_full += beta_slice*np.outer(psi1,psi1) else: - psi2_full += tdot(psi1.T) + psi2_full += np.outer(psi1,psi1) if not het_noise: psi0_full *= beta @@ -223,7 +223,7 @@ class VarDTC_minibatch(object): psi2 = None if het_noise: - beta = beta[n_start:n_end] + beta = beta[n_start] # assuming batchsize==1 betaY = beta*Y_slice betapsi1 = np.einsum('n,nm->nm',beta,psi1) @@ -244,7 +244,7 @@ class VarDTC_minibatch(object): dL_dpsi1 = np.dot(betaY,v.T) if uncertain_inputs: - dL_dpsi2 = np.einsum('n,mo->nmo',beta * np.ones((n_end-n_start,)),dL_dpsi2R) + dL_dpsi2 = beta* dL_dpsi2R else: dL_dpsi1 += np.dot(betapsi1,dL_dpsi2R)*2. dL_dpsi2 = None @@ -262,11 +262,11 @@ class VarDTC_minibatch(object): dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. 
- np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1)
         else:
             if uncertain_inputs:
-                psiR = np.einsum('mo,nmo->',dL_dpsi2R,psi2)
+                psiR = np.einsum('mo,mo->',dL_dpsi2R,psi2)
             else:
                 psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R)
-            dL_dthetaL = ((np.square(betaY)).sum() + np.square(beta)*output_dim*(psi0.sum())-num_slice*output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum()
+            dL_dthetaL = ((np.square(betaY)).sum() + beta*beta*output_dim*(psi0.sum())-num_slice*output_dim*beta)/2. - beta*beta*psiR- (betaY*np.dot(betapsi1,v)).sum()

         if uncertain_inputs:
             grad_dict = {'dL_dpsi0':dL_dpsi0,
@@ -296,7 +296,7 @@ def update_gradients(model):
     kern_grad = model.kern.gradient.copy()

     #gradients w.r.t. Z
-    model.Z.gradient[:,model.kern.active_dims] = model.kern.gradients_X(dL_dKmm, model.Z)
+    model.Z.gradient = model.kern.gradients_X(dL_dKmm, model.Z)

     isEnd = False
     while not isEnd:
@@ -309,8 +309,8 @@ def update_gradients(model):
         kern_grad += model.kern.gradient

         #gradients w.r.t. Z
-        model.Z.gradient[:,model.kern.active_dims] += model.kern.gradients_Z_expectations(
-            grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice)
+        model.Z.gradient += model.kern.gradients_Z_expectations(
+            dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice)

         #gradients w.r.t. posterior parameters of X
         X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
index 88b8e40c..9589d6be 100644
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@@ -119,7 +119,7 @@ class Add(CombinationKernel):
             eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
             p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)

-    def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+    def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         from static import White, Bias
         target = np.zeros(Z.shape)
         for p1 in self.parts:
diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py
index 70bd42b9..09eb6dd9 100644
--- a/GPy/kern/_src/kern.py
+++ b/GPy/kern/_src/kern.py
@@ -115,7 +115,7 @@ class Kern(Parameterized):
         """
         raise NotImplementedError

-    def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+    def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         """
         Returns the derivative of the objective wrt Z, using the chain rule
         through the expectation variables.
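The contract being changed here: gradients_Z_expectations must return dL/dZ (an MxQ array) assembled by the chain rule through the psi statistics, and this patch threads the extra dL_dpsi0 argument through every overload of it. Below is a minimal, self-contained sketch of that contract, using a deliberately simple toy statistic psi1 = X Z^T (not GPy's actual kernel expectation), together with a finite-difference check:

    # Toy illustration of the gradients_Z_expectations contract (not GPy code).
    import numpy as np

    def toy_psi1(Z, X):
        # psi1[n, m] = <x_n, z_m>, a stand-in for the real expectation of k(x_n, z_m)
        return X.dot(Z.T)                              # N x M

    def toy_gradients_Z(dL_dpsi1, Z, X):
        # chain rule: dL/dZ[m, q] = sum_n dL/dpsi1[n, m] * dpsi1[n, m]/dZ[m, q]
        return dL_dpsi1.T.dot(X)                       # M x Q

    rng = np.random.RandomState(0)
    X, Z = rng.randn(5, 3), rng.randn(4, 3)
    dL_dpsi1 = rng.randn(5, 4)                         # pretend upstream gradient
    L = lambda Z_: np.sum(dL_dpsi1 * toy_psi1(Z_, X))  # linearised objective
    eps, g_fd = 1e-6, np.zeros_like(Z)
    for i in range(Z.shape[0]):
        for j in range(Z.shape[1]):
            Zp = Z.copy(); Zp[i, j] += eps
            g_fd[i, j] = (L(Zp) - L(Z)) / eps
    assert np.allclose(g_fd, toy_gradients_Z(dL_dpsi1, Z, X), atol=1e-4)

The same pattern extends to dL_dpsi0 and dL_dpsi2; the real SSRBF derivatives are the einsum expressions in psiDerivativecomputations below.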
diff --git a/GPy/kern/_src/kernel_slice_operations.py b/GPy/kern/_src/kernel_slice_operations.py index c1c8d7f1..3473ffce 100644 --- a/GPy/kern/_src/kernel_slice_operations.py +++ b/GPy/kern/_src/kernel_slice_operations.py @@ -124,9 +124,9 @@ def _slice_update_gradients_expectations(f): def _slice_gradients_Z_expectations(f): @wraps(f) - def wrap(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): with _Slice_wrap(self, Z, variational_posterior) as s: - ret = s.handle_return_array(f(self, dL_dpsi1, dL_dpsi2, s.X, s.X2)) + ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2)) return ret return wrap diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index f9dacf02..007649b0 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -168,7 +168,7 @@ class Linear(Kern): else: self.variances.gradient += 2.*np.sum(dL_dpsi2 * self.psi2(Z, variational_posterior))/self.variances - def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): gamma = variational_posterior.binary_prob mu = variational_posterior.mean diff --git a/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py b/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py index d8414cfb..5e30b5c5 100644 --- a/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py +++ b/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py @@ -9,12 +9,23 @@ import numpy as np from GPy.util.caching import Cache_this @Cache_this(limit=1) -def _Z_distances(Z): - Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q - Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q - return Zhat, Zdist +def psicomputations(variance, lengthscale, Z, mu, S, gamma): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi0, psi1 and psi2 + # Produced intermediate results: + # _psi1 NxM + + psi0 = np.empty(mu.shape[0]) + psi0[:] = variance + psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma) + psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma) + return psi0, psi1, psi2 -@Cache_this(limit=1) def _psi1computations(variance, lengthscale, Z, mu, S, gamma): """ Z - MxQ @@ -22,15 +33,10 @@ def _psi1computations(variance, lengthscale, Z, mu, S, gamma): S - NxQ gamma - NxQ """ - # here are the "statistics" for psi1 and psi2 + # here are the "statistics" for psi1 # Produced intermediate results: # _psi1 NxM - # _dpsi1_dvariance NxM - # _dpsi1_dlengthscale NxMxQ - # _dpsi1_dZ NxMxQ - # _dpsi1_dgamma NxMxQ - # _dpsi1_dmu NxMxQ - # _dpsi1_dS NxMxQ + lengthscale2 = np.square(lengthscale) @@ -40,25 +46,15 @@ def _psi1computations(variance, lengthscale, Z, mu, S, gamma): _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ _psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ - _psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom)) # NxMxQ - _psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ + _psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ + _psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. 
# NxMxQ _psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2) _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM - _psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ - _psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ - _psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ _psi1 = variance * np.exp(_psi1_exp_sum) # NxM - _dpsi1_dvariance = _psi1 / variance # NxM - _dpsi1_dgamma = _psi1_q * (_psi1_exp_dist_sq/_psi1_denom_sqrt-_psi1_exp_Z) # NxMxQ - _dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ - _dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ - _dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ - _dpsi1_dlengthscale = 2.*lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ - return _psi1, _dpsi1_dvariance, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _dpsi1_dZ, _dpsi1_dlengthscale + return _psi1 -@Cache_this(limit=1) def _psi2computations(variance, lengthscale, Z, mu, S, gamma): """ Z - MxQ @@ -66,19 +62,14 @@ def _psi2computations(variance, lengthscale, Z, mu, S, gamma): S - NxQ gamma - NxQ """ - # here are the "statistics" for psi1 and psi2 + # here are the "statistics" for psi2 # Produced intermediate results: - # _psi2 NxMxM - # _psi2_dvariance NxMxM - # _psi2_dlengthscale NxMxMxQ - # _psi2_dZ NxMxMxQ - # _psi2_dgamma NxMxMxQ - # _psi2_dmu NxMxMxQ - # _psi2_dS NxMxMxQ + # _psi2 MxM lengthscale2 = np.square(lengthscale) - _psi2_Zhat, _psi2_Zdist = _Z_distances(Z) + _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q + _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ @@ -93,15 +84,130 @@ def _psi2computations(variance, lengthscale, Z, mu, S, gamma): _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2) _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM - _psi2_q = np.square(variance) * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ + _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM + + return _psi2 + +def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): + ARD = (len(lengthscale)!=1) + + dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + + dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 + + dL_dlengscale = dl_psi1 + dl_psi2 + if not ARD: + dL_dlengscale = dL_dlengscale.sum() + + dL_dgamma = dgamma_psi1 + dgamma_psi2 + dL_dmu = dmu_psi1 + dmu_psi2 + dL_dS = dS_psi1 + dS_psi2 + dL_dZ = dZ_psi1 + dZ_psi2 + + return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, 
dL_dgamma + +def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma): + """ + dL_dpsi1 - NxM + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi1 + # Produced intermediate results: dL_dparams w.r.t. psi1 + # _dL_dvariance 1 + # _dL_dlengthscale Q + # _dL_dZ MxQ + # _dL_dgamma NxQ + # _dL_dmu NxQ + # _dL_dS NxQ + + lengthscale2 = np.square(lengthscale) + + # psi1 + _psi1_denom = S / lengthscale2 + 1. # NxQ + _psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ + _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ + _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ + _psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ + _psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ + _psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ + _psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2) + _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ + _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM + _psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ + _psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ + _psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ + _psi1 = variance * np.exp(_psi1_exp_sum) # NxM + _dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1 + _dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ + _dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ + _dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ + _dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z)) + _dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z)) + +# _dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ +# _dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ +# _dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ +# _dpsi1_dlengthscale = 2.*lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ + + return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma + +def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + dL_dpsi2 - MxM + """ + # here are the "statistics" for psi2 + # Produced the derivatives w.r.t. 
psi2: + # _dL_dvariance 1 + # _dL_dlengthscale Q + # _dL_dZ MxQ + # _dL_dgamma NxQ + # _dL_dmu NxQ + # _dL_dS NxQ + + lengthscale2 = np.square(lengthscale) + + _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q + _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q + _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q + _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ + + # psi2 + _psi2_denom = 2.*S / lengthscale2 + 1. # NxQ + _psi2_denom_sqrt = np.sqrt(_psi2_denom) + _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q + _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:]) + _psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ + _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q + _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ + _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2) + _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) + _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM + _psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ _psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ _psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ - _psi2 = np.square(variance) * np.exp(_psi2_exp_sum) # N,M,M - _dpsi2_dvariance = 2. * _psi2/variance # NxMxM - _dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ - _dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ - _dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ - _dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ - _dpsi2_dlengthscale = 2.*lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ + _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM + _dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance + _dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z)) + _dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq) + _dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq) + _dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z)) +# print _psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq #+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) + _dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z)) + + +# _dpsi2_dvariance = 2. 
* _psi2/variance # NxMxM +# _dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ +# _dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ +# _dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ +# _dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ +# _dpsi2_dlengthscale = 2.*lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ - return _psi2, _dpsi2_dvariance, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _dpsi2_dZ, _dpsi2_dlengthscale + return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index e0071fb9..5944e765 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -41,9 +41,11 @@ class RBF(Stationary): #---------------------------------------# def psi0(self, Z, variational_posterior): - if self.useGPU: - if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): + if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): + if self.useGPU: return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0] + else: + return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0] else: return self.Kdiag(variational_posterior.mean) @@ -52,7 +54,7 @@ class RBF(Stationary): if self.useGPU: return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1] else: - psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1] else: _, _, _, psi1 = self._psi1computations(Z, variational_posterior) return psi1 @@ -62,7 +64,7 @@ class RBF(Stationary): if self.useGPU: return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2] else: - psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2] else: _, _, _, _, psi2 = self._psi2computations(Z, variational_posterior) return psi2 @@ -73,26 +75,30 @@ class RBF(Stationary): if self.useGPU: self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) else: +# dL_dvar, dL_dlengscale, dL_dZ, dL_dgamma, dL_dmu, dL_dS = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) + dL_dvar, dL_dlengscale, _, _, _, _ = 
ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) + self.variance.gradient = dL_dvar + self.lengthscale.gradient = dL_dlengscale - _, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - _, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - - #contributions from psi0: - self.variance.gradient = np.sum(dL_dpsi0) - - #from psi1 - self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance) - if self.ARD: - self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) - else: - self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum() - - #from psi2 - self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum() - if self.ARD: - self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) - else: - self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum() +# _, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) +# _, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) +# +# #contributions from psi0: +# self.variance.gradient = np.sum(dL_dpsi0) +# +# #from psi1 +# self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance) +# if self.ARD: +# self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) +# else: +# self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum() +# +# #from psi2 +# self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum() +# if self.ARD: +# self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) +# else: +# self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum() elif isinstance(variational_posterior, variational.NormalPosterior): l2 = self.lengthscale**2 @@ -125,22 +131,25 @@ class RBF(Stationary): else: raise ValueError, "unknown distriubtion received for psi-statistics" - def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): # Spike-and-Slab GPLVM if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): if self.useGPU: return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) else: - _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - _, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - - #psi1 - grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0) - - #psi2 - grad += 
(dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1) - - return grad + _, _, dL_dZ, _, _, _ = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) + return dL_dZ + +# _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) +# _, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) +# +# #psi1 +# grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0) +# +# #psi2 +# grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1) +# +# return grad elif isinstance(variational_posterior, variational.NormalPosterior): l2 = self.lengthscale **2 @@ -166,26 +175,29 @@ class RBF(Stationary): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): if self.useGPU: return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) - else: - ndata = variational_posterior.mean.shape[0] - - _, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - _, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - - #psi1 - grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1) - grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1) - grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1) - - #psi2 - grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1) - grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1) - grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1) - - if self.group_spike_prob: - grad_gamma[:] = grad_gamma.mean(axis=0) - - return grad_mu, grad_S, grad_gamma + else: + _, _, _, dL_dmu, dL_dS, dL_dgamma = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) + return dL_dmu, dL_dS, dL_dgamma + +# ndata = variational_posterior.mean.shape[0] +# +# _, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) +# _, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) +# +# #psi1 +# grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1) +# grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1) +# grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1) +# +# #psi2 +# grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1) +# grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1) +# grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1) +# +# if self.group_spike_prob: +# grad_gamma[:] = grad_gamma.mean(axis=0) +# +# 
return grad_mu, grad_S, grad_gamma elif isinstance(variational_posterior, variational.NormalPosterior): diff --git a/GPy/kern/_src/static.py b/GPy/kern/_src/static.py index d853b4e0..68884937 100644 --- a/GPy/kern/_src/static.py +++ b/GPy/kern/_src/static.py @@ -25,7 +25,7 @@ class Static(Kern): def gradients_X_diag(self, dL_dKdiag, X): return np.zeros(X.shape) - def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): return np.zeros(Z.shape) def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): diff --git a/GPy/models/ss_gplvm.py b/GPy/models/ss_gplvm.py index 57be302a..dba12700 100644 --- a/GPy/models/ss_gplvm.py +++ b/GPy/models/ss_gplvm.py @@ -11,7 +11,7 @@ from ..likelihoods import Gaussian from ..inference.optimization import SCG from ..util import linalg from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior -from ..inference.latent_function_inference.var_dtc_parallel import update_gradients +from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU @@ -41,7 +41,7 @@ class SSGPLVM(SparseGP): if X_variance is None: # The variance of the variational approximation (S) X_variance = np.random.uniform(0,.1,X.shape) - gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation + gamma = np.empty_like(X, order='F') # The posterior probabilities of the binary variable in the variational approximation gamma[:] = 0.5 + 0.01 * np.random.randn(X.shape[0], input_dim) if group_spike: @@ -60,12 +60,15 @@ class SSGPLVM(SparseGP): pi = np.empty((input_dim)) pi[:] = 0.5 self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b + + X = np.asfortranarray(X) + X_variance = np.asfortranarray(X_variance) + gamma = np.asfortranarray(gamma) X = SpikeAndSlabPosterior(X, X_variance, gamma) if group_spike: kernel.group_spike_prob = True self.variational_prior.group_spike_prob = True - SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) self.add_parameter(self.X, index=0) @@ -76,7 +79,7 @@ class SSGPLVM(SparseGP): X.mean.gradient, X.variance.gradient, X.binary_prob.gradient = X_grad def parameters_changed(self): - if isinstance(self.inference_method, VarDTC_GPU): + if isinstance(self.inference_method, VarDTC_GPU) or isinstance(self.inference_method, VarDTC_minibatch): update_gradients(self) return diff --git a/GPy/util/initialization.py b/GPy/util/initialization.py index 22e63b6b..dd14c9a5 100644 --- a/GPy/util/initialization.py +++ b/GPy/util/initialization.py @@ -8,7 +8,7 @@ import numpy as np from GPy.util.pca import pca def initialize_latent(init, input_dim, Y): - Xr = np.random.randn(Y.shape[0], input_dim) + Xr = np.asfortranarray(np.random.randn(Y.shape[0], input_dim)) if init == 'PCA': p = pca(Y) PC = p.project(Y, min(input_dim, Y.shape[1])) diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index b204f813..661a2b47 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -123,7 +123,7 @@ def dtrtrs(A, B, lower=1, trans=0, unitdiag=0): :returns: """ - A = force_F_ordered(A) + A = np.asfortranarray(A) #Note: B does not seem to need to be F ordered! 
    return lapack.dtrtrs(A, B, lower=lower, trans=trans, unitdiag=unitdiag)

From 8531c4bff0946d6b552cfe755f843bb07849f19f Mon Sep 17 00:00:00 2001
From: Max Zwiessele
Date: Mon, 12 May 2014 16:23:01 +0100
Subject: [PATCH 2/8] [params] indexing with boolean arrays switched off, raises proper error now

---
 GPy/core/parameterization/param.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py
index 3ccbd169..7dae04b8 100644
--- a/GPy/core/parameterization/param.py
+++ b/GPy/core/parameterization/param.py
@@ -137,6 +137,8 @@ class Param(OptimizationHandlable, ObsAr):
         slice_index = self._current_slice_
         def f(a):
             a, b = a
+            if isinstance(a, numpy.ndarray) and a.dtype == bool:
+                raise ValueError, "Boolean indexing not implemented, use Param[np.where(index)] to index by boolean arrays!"
             if a not in (slice(None), Ellipsis):
                 if isinstance(a, slice):
                     start, stop, step = a.indices(b)

From 02b5ee1e4604b1ce6d2c5dced4a9366aea4e5365 Mon Sep 17 00:00:00 2001
From: Max Zwiessele
Date: Thu, 15 May 2014 11:29:20 +0100
Subject: [PATCH 3/8] [parameterized] restructured a lot and finalized some stuff

---
 GPy/core/parameterization/lists_and_dicts.py |  27 +-
 GPy/core/parameterization/param.py           |  20 +-
 GPy/core/parameterization/parameter_core.py  | 351 +++++++------------
 GPy/core/parameterization/parameterized.py   | 215 ++++++++----
 GPy/core/parameterization/variational.py     |  16 +-
 GPy/kern/_src/add.py                         |   6 +-
 GPy/kern/_src/kern.py                        |   6 +-
 GPy/plotting/matplot_dep/kernel_plots.py     |   2 +-
 GPy/testing/parameterized_tests.py           |   2 +-
 GPy/testing/pickle_tests.py                  |  34 +-
 10 files changed, 354 insertions(+), 325 deletions(-)

diff --git a/GPy/core/parameterization/lists_and_dicts.py b/GPy/core/parameterization/lists_and_dicts.py
index 084ab0db..96cf363c 100644
--- a/GPy/core/parameterization/lists_and_dicts.py
+++ b/GPy/core/parameterization/lists_and_dicts.py
@@ -38,7 +38,12 @@ class ArrayList(list):
         raise ValueError, "{} is not in list".format(item)
     pass

-class ObservablesList(object):
+class ObserverList(object):
+    """
+    A list which contains the observers.
+    It only holds weak references to observers, so that unbound
+    observers don't dangle in memory.
+    """
     def __init__(self):
         self._poc = []

     def __getitem__(self, ind):
         p,o,c = self._poc[ind]
         return p, o(), c

-    def remove(self, priority, observable, callble):
+    def remove(self, priority, observer, callble):
         """
+        Remove the observer that was added with the given priority and callble.
""" self.flush() for i in range(len(self) - 1, -1, -1): p,o,c = self[i] - if priority==p and observable==o and callble==c: + if priority==p and observer==o and callble==c: del self._poc[i] def __repr__(self): return self._poc.__repr__() - - def add(self, priority, observable, callble): - if observable is not None: + def add(self, priority, observer, callble): + """ + Add an observer with priority and callble + """ + if observer is not None: ins = 0 for pr, _, _ in self: if priority > pr: break ins += 1 - self._poc.insert(ins, (priority, weakref.ref(observable), callble)) + self._poc.insert(ins, (priority, weakref.ref(observer), callble)) def __str__(self): ret = [] @@ -83,6 +91,9 @@ class ObservablesList(object): return '\n'.join(ret) def flush(self): + """ + Make sure all weak references, which point to nothing are flushed (deleted) + """ self._poc = [(p,o,c) for p,o,c in self._poc if o() is not None] def __iter__(self): @@ -95,7 +106,7 @@ class ObservablesList(object): return self._poc.__len__() def __deepcopy__(self, memo): - s = ObservablesList() + s = ObserverList() for p,o,c in self: import copy s.add(p, copy.deepcopy(o, memo), copy.deepcopy(c, memo)) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index b2103f43..b0655e03 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -4,7 +4,7 @@ import itertools import numpy np = numpy -from parameter_core import OptimizationHandlable, adjust_name_for_printing +from parameter_core import Parameterizable, adjust_name_for_printing from observable_array import ObsAr ###### printing @@ -16,7 +16,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision __print_threshold__ = 5 ###### -class Param(OptimizationHandlable, ObsAr): +class Param(Parameterizable, ObsAr): """ Parameter object for GPy models. @@ -42,7 +42,7 @@ class Param(OptimizationHandlable, ObsAr): """ __array_priority__ = -1 # Never give back Param _fixes_ = None - _parameters_ = [] + parameters = [] def __new__(cls, name, input_array, default_constraint=None): obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array)) obj._current_slice_ = (slice(obj.shape[0]),) @@ -87,6 +87,9 @@ class Param(OptimizationHandlable, ObsAr): @property def param_array(self): + """ + As we are a leaf, this just returns self + """ return self @property @@ -139,6 +142,9 @@ class Param(OptimizationHandlable, ObsAr): def _raveled_index_for(self, obj): return self._raveled_index() + #=========================================================================== + # Index recreation + #=========================================================================== def _expand_index(self, slice_index=None): # this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes # it basically translates slices to their respective index arrays and turns negative indices around @@ -177,15 +183,17 @@ class Param(OptimizationHandlable, ObsAr): This will function will just call visit on self, as Param are leaf nodes. """ + self.__visited = True visit(self, *args, **kwargs) - + self.__visited = False + def traverse_parents(self, visit, *args, **kwargs): """ Traverse the hierarchy upwards, visiting all parents and their children, except self. See "visitor pattern" in literature. This is implemented in pre-order fashion. 
- + Example: - + parents = [] self.traverse_parents(parents.append) print parents diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 8cda1216..f0fbf4ea 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -17,7 +17,7 @@ from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, import numpy as np import re -__updated__ = '2014-05-12' +__updated__ = '2014-05-15' class HierarchyError(Exception): """ @@ -52,13 +52,23 @@ class Observable(object): _updated = True def __init__(self, *args, **kwargs): super(Observable, self).__init__() - from lists_and_dicts import ObservablesList - self.observers = ObservablesList() + from lists_and_dicts import ObserverList + self.observers = ObserverList() def add_observer(self, observer, callble, priority=0): + """ + Add an observer `observer` with the callback `callble` + and priority `priority` to this observers list. + """ self.observers.add(priority, observer, callble) def remove_observer(self, observer, callble=None): + """ + Either (if callble is None) remove all callables, + which were added alongside observer, + or remove callable `callble` which was added alongside + the observer `observer`. + """ to_remove = [] for poc in self.observers: _, obs, clble = poc @@ -91,10 +101,6 @@ class Observable(object): break callble(self, which=which) -#=============================================================================== -# Foundation framework for parameterized and param objects: -#=============================================================================== - class Parentable(object): """ Enable an Object to have a parent. @@ -172,7 +178,11 @@ class Pickleable(object): # copy and pickling #=========================================================================== def copy(self): - """Returns a (deep) copy of the current model""" + """ + Returns a (deep) copy of the current parameter handle. + + All connections to parents of the copy will be cut. + """ #raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy" import copy memo = {} @@ -196,12 +206,11 @@ class Pickleable(object): return s def __getstate__(self): - ignore_list = ([#'_parent_', '_parent_index_', - #'observers', - '_param_array_', '_gradient_array_', '_fixes_', - '_Cacher_wrap__cachers'] - #+ self.parameter_names(recursive=False) - ) + ignore_list = ['_param_array_', # parameters get set from bottom to top + '_gradient_array_', # as well as gradients + '_fixes_', # and fixes + '_Cacher_wrap__cachers', # never pickle cachers + ] dc = dict() for k,v in self.__dict__.iteritems(): if k not in ignore_list: @@ -246,7 +255,6 @@ class Gradcheckable(Pickleable, Parentable): """ raise HierarchyError, "This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!" - class Nameable(Gradcheckable): """ Make an object nameable inside the hierarchy. @@ -285,41 +293,8 @@ class Nameable(Gradcheckable): return self._parent_.hierarchy_name() + "." + adjust(self.name) return adjust(self.name) -class Indexable(object): - """ - Enable enraveled indexes and offsets for this object. - The raveled index of an object is the index for its parameters in a flattened int array. - """ - def __init__(self, *a, **kw): - super(Indexable, self).__init__() - def _raveled_index(self): - """ - Flattened array of ints, specifying the index of this object. - This has to account for shaped parameters! 
- """ - raise NotImplementedError, "Need to be able to get the raveled Index" - - def _offset_for(self, param): - """ - Return the offset of the param inside this parameterized object. - This does not need to account for shaped parameters, as it - basically just sums up the parameter sizes which come before param. - """ - return 0 - #raise NotImplementedError, "shouldnt happen, offset required from non parameterization object?" - - def _raveled_index_for(self, param): - """ - get the raveled index for a param - that is an int array, containing the indexes for the flattened - param inside this parameterized logic. - """ - return param._raveled_index() - #raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?" - - -class Constrainable(Nameable, Indexable, Observable): +class Indexable(Nameable, Observable): """ Make an object constrainable with Priors and Transformations. TODO: Mappings!! @@ -330,7 +305,7 @@ class Constrainable(Nameable, Indexable, Observable): :func:`constrain()` and :func:`unconstrain()` are main methods here """ def __init__(self, name, default_constraint=None, *a, **kw): - super(Constrainable, self).__init__(name=name, *a, **kw) + super(Indexable, self).__init__(name=name, *a, **kw) self._default_constraint_ = default_constraint from index_operations import ParameterIndexOperations self.constraints = ParameterIndexOperations() @@ -352,6 +327,39 @@ class Constrainable(Nameable, Indexable, Observable): self._connect_fixes() self._notify_parent_change() + #=========================================================================== + # Indexable + #=========================================================================== + def _offset_for(self, param): + """ + Return the offset of the param inside this parameterized object. + This does not need to account for shaped parameters, as it + basically just sums up the parameter sizes which come before param. + """ + if param.has_parent(): + if param._parent_._get_original(param) in self.parameters: + return self._param_slices_[param._parent_._get_original(param)._parent_index_].start + return self._offset_for(param._parent_) + param._parent_._offset_for(param) + return 0 + + def _raveled_index_for(self, param): + """ + get the raveled index for a param + that is an int array, containing the indexes for the flattened + param inside this parameterized logic. + """ + from param import ParamConcatenation + if isinstance(param, ParamConcatenation): + return np.hstack((self._raveled_index_for(p) for p in param.params)) + return param._raveled_index() + self._offset_for(param) + + def _raveled_index(self): + """ + Flattened array of ints, specifying the index of this object. + This has to account for shaped parameters! 
+ """ + return np.r_[:self.size] + #=========================================================================== # Fixing Parameters: #=========================================================================== @@ -406,9 +414,24 @@ class Constrainable(Nameable, Indexable, Observable): self._fixes_ = None del self.constraints[__fixed__] + #=========================================================================== + # Convenience for fixed + #=========================================================================== def _has_fixes(self): return hasattr(self, "_fixes_") and self._fixes_ is not None and self._fixes_.size == self.size + @property + def is_fixed(self): + for p in self.parameters: + if not p.is_fixed: return False + return True + + def _get_original(self, param): + # if advanced indexing is activated it happens that the array is a copy + # you can retrieve the original param through this method, by passing + # the copy here + return self.parameters[param._parent_index_] + #=========================================================================== # Prior Operations #=========================================================================== @@ -432,8 +455,7 @@ class Constrainable(Nameable, Indexable, Observable): def unset_priors(self, *priors): """ - Un-set all priors given from this parameter handle. - + Un-set all priors given (in *priors) from this parameter handle. """ return self._remove_from_index_operations(self.priors, priors) @@ -535,7 +557,7 @@ class Constrainable(Nameable, Indexable, Observable): self.constraints = ParameterIndexOperationsView(parent.constraints, parent._offset_for(self), self.size) self.priors = ParameterIndexOperationsView(parent.priors, parent._offset_for(self), self.size) self._fixes_ = None - for p in self._parameters_: + for p in self.parameters: p._parent_changed(parent) def _add_to_index_operations(self, which, reconstrained, what, warning): @@ -563,14 +585,13 @@ class Constrainable(Nameable, Indexable, Observable): removed = np.empty((0,), dtype=int) for t in transforms: unconstrained = which.remove(t, self._raveled_index()) - print unconstrained removed = np.union1d(removed, unconstrained) if t is __fixed__: self._highest_parent_._set_unfixed(self, unconstrained) return removed -class OptimizationHandlable(Constrainable): +class OptimizationHandlable(Indexable): """ This enables optimization handles on an Object as done in GPy 0.4. @@ -580,7 +601,7 @@ class OptimizationHandlable(Constrainable): super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw) def _get_params_transformed(self): - # transformed parameters (apply transformation rules) + # transformed parameters (apply un-transformation rules) p = self.param_array.copy() [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] if self.has_parent() and self.constraints[__fixed__].size != 0: @@ -592,6 +613,11 @@ class OptimizationHandlable(Constrainable): return p def _set_params_transformed(self, p): + """ + Set parameters p, but make sure they get transformed before setting. + This means, the optimizer sees p, whereas the model sees transformed(p), + such that, the parameters the model sees are in the right domain. 
+ """ if not(p is self.param_array): if self.has_parent() and self.constraints[__fixed__].size != 0: fixes = np.ones(self.size).astype(bool) @@ -604,12 +630,33 @@ class OptimizationHandlable(Constrainable): self._trigger_params_changed() def _trigger_params_changed(self, trigger_parent=True): - [p._trigger_params_changed(trigger_parent=False) for p in self._parameters_] + """ + First tell all children to update, + then update yourself. + + If trigger_parent is True, we will tell the parent, otherwise not. + """ + [p._trigger_params_changed(trigger_parent=False) for p in self.parameters] self.notify_observers(None, None if trigger_parent else -np.inf) def _size_transformed(self): + """ + As fixes are not passed to the optimiser, the size of the model for the optimiser + is the size of all parameters minus the size of the fixes. + """ return self.size - self.constraints[__fixed__].size + def _transform_gradients(self, g): + """ + Transform the gradients by multiplying the gradient factor for each + constraint to it. + """ + if self.has_parent(): + return g + [np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__] + if self._has_fixes(): return g[self._fixes_] + return g + @property def num_params(self): """ @@ -628,8 +675,8 @@ class OptimizationHandlable(Constrainable): """ if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x) else: adjust = lambda x: x - if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)] - else: names = [adjust(x.name) for x in self._parameters_] + if recursive: names = [xi for x in self.parameters for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)] + else: names = [adjust(x.name) for x in self.parameters] if add_self: names = map(lambda x: adjust(self.name) + "." + x, names) return names @@ -651,7 +698,7 @@ class OptimizationHandlable(Constrainable): Randomize the model. Make this draw from the prior if one exists, else draw from given random generator - :param rand_gen: numpy random number generator which takes args and kwargs + :param rand_gen: np random number generator which takes args and kwargs :param flaot loc: loc parameter for random number generator :param float scale: scale parameter for random number generator :param args, kwargs: will be passed through to random number generator @@ -667,7 +714,7 @@ class OptimizationHandlable(Constrainable): # for all parameterized objects #=========================================================================== @property - def full_gradient(self): + def gradient_full(self): """ Note to users: This does not return the gradient in the right shape! Use self.gradient @@ -681,26 +728,43 @@ class OptimizationHandlable(Constrainable): return self._gradient_array_ def _propagate_param_grad(self, parray, garray): + """ + For propagating the param_array and gradient_array. + This ensures the in memory view of each subsequent array. + + 1.) connect param_array of children to self.param_array + 2.) 
tell all children to propagate further + """ pi_old_size = 0 - for pi in self._parameters_: + for pi in self.parameters: pislice = slice(pi_old_size, pi_old_size + pi.size) self.param_array[pislice] = pi.param_array.flat # , requirements=['C', 'W']).flat - self.full_gradient[pislice] = pi.full_gradient.flat # , requirements=['C', 'W']).flat + self.gradient_full[pislice] = pi.gradient_full.flat # , requirements=['C', 'W']).flat pi.param_array.data = parray[pislice].data - pi.full_gradient.data = garray[pislice].data + pi.gradient_full.data = garray[pislice].data pi._propagate_param_grad(parray[pislice], garray[pislice]) pi_old_size += pi.size class Parameterizable(OptimizationHandlable): + """ + A parameterisable class. + + This class provides the parameters list (ArrayList) and standard parameter handling, + such as {add|remove}_parameter(), traverse hierarchy and param_array, gradient_array + and the empty parameters_changed(). + + This class is abstract and should not be instantiated. + Use GPy.core.Parameterized() as node (or leaf) in the parameterized hierarchy. + Use GPy.core.Param() for a leaf in the parameterized hierarchy. + """ def __init__(self, *args, **kwargs): super(Parameterizable, self).__init__(*args, **kwargs) from GPy.core.parameterization.lists_and_dicts import ArrayList - self._parameters_ = ArrayList() + self.parameters = ArrayList() self._param_array_ = None - self.size = 0 self._added_names_ = set() self.__visited = False # for traversing in reverse order we need to know if we were here already @@ -735,7 +799,7 @@ class Parameterizable(OptimizationHandlable): if not self.__visited: visit(self, *args, **kwargs) self.__visited = True - for c in self._parameters_: + for c in self.parameters: c.traverse(visit, *args, **kwargs) self.__visited = False @@ -743,9 +807,9 @@ class Parameterizable(OptimizationHandlable): """ Traverse the hierarchy upwards, visiting all parents and their children except self. See "visitor pattern" in literature. This is implemented in pre-order fashion. 
- + Example: - + parents = [] self.traverse_parents(parents.append) print parents @@ -754,7 +818,7 @@ class Parameterizable(OptimizationHandlable): self.__visited = True self._parent_._traverse_parents(visit, *args, **kwargs) self.__visited = False - + def _traverse_parents(self, visit, *args, **kwargs): if not self.__visited: self.__visited = True @@ -779,7 +843,7 @@ class Parameterizable(OptimizationHandlable): @property def num_params(self): - return len(self._parameters_) + return len(self.parameters) def _add_parameter_name(self, param, ignore_added_names=False): pname = adjust_name_for_printing(param.name) @@ -812,132 +876,6 @@ class Parameterizable(OptimizationHandlable): self._remove_parameter_name(None, old_name) self._add_parameter_name(param) - def add_parameter(self, param, index=None, _ignore_added_names=False): - """ - :param parameters: the parameters to add - :type parameters: list of or one :py:class:`GPy.core.param.Param` - :param [index]: index of where to put parameters - - :param bool _ignore_added_names: whether the name of the parameter overrides a possibly existing field - - Add all parameters to this param class, you can insert parameters - at any given index using the :func:`list.insert` syntax - """ - if param in self._parameters_ and index is not None: - self.remove_parameter(param) - self.add_parameter(param, index) - # elif param.has_parent(): - # raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short()) - elif param not in self._parameters_: - if param.has_parent(): - def visit(parent, self): - if parent is self: - raise HierarchyError, "You cannot add a parameter twice into the hierarchy" - param.traverse_parents(visit, self) - param._parent_.remove_parameter(param) - # make sure the size is set - if index is None: - self.constraints.update(param.constraints, self.size) - self.priors.update(param.priors, self.size) - self._parameters_.append(param) - else: - start = sum(p.size for p in self._parameters_[:index]) - self.constraints.shift_right(start, param.size) - self.priors.shift_right(start, param.size) - self.constraints.update(param.constraints, start) - self.priors.update(param.priors, start) - self._parameters_.insert(index, param) - - param.add_observer(self, self._pass_through_notify_observers, -np.inf) - - parent = self - while parent is not None: - parent.size += param.size - parent = parent._parent_ - - self._connect_parameters() - - self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names) - self._highest_parent_._notify_parent_change() - self._highest_parent_._connect_fixes() - - else: - raise HierarchyError, """Parameter exists already and no copy made""" - - - def add_parameters(self, *parameters): - """ - convenience method for adding several - parameters without gradient specification - """ - [self.add_parameter(p) for p in parameters] - - def remove_parameter(self, param): - """ - :param param: param object to remove from being a parameter of this parameterized object. 
- """ - if not param in self._parameters_: - raise RuntimeError, "Parameter {} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name) - - start = sum([p.size for p in self._parameters_[:param._parent_index_]]) - self._remove_parameter_name(param) - self.size -= param.size - del self._parameters_[param._parent_index_] - - param._disconnect_parent() - param.remove_observer(self, self._pass_through_notify_observers) - self.constraints.shift_left(start, param.size) - - self._connect_parameters() - self._notify_parent_change() - - parent = self._parent_ - while parent is not None: - parent.size -= param.size - parent = parent._parent_ - - self._highest_parent_._connect_parameters() - self._highest_parent_._connect_fixes() - self._highest_parent_._notify_parent_change() - - def _connect_parameters(self, ignore_added_names=False): - # connect parameterlist to this parameterized object - # This just sets up the right connection for the params objects - # to be used as parameters - # it also sets the constraints for each parameter to the constraints - # of their respective parents - if not hasattr(self, "_parameters_") or len(self._parameters_) < 1: - # no parameters for this class - return - if self.param_array.size != self.size: - self.param_array = np.empty(self.size, dtype=np.float64) - if self.gradient.size != self.size: - self._gradient_array_ = np.empty(self.size, dtype=np.float64) - - old_size = 0 - self._param_slices_ = [] - for i, p in enumerate(self._parameters_): - p._parent_ = self - p._parent_index_ = i - - pslice = slice(old_size, old_size + p.size) - # first connect all children - p._propagate_param_grad(self.param_array[pslice], self.full_gradient[pslice]) - # then connect children to self - self.param_array[pslice] = p.param_array.flat # , requirements=['C', 'W']).ravel(order='C') - self.full_gradient[pslice] = p.full_gradient.flat # , requirements=['C', 'W']).ravel(order='C') - - if not p.param_array.flags['C_CONTIGUOUS']: - raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. 
All parameter arrays must be C_CONTIGUOUS" - - p.param_array.data = self.param_array[pslice].data - p.full_gradient.data = self.full_gradient[pslice].data - - self._param_slices_.append(pslice) - - self._add_parameter_name(p, ignore_added_names=ignore_added_names) - old_size += p.size - #=========================================================================== # notification system #=========================================================================== @@ -947,30 +885,13 @@ class Parameterizable(OptimizationHandlable): self.notify_observers(which=which) #=========================================================================== - # Pickling - #=========================================================================== - def __setstate__(self, state): - super(Parameterizable, self).__setstate__(state) - self._connect_parameters() - self._connect_fixes() - self._notify_parent_change() - - self.parameters_changed() - - def copy(self): - c = super(Parameterizable, self).copy() - c._connect_parameters() - c._connect_fixes() - c._notify_parent_change() - return c - #=========================================================================== # From being parentable, we have to define the parent_change notification #=========================================================================== def _notify_parent_change(self): """ Notify all parameters that the parent has changed """ - for p in self._parameters_: + for p in self.parameters: p._parent_changed(self) def parameters_changed(self): diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 67694a1b..cfb2171d 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -3,13 +3,10 @@ import numpy; np = numpy -import cPickle import itertools from re import compile, _pattern_type from param import ParamConcatenation -from parameter_core import Pickleable, Parameterizable, adjust_name_for_printing -from transformations import __fixed__ -from lists_and_dicts import ArrayList +from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing class ParametersChangedMeta(type): def __call__(self, *args, **kw): @@ -68,8 +65,7 @@ class Parameterized(Parameterizable): def __init__(self, name=None, parameters=[], *a, **kw): super(Parameterized, self).__init__(name=name, *a, **kw) self._in_init_ = True - self._parameters_ = ArrayList() - self.size = sum(p.size for p in self._parameters_) + self.size = sum(p.size for p in self.parameters) self.add_observer(self, self._parameters_changed_notification, -100) if not self._has_fixes(): self._fixes_ = None @@ -86,7 +82,7 @@ class Parameterized(Parameterizable): iamroot=True node = pydot.Node(id(self), shape='box', label=self.name)#, color='white') G.add_node(node) - for child in self._parameters_: + for child in self.parameters: child_node = child.build_pydot(G) G.add_edge(pydot.Edge(node, child_node))#, color='white')) @@ -102,58 +98,133 @@ class Parameterized(Parameterizable): return node #=========================================================================== - # Gradient control + # Add remove parameters: #=========================================================================== - def _transform_gradients(self, g): - if self.has_parent(): - return g - [numpy.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__] - if self._has_fixes(): return g[self._fixes_] - return g - - - 
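
The `_connect_parameters`/`_propagate_param_grad` wiring above (and the `full_gradient` to `gradient_full` rename) depends on numpy's buffer-sharing trick: assigning to `ndarray.data` repoints a child array at a slice of its parent's buffer, so parent and children literally share memory. A minimal sketch with toy arrays, assuming the numpy of this era, where `ndarray.data` is still assignable:

    import numpy as np

    # parent holds the flat global view; child is a parameter's own view
    parent = np.zeros(5)
    child = np.empty(3)
    child.data = parent[1:4].data   # child now aliases parent[1:4]
    child[:] = [1., 2., 3.]         # writing the child updates the parent
    assert parent[1] == 1. and parent[3] == 3.
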
#=========================================================================== - # Indexable - #=========================================================================== - def _offset_for(self, param): - # get the offset in the parameterized index array for param - if param.has_parent(): - if param._parent_._get_original(param) in self._parameters_: - return self._param_slices_[param._parent_._get_original(param)._parent_index_].start - return self._offset_for(param._parent_) + param._parent_._offset_for(param) - return 0 - - def _raveled_index_for(self, param): + def add_parameter(self, param, index=None, _ignore_added_names=False): """ - get the raveled index for a param - that is an int array, containing the indexes for the flattened - param inside this parameterized logic. - """ - if isinstance(param, ParamConcatenation): - return numpy.hstack((self._raveled_index_for(p) for p in param.params)) - return param._raveled_index() + self._offset_for(param) + :param parameters: the parameters to add + :type parameters: list of or one :py:class:`GPy.core.param.Param` + :param [index]: index of where to put parameters - def _raveled_index(self): - """ - get the raveled index for this object, - this is not in the global view of things! - """ - return numpy.r_[:self.size] + :param bool _ignore_added_names: whether the name of the parameter overrides a possibly existing field - #=========================================================================== - # Convenience for fixed, tied checking of param: - #=========================================================================== - @property - def is_fixed(self): - for p in self._parameters_: - if not p.is_fixed: return False - return True + Add all parameters to this param class, you can insert parameters + at any given index using the :func:`list.insert` syntax + """ + if param in self.parameters and index is not None: + self.remove_parameter(param) + self.add_parameter(param, index) + # elif param.has_parent(): + # raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short()) + elif param not in self.parameters: + if param.has_parent(): + def visit(parent, self): + if parent is self: + raise HierarchyError, "You cannot add a parameter twice into the hierarchy" + param.traverse_parents(visit, self) + param._parent_.remove_parameter(param) + # make sure the size is set + if index is None: + self.constraints.update(param.constraints, self.size) + self.priors.update(param.priors, self.size) + self.parameters.append(param) + else: + start = sum(p.size for p in self.parameters[:index]) + self.constraints.shift_right(start, param.size) + self.priors.shift_right(start, param.size) + self.constraints.update(param.constraints, start) + self.priors.update(param.priors, start) + self.parameters.insert(index, param) - def _get_original(self, param): - # if advanced indexing is activated it happens that the array is a copy - # you can retrieve the original param through this method, by passing - # the copy here - return self._parameters_[param._parent_index_] + param.add_observer(self, self._pass_through_notify_observers, -np.inf) + + parent = self + while parent is not None: + parent.size += param.size + parent = parent._parent_ + + self._connect_parameters() + + self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names) + self._highest_parent_._notify_parent_change() + self._highest_parent_._connect_fixes() + + else: + raise 
HierarchyError, """Parameter exists already and no copy made""" + + + def add_parameters(self, *parameters): + """ + convenience method for adding several + parameters without gradient specification + """ + [self.add_parameter(p) for p in parameters] + + def remove_parameter(self, param): + """ + :param param: param object to remove from being a parameter of this parameterized object. + """ + if not param in self.parameters: + raise RuntimeError, "Parameter {} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name) + + start = sum([p.size for p in self.parameters[:param._parent_index_]]) + self._remove_parameter_name(param) + self.size -= param.size + del self.parameters[param._parent_index_] + + param._disconnect_parent() + param.remove_observer(self, self._pass_through_notify_observers) + self.constraints.shift_left(start, param.size) + + self._connect_parameters() + self._notify_parent_change() + + parent = self._parent_ + while parent is not None: + parent.size -= param.size + parent = parent._parent_ + + self._highest_parent_._connect_parameters() + self._highest_parent_._connect_fixes() + self._highest_parent_._notify_parent_change() + + def _connect_parameters(self, ignore_added_names=False): + # connect parameterlist to this parameterized object + # This just sets up the right connection for the params objects + # to be used as parameters + # it also sets the constraints for each parameter to the constraints + # of their respective parents + if not hasattr(self, "parameters") or len(self.parameters) < 1: + # no parameters for this class + return + if self.param_array.size != self.size: + self.param_array = np.empty(self.size, dtype=np.float64) + if self.gradient.size != self.size: + self._gradient_array_ = np.empty(self.size, dtype=np.float64) + + old_size = 0 + self._param_slices_ = [] + for i, p in enumerate(self.parameters): + p._parent_ = self + p._parent_index_ = i + + pslice = slice(old_size, old_size + p.size) + # first connect all children + p._propagate_param_grad(self.param_array[pslice], self.gradient_full[pslice]) + # then connect children to self + self.param_array[pslice] = p.param_array.flat # , requirements=['C', 'W']).ravel(order='C') + self.gradient_full[pslice] = p.gradient_full.flat # , requirements=['C', 'W']).ravel(order='C') + + if not p.param_array.flags['C_CONTIGUOUS']: + raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. 
All parameter arrays must be C_CONTIGUOUS" + + p.param_array.data = self.param_array[pslice].data + p.gradient_full.data = self.gradient_full[pslice].data + + self._param_slices_.append(pslice) + + self._add_parameter_name(p, ignore_added_names=ignore_added_names) + old_size += p.size #=========================================================================== # Get/set parameters: @@ -200,10 +271,28 @@ class Parameterized(Parameterizable): def __setattr__(self, name, val): # override the default behaviour, if setting a param, so broadcasting can by used - if hasattr(self, "_parameters_"): + if hasattr(self, "parameters"): pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False) - if name in pnames: self._parameters_[pnames.index(name)][:] = val; return + if name in pnames: self.parameters[pnames.index(name)][:] = val; return object.__setattr__(self, name, val); + + #=========================================================================== + # Pickling + #=========================================================================== + def __setstate__(self, state): + super(Parameterized, self).__setstate__(state) + self._connect_parameters() + self._connect_fixes() + self._notify_parent_change() + + self.parameters_changed() + def copy(self): + c = super(Parameterized, self).copy() + c._connect_parameters() + c._connect_fixes() + c._notify_parent_change() + return c + #=========================================================================== # Printing: #=========================================================================== @@ -211,22 +300,22 @@ class Parameterized(Parameterizable): return self.hierarchy_name() @property def flattened_parameters(self): - return [xi for x in self._parameters_ for xi in x.flattened_parameters] + return [xi for x in self.parameters for xi in x.flattened_parameters] @property def _parameter_sizes_(self): - return [x.size for x in self._parameters_] + return [x.size for x in self.parameters] @property def parameter_shapes(self): - return [xi for x in self._parameters_ for xi in x.parameter_shapes] + return [xi for x in self.parameters for xi in x.parameter_shapes] @property def _constraints_str(self): - return [cs for p in self._parameters_ for cs in p._constraints_str] + return [cs for p in self.parameters for cs in p._constraints_str] @property def _priors_str(self): - return [cs for p in self._parameters_ for cs in p._priors_str] + return [cs for p in self.parameters for cs in p._priors_str] @property def _description_str(self): - return [xi for x in self._parameters_ for xi in x._description_str] + return [xi for x in self.parameters for xi in x._description_str] @property def _ties_str(self): return [','.join(x._ties_str) for x in self.flattened_parameters] @@ -246,7 +335,7 @@ class Parameterized(Parameterizable): to_print = [] for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs): to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p)) - # to_print = [format_spec.format(p=p, const=c, t=t) if isinstance(p, Param) else p.__str__(header=False) for p, c, t in itertools.izip(self._parameters_, constrs, ts)] + # to_print = [format_spec.format(p=p, const=c, t=t) if isinstance(p, Param) else p.__str__(header=False) for p, c, t in itertools.izip(self.parameters, constrs, ts)] sep = '-' * (nl + sl + cl + + pl + tl + 8 * 2 + 3) if header: header = " {{0:<{0}s}} | {{1:^{1}s}} | {{2:^{2}s}} | {{3:^{3}s}} | {{4:^{4}s}}".format(nl, sl, cl, pl, tl).format(name, "Value", "Constraint", "Prior", "Tied to") diff 
--git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index 044d1592..fa32f642 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -81,7 +81,7 @@ class VariationalPosterior(Parameterized): def _raveled_index(self): index = np.empty(dtype=int, shape=0) size = 0 - for p in self._parameters_: + for p in self.parameters: index = np.hstack((index, p._raveled_index()+size)) size += p._realsize_ if hasattr(p, '_realsize_') else p.size return index @@ -96,10 +96,10 @@ class VariationalPosterior(Parameterized): dc = self.__dict__.copy() dc['mean'] = self.mean[s] dc['variance'] = self.variance[s] - dc['_parameters_'] = copy.copy(self._parameters_) + dc['parameters'] = copy.copy(self.parameters) n.__dict__.update(dc) - n._parameters_[dc['mean']._parent_index_] = dc['mean'] - n._parameters_[dc['variance']._parent_index_] = dc['variance'] + n.parameters[dc['mean']._parent_index_] = dc['mean'] + n.parameters[dc['variance']._parent_index_] = dc['variance'] n._gradient_array_ = None oversize = self.size - self.mean.size - self.variance.size n.size = n.mean.size + n.variance.size + oversize @@ -150,11 +150,11 @@ class SpikeAndSlabPosterior(VariationalPosterior): dc['mean'] = self.mean[s] dc['variance'] = self.variance[s] dc['binary_prob'] = self.binary_prob[s] - dc['_parameters_'] = copy.copy(self._parameters_) + dc['parameters'] = copy.copy(self.parameters) n.__dict__.update(dc) - n._parameters_[dc['mean']._parent_index_] = dc['mean'] - n._parameters_[dc['variance']._parent_index_] = dc['variance'] - n._parameters_[dc['binary_prob']._parent_index_] = dc['binary_prob'] + n.parameters[dc['mean']._parent_index_] = dc['mean'] + n.parameters[dc['variance']._parent_index_] = dc['variance'] + n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob'] n.ndim = n.mean.ndim n.shape = n.mean.shape n.num_data = n.mean.shape[0] diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index f5b54797..6336d1b9 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -141,10 +141,10 @@ class Add(CombinationKernel): from static import White, Bias target_mu = np.zeros(variational_posterior.shape) target_S = np.zeros(variational_posterior.shape) - for p1 in self._parameters_: + for p1 in self.parameters: #compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2! eff_dL_dpsi1 = dL_dpsi1.copy() - for p2 in self._parameters_: + for p2 in self.parameters: if p2 is p1: continue if isinstance(p2, White): @@ -160,7 +160,7 @@ class Add(CombinationKernel): def add(self, other, name='sum'): if isinstance(other, Add): - other_params = other._parameters_[:] + other_params = other.parameters[:] for p in other_params: other.remove_parameter(p) self.add_parameters(*other_params) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index b8d10f47..005faa05 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -183,9 +183,9 @@ class Kern(Parameterized): assert isinstance(other, Kern), "only kernels can be added to kernels..." 
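
The cross-term correction in the add.py hunk above can be sanity-checked numerically. If the psi2 cross term of two additive parts takes the symmetric rank-two form psi1_a psi1_b^T + psi1_b psi1_a^T (an assumption of this sketch; all names here are toy stand-ins), then dL/dpsi1_a gains the extra 2 * dL_dpsi2 . psi1_b the code adds:

    import numpy as np

    M = 4
    psi1_a, psi1_b = np.random.randn(M), np.random.randn(M)
    W = np.random.randn(M, M); W = 0.5 * (W + W.T)   # symmetric dL_dpsi2
    f = lambda a: np.sum(W * (np.outer(a, psi1_b) + np.outer(psi1_b, a)))
    grad = 2. * W.dot(psi1_b)                        # analytic gradient
    eps = 1e-6
    a2 = psi1_a.copy(); a2[0] += eps
    assert abs((f(a2) - f(psi1_a)) / eps - grad[0]) < 1e-4
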
from prod import Prod #kernels = [] - #if isinstance(self, Prod): kernels.extend(self._parameters_) + #if isinstance(self, Prod): kernels.extend(self.parameters) #else: kernels.append(self) - #if isinstance(other, Prod): kernels.extend(other._parameters_) + #if isinstance(other, Prod): kernels.extend(other.parameters) #else: kernels.append(other) return Prod([self, other], name) @@ -222,7 +222,7 @@ class CombinationKernel(Kern): @property def parts(self): - return self._parameters_ + return self.parameters def get_input_dim_active_dims(self, kernels, extra_dims = None): #active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int)) diff --git a/GPy/plotting/matplot_dep/kernel_plots.py b/GPy/plotting/matplot_dep/kernel_plots.py index 2b990611..3c6b911f 100644 --- a/GPy/plotting/matplot_dep/kernel_plots.py +++ b/GPy/plotting/matplot_dep/kernel_plots.py @@ -68,7 +68,7 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False): for i in range(ard_params.shape[0]): c = Tango.nextMedium() - bars.append(plot_bars(fig, ax, x, ard_params[i,:], c, kernel._parameters_[i].name, bottom=bottom)) + bars.append(plot_bars(fig, ax, x, ard_params[i,:], c, kernel.parameters[i].name, bottom=bottom)) bottom += ard_params[i,:] ax.set_xlim(-.5, kernel.input_dim - .5) diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py index bc989637..fa15d66d 100644 --- a/GPy/testing/parameterized_tests.py +++ b/GPy/testing/parameterized_tests.py @@ -95,7 +95,7 @@ class ParameterizedTest(unittest.TestCase): self.assertListEqual(self.test1.kern.param_array.tolist(), val[:2].tolist()) def test_add_parameter_already_in_hirarchy(self): - self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0]) + self.assertRaises(HierarchyError, self.test1.add_parameter, self.white.parameters[0]) def test_default_constraints(self): self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops) diff --git a/GPy/testing/pickle_tests.py b/GPy/testing/pickle_tests.py index b62f5e45..94504645 100644 --- a/GPy/testing/pickle_tests.py +++ b/GPy/testing/pickle_tests.py @@ -89,28 +89,28 @@ class Test(ListDictTestCase): self.assertIs(pcopy.constraints, pcopy.rbf.lengthscale.constraints._param_index_ops) self.assertIs(pcopy.constraints, pcopy.linear.constraints._param_index_ops) self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) - self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) + self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist()) self.assertSequenceEqual(str(par), str(pcopy)) self.assertIsNot(par.param_array, pcopy.param_array) - self.assertIsNot(par.full_gradient, pcopy.full_gradient) + self.assertIsNot(par.gradient_full, pcopy.gradient_full) with tempfile.TemporaryFile('w+b') as f: par.pickle(f) f.seek(0) pcopy = pickle.load(f) self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) pcopy.gradient = 10 - np.testing.assert_allclose(par.linear.full_gradient, pcopy.linear.full_gradient) - np.testing.assert_allclose(pcopy.linear.full_gradient, 10) + np.testing.assert_allclose(par.linear.gradient_full, pcopy.linear.gradient_full) + np.testing.assert_allclose(pcopy.linear.gradient_full, 10) self.assertSequenceEqual(str(par), str(pcopy)) def test_model(self): par = toy_rbf_1d_50(optimize=0, plot=0) pcopy = par.copy() self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) - 
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) + self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist()) self.assertSequenceEqual(str(par), str(pcopy)) self.assertIsNot(par.param_array, pcopy.param_array) - self.assertIsNot(par.full_gradient, pcopy.full_gradient) + self.assertIsNot(par.gradient_full, pcopy.gradient_full) self.assertTrue(pcopy.checkgrad()) self.assert_(np.any(pcopy.gradient!=0.0)) with tempfile.TemporaryFile('w+b') as f: @@ -118,7 +118,7 @@ class Test(ListDictTestCase): f.seek(0) pcopy = pickle.load(f) self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) - np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient) + np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full) self.assertSequenceEqual(str(par), str(pcopy)) self.assert_(pcopy.checkgrad()) @@ -126,10 +126,10 @@ class Test(ListDictTestCase): par = toy_rbf_1d_50(optimize=0, plot=0) pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy()) self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) - self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) + self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist()) self.assertSequenceEqual(str(par), str(pcopy)) self.assertIsNot(par.param_array, pcopy.param_array) - self.assertIsNot(par.full_gradient, pcopy.full_gradient) + self.assertIsNot(par.gradient_full, pcopy.gradient_full) self.assertTrue(pcopy.checkgrad()) self.assert_(np.any(pcopy.gradient!=0.0)) pcopy.optimize('bfgs') @@ -140,7 +140,7 @@ class Test(ListDictTestCase): f.seek(0) pcopy = pickle.load(f) self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) - np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient) + np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full) self.assertSequenceEqual(str(par), str(pcopy)) self.assert_(pcopy.checkgrad()) @@ -151,18 +151,18 @@ class Test(ListDictTestCase): par.gradient = 10 pcopy = par.copy() self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) - self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) + self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist()) self.assertSequenceEqual(str(par), str(pcopy)) self.assertIsNot(par.param_array, pcopy.param_array) - self.assertIsNot(par.full_gradient, pcopy.full_gradient) + self.assertIsNot(par.gradient_full, pcopy.gradient_full) with tempfile.TemporaryFile('w+b') as f: par.pickle(f) f.seek(0) pcopy = pickle.load(f) self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) pcopy.gradient = 10 - np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient) - np.testing.assert_allclose(pcopy.mean.full_gradient, 10) + np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full) + np.testing.assert_allclose(pcopy.mean.gradient_full, 10) self.assertSequenceEqual(str(par), str(pcopy)) def test_model_concat(self): @@ -170,10 +170,10 @@ class Test(ListDictTestCase): par.randomize() pcopy = par.copy() self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) - self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) + self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist()) self.assertSequenceEqual(str(par), str(pcopy)) self.assertIsNot(par.param_array, pcopy.param_array) - self.assertIsNot(par.full_gradient, pcopy.full_gradient) + self.assertIsNot(par.gradient_full, 
pcopy.gradient_full) self.assertTrue(pcopy.checkgrad()) self.assert_(np.any(pcopy.gradient!=0.0)) with tempfile.TemporaryFile('w+b') as f: @@ -181,7 +181,7 @@ class Test(ListDictTestCase): f.seek(0) pcopy = pickle.load(f) self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) - np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient) + np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full) self.assertSequenceEqual(str(par), str(pcopy)) self.assert_(pcopy.checkgrad()) From 58a05f37b768bd7e0ce4c861f9685a1f6c277e8e Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 15 May 2014 14:06:00 +0100 Subject: [PATCH 4/8] [latentfunctioninference] superclass LatentFunctionInference added, which contains a call just before and just after optimization --- GPy/core/gp.py | 20 ++++++++++- GPy/core/model.py | 2 +- .../latent_function_inference/__init__.py | 35 +++++++++++++++++-- .../latent_function_inference/dtc.py | 3 +- .../exact_gaussian_inference.py | 3 +- .../expectation_propagation.py | 3 +- .../latent_function_inference/fitc.py | 3 +- .../latent_function_inference/laplace.py | 3 +- .../latent_function_inference/var_dtc.py | 5 +-- .../latent_function_inference/var_dtc_gpu.py | 3 +- .../var_dtc_parallel.py | 3 +- 11 files changed, 69 insertions(+), 14 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 62e16de1..29d9032a 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -10,7 +10,7 @@ from model import Model from parameterization import ObsAr from .. import likelihoods from ..likelihoods.gaussian import Gaussian -from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation +from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation, LatentFunctionInference from parameterization.variational import VariationalPosterior class GP(Model): @@ -21,6 +21,7 @@ class GP(Model): :param Y: output observations :param kernel: a GPy kernel, defaults to rbf+white :param likelihood: a GPy likelihood + :param :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference_method: The inference method to use for this GP :rtype: model object .. Note:: Multiple independent outputs are allowed using columns of Y @@ -220,3 +221,20 @@ class GP(Model): """ return self.kern.input_sensitivity() + def optimize(self, optimizer=None, start=None, **kwargs): + """ + Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors. + kwargs are passed to the optimizer. 
They can be: + + :param max_f_eval: maximum number of function evaluations + :type max_f_eval: int + :messages: whether to display during optimisation + :type messages: bool + :param optimizer: which optimizer to use (defaults to self.preferred optimizer) + :type optimizer: string + + TODO: valid args + """ + self.inference_method.on_optimization_start() + super(GP, self).optimize(optimizer, start, **kwargs) + self.inference_method.on_optimization_end() \ No newline at end of file diff --git a/GPy/core/model.py b/GPy/core/model.py index 38e8d4cf..71b21af6 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -220,7 +220,7 @@ class Model(Parameterized): if self.is_fixed: raise RuntimeError, "Cannot optimize, when everything is fixed" if self.size == 0: - raise RuntimeError, "Model without parameters cannot be minimized" + raise RuntimeError, "Model without parameters cannot be optimized" if optimizer is None: optimizer = self.preferred_optimizer diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py index 68004a08..878c1e4f 100644 --- a/GPy/inference/latent_function_inference/__init__.py +++ b/GPy/inference/latent_function_inference/__init__.py @@ -25,6 +25,20 @@ etc. """ +class LatentFunctionInference(object): + def on_optimization_start(self): + """ + This function gets called, just before the optimization loop to start. + """ + pass + + def on_optimization_end(self): + """ + This function gets called, just after the optimization loop ended. + """ + pass + + from exact_gaussian_inference import ExactGaussianInference from laplace import Laplace from GPy.inference.latent_function_inference.var_dtc import VarDTC @@ -38,11 +52,26 @@ from var_dtc_gpu import VarDTC_GPU # class FullLatentFunctionData(object): # # -# class LatentFunctionInference(object): -# def inference(self, kern, X, likelihood, Y, Y_metadata=None): + +# class EMLikeLatentFunctionInference(LatentFunctionInference): +# def update_approximation(self): +# """ +# This function gets called when the +# """ +# +# def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): # """ # Do inference on the latent functions given a covariance function `kern`, -# inputs and outputs `X` and `Y`, and a likelihood `likelihood`. +# inputs and outputs `X` and `Y`, inducing_inputs `Z`, and a likelihood `likelihood`. +# Additional metadata for the outputs `Y` can be given in `Y_metadata`. +# """ +# raise NotImplementedError, "Abstract base class for full inference" +# +# class VariationalLatentFunctionInference(LatentFunctionInference): +# def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): +# """ +# Do inference on the latent functions given a covariance function `kern`, +# inputs and outputs `X` and `Y`, inducing_inputs `Z`, and a likelihood `likelihood`. # Additional metadata for the outputs `Y` can be given in `Y_metadata`. # """ # raise NotImplementedError, "Abstract base class for full inference" diff --git a/GPy/inference/latent_function_inference/dtc.py b/GPy/inference/latent_function_inference/dtc.py index 1a84da6b..1b6b1dbd 100644 --- a/GPy/inference/latent_function_inference/dtc.py +++ b/GPy/inference/latent_function_inference/dtc.py @@ -4,9 +4,10 @@ from posterior import Posterior from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv import numpy as np +from . 
import LatentFunctionInference log_2_pi = np.log(2*np.pi) -class DTC(object): +class DTC(LatentFunctionInference): """ An object for inference when the likelihood is Gaussian, but we want to do sparse inference. diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py index c0177e9f..0c02efe3 100644 --- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py +++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py @@ -5,10 +5,11 @@ from posterior import Posterior from ...util.linalg import pdinv, dpotrs, tdot from ...util import diag import numpy as np +from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) -class ExactGaussianInference(object): +class ExactGaussianInference(LatentFunctionInference): """ An object for inference when the likelihood is Gaussian. diff --git a/GPy/inference/latent_function_inference/expectation_propagation.py b/GPy/inference/latent_function_inference/expectation_propagation.py index 172f43fb..c2dea824 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation.py +++ b/GPy/inference/latent_function_inference/expectation_propagation.py @@ -1,9 +1,10 @@ import numpy as np from ...util.linalg import pdinv,jitchol,DSYR,tdot,dtrtrs, dpotrs from posterior import Posterior +from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) -class EP(object): +class EP(LatentFunctionInference): def __init__(self, epsilon=1e-6, eta=1., delta=1.): """ The expectation-propagation algorithm. diff --git a/GPy/inference/latent_function_inference/fitc.py b/GPy/inference/latent_function_inference/fitc.py index de47e5d5..a184c6c4 100644 --- a/GPy/inference/latent_function_inference/fitc.py +++ b/GPy/inference/latent_function_inference/fitc.py @@ -5,9 +5,10 @@ from posterior import Posterior from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv from ...util import diag import numpy as np +from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) -class FITC(object): +class FITC(LatentFunctionInference): """ An object for inference when the likelihood is Gaussian, but we want to do sparse inference. diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py index 9ba3f83f..1c153518 100644 --- a/GPy/inference/latent_function_inference/laplace.py +++ b/GPy/inference/latent_function_inference/laplace.py @@ -16,8 +16,9 @@ from ...util.misc import param_to_array from posterior import Posterior import warnings from scipy import optimize +from . import LatentFunctionInference -class Laplace(object): +class Laplace(LatentFunctionInference): def __init__(self): """ diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 0cc841ed..3043a7e8 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -7,9 +7,10 @@ from ...util import diag from ...core.parameterization.variational import VariationalPosterior import numpy as np from ...util.misc import param_to_array +from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) -class VarDTC(object): +class VarDTC(LatentFunctionInference): """ An object for inference when the likelihood is Gaussian, but we want to do sparse inference. 
@@ -190,7 +191,7 @@ class VarDTC(object): post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) return post, log_marginal, grad_dict -class VarDTCMissingData(object): +class VarDTCMissingData(LatentFunctionInference): const_jitter = 1e-6 def __init__(self, limit=1, inan=None): from ...util.caching import Cacher diff --git a/GPy/inference/latent_function_inference/var_dtc_gpu.py b/GPy/inference/latent_function_inference/var_dtc_gpu.py index 9b2da1c9..d346d01f 100644 --- a/GPy/inference/latent_function_inference/var_dtc_gpu.py +++ b/GPy/inference/latent_function_inference/var_dtc_gpu.py @@ -7,6 +7,7 @@ from ...util import diag from ...core.parameterization.variational import VariationalPosterior import numpy as np from ...util.misc import param_to_array +from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) from ...util import gpu_init @@ -19,7 +20,7 @@ try: except: pass -class VarDTC_GPU(object): +class VarDTC_GPU(LatentFunctionInference): """ An object for inference when the likelihood is Gaussian, but we want to do sparse inference. diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py index 87236e2a..ffda0aba 100644 --- a/GPy/inference/latent_function_inference/var_dtc_parallel.py +++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py @@ -7,9 +7,10 @@ from ...util import diag from ...core.parameterization.variational import VariationalPosterior import numpy as np from ...util.misc import param_to_array +from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) -class VarDTC_minibatch(object): +class VarDTC_minibatch(LatentFunctionInference): """ An object for inference when the likelihood is Gaussian, but we want to do sparse inference. 
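
The protocol patch 4 introduces is easiest to see in isolation. A minimal sketch of the hook pattern (the class and hook names come from the patch; the toy subclass and driver below are illustrative only):

    class LatentFunctionInference(object):
        def on_optimization_start(self):
            """Called just before the optimization loop starts."""
            pass
        def on_optimization_end(self):
            """Called just after the optimization loop ends."""
            pass

    class CountingInference(LatentFunctionInference):
        def __init__(self):
            self.runs = 0
        def on_optimization_start(self):
            self.runs += 1      # e.g. reset caches here, as EP does

    inf = CountingInference()
    inf.on_optimization_start()   # GP.optimize calls this first,
    # ... optimizer iterations ...
    inf.on_optimization_end()     # ... and this once it returns
    assert inf.runs == 1
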
From 75d5209b98ff4e275eab2a690acece327f4ec4c7 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 15 May 2014 14:10:24 +0100 Subject: [PATCH 5/8] minor changes --- .../expectation_propagation_dtc.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py index 3625a5bf..be1efd3a 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py +++ b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py @@ -5,7 +5,13 @@ from posterior import Posterior log_2_pi = np.log(2*np.pi) class EPDTC(EP): - #def __init__(self, epsilon=1e-6, eta=1., delta=1.): + def __init__(self, epsilon=1e-6, eta=1., delta=1.): + self.epsilon, self.eta, self.delta = epsilon, eta, delta + self.reset() + + def reset(self): + self.old_mutilde, self.old_vtilde = None, None + self._ep_approximation = None def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): num_data, output_dim = X.shape @@ -20,8 +26,10 @@ class EPDTC(EP): KmmiKmn = np.dot(Kmmi,Kmn) K = np.dot(Kmn.T,KmmiKmn) - - mu, Sigma, mu_tilde, tau_tilde, Z_hat = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata) + if self._ep_approximation is None: + mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata) + else: + mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde)) From bd7a80dde5ce80585458bd40316badefc28f704c Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 15 May 2014 14:11:11 +0100 Subject: [PATCH 6/8] flags added --- .../latent_function_inference/expectation_propagation.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/GPy/inference/latent_function_inference/expectation_propagation.py b/GPy/inference/latent_function_inference/expectation_propagation.py index 172f43fb..603ff68c 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation.py +++ b/GPy/inference/latent_function_inference/expectation_propagation.py @@ -21,6 +21,7 @@ class EP(object): def reset(self): self.old_mutilde, self.old_vtilde = None, None + self._ep_approximation = None def inference(self, kern, X, likelihood, Y, Y_metadata=None, Z=None): num_data, output_dim = X.shape @@ -28,7 +29,10 @@ class EP(object): K = kern.K(X) - mu, Sigma, mu_tilde, tau_tilde, Z_hat = self.expectation_propagation(K, Y, likelihood, Y_metadata) + if self._ep_approximation is None: + mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata) + else: + mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde)) From 3d76664af054eac7a53e1a9fffeb3cf800330f1e Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 15 May 2014 16:36:03 +0100 Subject: [PATCH 7/8] EP is back. 
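
Patches 5 through 7 share one idea: memoise the expensive EP site-update loop in `_ep_approximation` and let `on_optimization_start` clear it, so the sites are recomputed once per optimization run rather than once per likelihood evaluation. A toy sketch of the pattern (only the `_ep_approximation` and `on_optimization_start` names come from the diffs; the scaffolding and the placeholder `_run_sites` body are made up):

    class CachedEP(object):
        def __init__(self):
            self._ep_approximation = None
        def on_optimization_start(self):
            self._ep_approximation = None     # force a fresh EP run
        def _run_sites(self, K):
            # placeholder for the expensive site-update loop
            return ('mu', 'Sigma', 'mu_tilde', 'tau_tilde', 'Z_hat')
        def inference(self, K):
            if self._ep_approximation is None:
                self._ep_approximation = self._run_sites(K)
            return self._ep_approximation
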
--- GPy/examples/classification.py | 9 +- .../expectation_propagation.py | 10 +- .../expectation_propagation_dtc.py | 243 ++++++++++++++++-- GPy/likelihoods/bernoulli.py | 3 + 4 files changed, 238 insertions(+), 27 deletions(-) diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py index 2b0a201d..ae9d8eb8 100644 --- a/GPy/examples/classification.py +++ b/GPy/examples/classification.py @@ -96,15 +96,11 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot= # Optimize if optimize: - #m.update_likelihood_approximation() - # Parameters optimization: try: m.optimize('scg', messages=1) except Exception as e: return m - #m.pseudo_EM() - # Plot if plot: fig, axes = pb.subplots(2, 1) @@ -133,10 +129,7 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti # Optimize if optimize: - #m.update_likelihood_approximation() - # Parameters optimization: - #m.optimize() - m.pseudo_EM() + m.optimize() # Plot if plot: diff --git a/GPy/inference/latent_function_inference/expectation_propagation.py b/GPy/inference/latent_function_inference/expectation_propagation.py index 870f2310..de8b0931 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation.py +++ b/GPy/inference/latent_function_inference/expectation_propagation.py @@ -24,6 +24,13 @@ class EP(LatentFunctionInference): self.old_mutilde, self.old_vtilde = None, None self._ep_approximation = None + def on_optimization_start(self): + self._ep_approximation = None + + def on_optimization_end(self): + # TODO: update approximation in the end as well? Maybe even with a switch? + pass + def inference(self, kern, X, likelihood, Y, Y_metadata=None, Z=None): num_data, output_dim = X.shape assert output_dim ==1, "ep in 1D only (for now!)" @@ -47,8 +54,6 @@ class EP(LatentFunctionInference): return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL} - - def expectation_propagation(self, K, Y, likelihood, Y_metadata): num_data, data_dim = Y.shape @@ -113,4 +118,3 @@ class EP(LatentFunctionInference): mu_tilde = v_tilde/tau_tilde return mu, Sigma, mu_tilde, tau_tilde, Z_hat - diff --git a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py index be1efd3a..85d8cc89 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py +++ b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py @@ -1,14 +1,56 @@ import numpy as np -from ...util.linalg import pdinv,jitchol,DSYR,tdot,dtrtrs, dpotrs -from expectation_propagation import EP +from ...util import diag +from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify, DSYR +from ...util.misc import param_to_array +from ...core.parameterization.variational import VariationalPosterior +from . 
import LatentFunctionInference from posterior import Posterior log_2_pi = np.log(2*np.pi) -class EPDTC(EP): - def __init__(self, epsilon=1e-6, eta=1., delta=1.): +class EPDTC(LatentFunctionInference): + const_jitter = 1e-6 + def __init__(self, epsilon=1e-6, eta=1., delta=1., limit=1): + from ...util.caching import Cacher + self.limit = limit + self.get_trYYT = Cacher(self._get_trYYT, limit) + self.get_YYTfactor = Cacher(self._get_YYTfactor, limit) + self.epsilon, self.eta, self.delta = epsilon, eta, delta self.reset() + def set_limit(self, limit): + self.get_trYYT.limit = limit + self.get_YYTfactor.limit = limit + + def _get_trYYT(self, Y): + return param_to_array(np.sum(np.square(Y))) + + def __getstate__(self): + # has to be overridden, as Cacher objects cannot be pickled. + return self.limit + + def __setstate__(self, state): + # has to be overridden, as Cacher objects cannot be pickled. + self.limit = state + from ...util.caching import Cacher + self.get_trYYT = Cacher(self._get_trYYT, self.limit) + self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit) + + def _get_YYTfactor(self, Y): + """ + find a matrix L which satisfies LLT = YYT. + + Note that L may have fewer columns than Y. + """ + N, D = Y.shape + if (N>=D): + return param_to_array(Y) + else: + return jitchol(tdot(Y)) + + def get_VVTfactor(self, Y, prec): + return Y * prec # TODO chache this, and make it effective + def reset(self): self.old_mutilde, self.old_vtilde = None, None self._ep_approximation = None @@ -20,28 +62,131 @@ class EPDTC(EP): Kmm = kern.K(Z) Kmn = kern.K(Z,X) - Lm = jitchol(Kmm) - Lmi = dtrtrs(Lm,np.eye(Lm.shape[0]))[0] - Kmmi = np.dot(Lmi.T,Lmi) - KmmiKmn = np.dot(Kmmi,Kmn) - K = np.dot(Kmn.T,KmmiKmn) - if self._ep_approximation is None: mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata) else: mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation - Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde)) - alpha, _ = dpotrs(LW, mu_tilde, lower=1) + if isinstance(X, VariationalPosterior): + uncertain_inputs = True + psi0 = kern.psi0(Z, X) + psi1 = Kmn.T#kern.psi1(Z, X) + psi2 = kern.psi2(Z, X) + else: + uncertain_inputs = False + psi0 = kern.Kdiag(X) + psi1 = Kmn.T#kern.K(X, Z) + psi2 = None - log_marginal = 0.5*(-num_data * log_2_pi - W_logdet - np.sum(alpha * mu_tilde)) # TODO: add log Z_hat?? 
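
A quick numerical check of the identity behind `_get_YYTfactor` above: when N < D, a Cholesky factor L of Y Y^T satisfies L L^T = Y Y^T while having only N columns (`np.linalg.cholesky` stands in for GPy's jittered `jitchol` in this sketch):

    import numpy as np

    N, D = 5, 20
    Y = np.random.randn(N, D)
    L = np.linalg.cholesky(Y.dot(Y.T))
    assert L.shape == (N, N) and np.allclose(L.dot(L.T), Y.dot(Y.T))
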
+ #see whether we're using variational uncertain inputs - dL_dK = 0.5 * (tdot(alpha[:,None]) - Wi) + _, output_dim = Y.shape + + #see whether we've got a different noise variance for each datum + #beta = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6) + beta = tau_tilde + VVT_factor = beta[:,None]*mu_tilde[:,None] + trYYT = self.get_trYYT(mu_tilde[:,None]) + + # do the inference: + het_noise = beta.size > 1 + num_inducing = Z.shape[0] + num_data = Y.shape[0] + # kernel computations, using BGPLVM notation + + Kmm = kern.K(Z).copy() + diag.add(Kmm, self.const_jitter) + Lm = jitchol(Kmm) + + # The rather complex computations of A + if uncertain_inputs: + if het_noise: + psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0) + else: + psi2_beta = psi2.sum(0) * beta + LmInv = dtrtri(Lm) + A = LmInv.dot(psi2_beta.dot(LmInv.T)) + else: + if het_noise: + tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) + else: + tmp = psi1 * (np.sqrt(beta)) + tmp, _ = dtrtrs(Lm, tmp.T, lower=1) + A = tdot(tmp) #print A.sum() + + # factor B + B = np.eye(num_inducing) + A + LB = jitchol(B) + psi1Vf = np.dot(psi1.T, VVT_factor) + # back substutue C into psi1Vf + tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0) + _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0) + tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1) + Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1) + + # data fit and derivative of L w.r.t. Kmm + delit = tdot(_LBi_Lmi_psi1Vf) + data_fit = np.trace(delit) + DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit) + delit = -0.5 * DBi_plus_BiPBi + delit += -0.5 * B * output_dim + delit += output_dim * np.eye(num_inducing) + # Compute dL_dKmm + dL_dKmm = backsub_both_sides(Lm, delit) + + # derivatives of L w.r.t. psi + dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, + VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, + psi1, het_noise, uncertain_inputs) + + # log marginal likelihood + log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, + psi0, A, LB, trYYT, data_fit, VVT_factor) + + #put the gradients in the right places + dL_dR = _compute_dL_dR(likelihood, + het_noise, uncertain_inputs, LB, + _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, + psi0, psi1, beta, + data_fit, num_data, output_dim, trYYT, mu_tilde[:,None]) + + dL_dthetaL = 0#likelihood.exact_inference_gradients(dL_dR,Y_metadata) + + if uncertain_inputs: + grad_dict = {'dL_dKmm': dL_dKmm, + 'dL_dpsi0':dL_dpsi0, + 'dL_dpsi1':dL_dpsi1, + 'dL_dpsi2':dL_dpsi2, + 'dL_dthetaL':dL_dthetaL} + else: + grad_dict = {'dL_dKmm': dL_dKmm, + 'dL_dKdiag':dL_dpsi0, + 'dL_dKnm':dL_dpsi1, + 'dL_dthetaL':dL_dthetaL} + + #get sufficient things for posterior prediction + #TODO: do we really want to do this in the loop? 
+ if VVT_factor.shape[1] == Y.shape[1]: + woodbury_vector = Cpsi1Vf # == Cpsi1V + else: + print 'foobar' + psi1V = np.dot(mu_tilde[:,None].T*beta, psi1).T + tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) + tmp, _ = dpotrs(LB, tmp, lower=1) + woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1) + Bi, _ = dpotri(LB, lower=1) + symmetrify(Bi) + Bi = -dpotri(LB, lower=1)[0] + diag.add(Bi, 1) + + woodbury_inv = backsub_both_sides(Lm, Bi) + + #construct a posterior object + post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) + return post, log_marginal, grad_dict - dL_dthetaL = np.zeros(likelihood.size)#TODO: derivatives of the likelihood parameters - return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL} @@ -129,3 +274,69 @@ class EPDTC(EP): mu_tilde = v_tilde/tau_tilde return mu, Sigma, mu_tilde, tau_tilde, Z_hat + +def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs): + dL_dpsi0 = -0.5 * output_dim * (beta[:,None] * np.ones([num_data, 1])).flatten() + dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T) + dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi) + if het_noise: + if uncertain_inputs: + dL_dpsi2 = beta[:, None, None] * dL_dpsi2_beta[None, :, :] + else: + dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * beta.reshape(num_data, 1)).T).T + dL_dpsi2 = None + else: + dL_dpsi2 = beta * dL_dpsi2_beta + if uncertain_inputs: + # repeat for each of the N psi_2 matrices + dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], num_data, axis=0) + else: + # subsume back into psi1 (==Kmn) + dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2) + dL_dpsi2 = None + + return dL_dpsi0, dL_dpsi1, dL_dpsi2 + + +def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT, Y): + # the partial derivative vector for the likelihood + if likelihood.size == 0: + # save computation here. + dL_dR = None + elif het_noise: + if uncertain_inputs: + raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented" + else: + #from ...util.linalg import chol_inv + #LBi = chol_inv(LB) + LBi, _ = dtrtrs(LB,np.eye(LB.shape[0])) + + Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0) + _LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0) + + dL_dR = -0.5 * beta + 0.5 * (beta*Y)**2 + dL_dR += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2 + + dL_dR += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2 + + dL_dR += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * Y * beta**2 + dL_dR += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2 + else: + # likelihood is not heteroscedatic + dL_dR = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2 + dL_dR += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta) + dL_dR += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit) + return dL_dR + +def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, psi0, A, LB, trYYT, data_fit,Y): + #compute log marginal likelihood + if het_noise: + lik_1 = -0.5 * num_data * output_dim * np.log(2. * np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(beta * np.square(Y).sum(axis=-1)) + lik_2 = -0.5 * output_dim * (np.sum(beta.flatten() * psi0) - np.trace(A)) + else: + lik_1 = -0.5 * num_data * output_dim * (np.log(2. 
* np.pi) - np.log(beta)) - 0.5 * beta * trYYT + lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A)) + lik_3 = -output_dim * (np.sum(np.log(np.diag(LB)))) + lik_4 = 0.5 * data_fit + log_marginal = lik_1 + lik_2 + lik_3 + lik_4 + return log_marginal diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index 7b867954..6c53958b 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -227,3 +227,6 @@ class Bernoulli(Likelihood): ns = np.ones_like(gp, dtype=int) Ysim = np.random.binomial(ns, self.gp_link.transf(gp)) return Ysim.reshape(orig_shape) + + def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None): + pass From a796b7bc81cf9d2b314b8882fea66003517d7f1b Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Thu, 15 May 2014 17:56:07 +0100 Subject: [PATCH 8/8] fix the bug in mocap demos --- GPy/examples/dimensionality_reduction.py | 8 +++++--- GPy/plotting/matplot_dep/visualize.py | 17 ++++++++++------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index a15c2a93..484970b8 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -414,9 +414,10 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True): ax = m.plot_latent() y = m.Y[0, :] data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) - vis = GPy.plotting.matplot_dep.visualize.lvm(m.X[:1, :].copy(), m, data_show, latent_axes=ax) + lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[:1, :].copy(), m, data_show, latent_axes=ax) raw_input('Press enter to finish') - + lvm_visualizer.close() + data_show.close() return m def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True): @@ -515,9 +516,10 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose ax = m.plot_latent() y = m.Y[0, :] data_show = GPy.plotting.matplot_dep.visualize.skeleton_show(y[None, :], data['skel']) - lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[0].copy(), m, data_show, latent_axes=ax) raw_input('Press enter to finish') lvm_visualizer.close() + data_show.close() return m diff --git a/GPy/plotting/matplot_dep/visualize.py b/GPy/plotting/matplot_dep/visualize.py index 6abd3872..d7884730 100644 --- a/GPy/plotting/matplot_dep/visualize.py +++ b/GPy/plotting/matplot_dep/visualize.py @@ -102,7 +102,8 @@ class lvm(matplotlib_show): vals = param_to_array(model.X.mean) else: vals = param_to_array(model.X) - + if len(vals.shape)==1: + vals = vals[None,:] matplotlib_show.__init__(self, vals, axes=latent_axes) if isinstance(latent_axes,mpl.axes.Axes): @@ -393,14 +394,13 @@ class mocap_data_show_vpython(vpython_show): def process_values(self): raise NotImplementedError, "this needs to be implemented to use the data_show class" - class mocap_data_show(matplotlib_show): """Base class for visualizing motion capture data.""" def __init__(self, vals, axes=None, connect=None): if axes==None: fig = plt.figure() - axes = fig.add_subplot(111, projection='3d') + axes = fig.add_subplot(111, projection='3d',aspect='equal') matplotlib_show.__init__(self, vals, axes) self.connect = connect @@ -445,11 +445,12 @@ class mocap_data_show(matplotlib_show): def process_values(self): raise NotImplementedError, "this needs to be implemented to use the data_show class" - def 
initialize_axes(self): + def initialize_axes(self, boundary=0.05): """Set up the axes with the right limits and scaling.""" - self.x_lim = np.array([self.vals[:, 0].min(), self.vals[:, 0].max()]) - self.y_lim = np.array([self.vals[:, 1].min(), self.vals[:, 1].max()]) - self.z_lim = np.array([self.vals[:, 2].min(), self.vals[:, 2].max()]) + bs = [(self.vals[:, i].max()-self.vals[:, i].min())*boundary for i in xrange(3)] + self.x_lim = np.array([self.vals[:, 0].min()-bs[0], self.vals[:, 0].max()+bs[0]]) + self.y_lim = np.array([self.vals[:, 1].min()-bs[1], self.vals[:, 1].max()+bs[1]]) + self.z_lim = np.array([self.vals[:, 2].min()-bs[2], self.vals[:, 2].max()+bs[2]]) def initialize_axes_modify(self): self.points_handle.remove() @@ -472,6 +473,8 @@ class mocap_data_show(matplotlib_show): class stick_show(mocap_data_show): """Show a three dimensional point cloud as a figure. Connect elements of the figure together using the matrix connect.""" def __init__(self, vals, connect=None, axes=None): + if len(vals.shape)==1: + vals = vals[None,:] mocap_data_show.__init__(self, vals, axes=axes, connect=connect) def process_values(self):