From 22d30d9d39c70f806fe5bcb815cce9c8eb0f8dca Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Mon, 10 Nov 2014 16:24:24 +0000 Subject: [PATCH 1/9] new ssrbf implementation --- GPy/kern/_src/psi_comp/ssrbf_psi_comp.py | 561 +++++++++++++++-------- 1 file changed, 380 insertions(+), 181 deletions(-) diff --git a/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py b/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py index 6302a590..f6a24c86 100644 --- a/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py +++ b/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py @@ -7,193 +7,392 @@ The package for the psi statistics computation import numpy as np -def psicomputations(variance, lengthscale, Z, variational_posterior): - """ - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - """ - # here are the "statistics" for psi0, psi1 and psi2 - # Produced intermediate results: - # _psi1 NxM - mu = variational_posterior.mean - S = variational_posterior.variance - gamma = variational_posterior.binary_prob +try: + from scipy import weave + + def _psicomputations(variance, lengthscale, Z, variational_posterior): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi0, psi1 and psi2 + # Produced intermediate results: + # _psi1 NxM + mu = variational_posterior.mean + S = variational_posterior.variance + gamma = variational_posterior.binary_prob + + N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1] + l2 = np.square(lengthscale) + log_denom1 = np.log(S/l2+1) + log_denom2 = np.log(2*S/l2+1) + log_gamma = np.log(gamma) + log_gamma1 = np.log(1.-gamma) + variance = float(variance) + psi0 = np.empty(N) + psi0[:] = variance + psi1 = np.empty((N,M)) + psi2n = np.empty((N,M,M)) + + from ....util.misc import param_to_array + S = param_to_array(S) + mu = param_to_array(mu) + gamma = param_to_array(gamma) + Z = param_to_array(Z) + + support_code = """ + #include + """ + code = """ + for(int n=0; npsi1_exp2)?psi1_exp1+log1p(exp(psi1_exp2-psi1_exp1)):psi1_exp2+log1p(exp(psi1_exp1-psi1_exp2)); + } + // Compute Psi_2 + double muZhat = mu(n,q) - (Zm1q+Zm2q)/2.; + double Z2 = Zm1q*Zm1q+ Zm2q*Zm2q; + double dZ = Zm1q - Zm2q; + + double psi2_exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. 
+ log_gamma(n,q); + double psi2_exp2 = log_gamma1(n,q) - Z2/(2.*lq); + log_psi2_n += (psi2_exp1>psi2_exp2)?psi2_exp1+log1p(exp(psi2_exp2-psi2_exp1)):psi2_exp2+log1p(exp(psi2_exp1-psi2_exp2)); + } + double exp_psi2_n = exp(log_psi2_n); + psi2n(n,m1,m2) = variance*variance*exp_psi2_n; + if(m1!=m2) { psi2n(n,m2,m1) = variance*variance*exp_psi2_n;} + } + psi1(n,m1) = variance*exp(log_psi1); + } + } + """ + weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz) + + psi2 = psi2n.sum(axis=0) + return psi0,psi1,psi2,psi2n + + from GPy.util.caching import Cacher + psicomputations = Cacher(_psicomputations, limit=1) + + def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): + ARD = (len(lengthscale)!=1) + + _,psi1,_,psi2n = psicomputations(variance, lengthscale, Z, variational_posterior) + + mu = variational_posterior.mean + S = variational_posterior.variance + gamma = variational_posterior.binary_prob + N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1] + l2 = np.square(lengthscale) + log_denom1 = np.log(S/l2+1) + log_denom2 = np.log(2*S/l2+1) + log_gamma = np.log(gamma) + log_gamma1 = np.log(1.-gamma) + variance = float(variance) + + dvar = np.zeros(1) + dmu = np.zeros((N,Q)) + dS = np.zeros((N,Q)) + dgamma = np.zeros((N,Q)) + dl = np.zeros(Q) + dZ = np.zeros((M,Q)) + dvar += np.sum(dL_dpsi0) + + from ....util.misc import param_to_array + S = param_to_array(S) + mu = param_to_array(mu) + gamma = param_to_array(gamma) + Z = param_to_array(Z) + + support_code = """ + #include + """ + code = """ + for(int n=0; nexp2) { + d_exp1 = 1.; + d_exp2 = exp(exp2-exp1); + } else { + d_exp1 = exp(exp1-exp2); + d_exp2 = 1.; + } + double exp_sum = d_exp1+d_exp2; + + dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum); + dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.; + dgamma(n,q) += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum; + dl(q) += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zm1q*Zm1q/(lq*lq)*d_exp2)/(2.*exp_sum); + dZ(m1,q) += lpsi1*(-Zmu/denom*d_exp1-Zm1q/lq*d_exp2)/exp_sum; + } + // Compute Psi_2 + double lpsi2 = psi2n(n,m1,m2)*dL_dpsi2(m1,m2); + if(q==0) {dvar(0) += lpsi2*2/variance;} + + double dZm1m2 = Zm1q - Zm2q; + double Z2 = Zm1q*Zm1q+Zm2q*Zm2q; + double muZhat = mu_nq - (Zm1q + Zm2q)/2.; + double denom = 2.*Snq+lq; + double muZhat2_denom = muZhat*muZhat/denom; + + double exp1 = dZm1m2*dZm1m2/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. 
+ log_gamma(n,q); + double exp2 = log_gamma1(n,q) - Z2/(2.*lq); + double d_exp1,d_exp2; + if(exp1>exp2) { + d_exp1 = 1.; + d_exp2 = exp(exp2-exp1); + } else { + d_exp1 = exp(exp1-exp2); + d_exp2 = 1.; + } + double exp_sum = d_exp1+d_exp2; + + dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum; + dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum; + dgamma(n,q) += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum; + dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum; + dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum; + } + } + } + } + """ + weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz) + + dl *= 2.*lengthscale + if not ARD: + dl = dl.sum() + + return dvar, dl, dZ, dmu, dS, dgamma + +except: + + def psicomputations(variance, lengthscale, Z, variational_posterior): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi0, psi1 and psi2 + # Produced intermediate results: + # _psi1 NxM + mu = variational_posterior.mean + S = variational_posterior.variance + gamma = variational_posterior.binary_prob + + psi0 = np.empty(mu.shape[0]) + psi0[:] = variance + psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma) + psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma) + return psi0, psi1, psi2 - psi0 = np.empty(mu.shape[0]) - psi0[:] = variance - psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma) - psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma) - return psi0, psi1, psi2 - -def _psi1computations(variance, lengthscale, Z, mu, S, gamma): - """ - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - """ - # here are the "statistics" for psi1 - # Produced intermediate results: - # _psi1 NxM - - lengthscale2 = np.square(lengthscale) - - # psi1 - _psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ - _psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ - _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ - _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ - _psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ - _psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ - _psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. 
# NxMxQ - _psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2) - _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ - _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM - _psi1 = variance * np.exp(_psi1_exp_sum) # NxM - - return _psi1 - -def _psi2computations(variance, lengthscale, Z, mu, S, gamma): - """ - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - """ - # here are the "statistics" for psi2 - # Produced intermediate results: - # _psi2 MxM + def _psi1computations(variance, lengthscale, Z, mu, S, gamma): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi1 + # Produced intermediate results: + # _psi1 NxM - lengthscale2 = np.square(lengthscale) + lengthscale2 = np.square(lengthscale) - _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q - _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q - _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q - _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ - - # psi2 - _psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ - _psi2_denom_sqrt = np.sqrt(_psi2_denom) - _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q - _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom) - _psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ - _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q - _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ - _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2) - _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) - _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM - _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM - - return _psi2 - -def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): - ARD = (len(lengthscale)!=1) + # psi1 + _psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ + _psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ + _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ + _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ + _psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ + _psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ + _psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. 
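+        # The two exponents implement the spike-and-slab mixture per
+        # dimension: _psi1_exponent1 is the "slab" branch (weight gamma),
+        # i.e. the usual RBF expectation with the input smoothed by S;
+        # _psi1_exponent2 is the "spike" branch (weight 1-gamma), where the
+        # latent coordinate collapses to 0 and the factor reduces to
+        # exp(-Z_q^2 / (2 l_q^2)). The branches are combined below with a
+        # max-shifted log-sum-exp, then summed over q and exponentiated,
+        # forming the product over dimensions without underflow.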
# NxMxQ + _psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2) + _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ + _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM + _psi1 = variance * np.exp(_psi1_exp_sum) # NxM - dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - - dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 + return _psi1 - dL_dlengscale = dl_psi1 + dl_psi2 - if not ARD: - dL_dlengscale = dL_dlengscale.sum() - - dL_dgamma = dgamma_psi1 + dgamma_psi2 - dL_dmu = dmu_psi1 + dmu_psi2 - dL_dS = dS_psi1 + dS_psi2 - dL_dZ = dZ_psi1 + dZ_psi2 + def _psi2computations(variance, lengthscale, Z, mu, S, gamma): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi2 + # Produced intermediate results: + # _psi2 MxM + + lengthscale2 = np.square(lengthscale) + + _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q + _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q + _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q + _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ - return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma - -def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma): - """ - dL_dpsi1 - NxM - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - """ - # here are the "statistics" for psi1 - # Produced intermediate results: dL_dparams w.r.t. psi1 - # _dL_dvariance 1 - # _dL_dlengthscale Q - # _dL_dZ MxQ - # _dL_dgamma NxQ - # _dL_dmu NxQ - # _dL_dS NxQ + # psi2 + _psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ + _psi2_denom_sqrt = np.sqrt(_psi2_denom) + _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q + _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom) + _psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ + _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q + _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ + _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2) + _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) + _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM + _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM - lengthscale2 = np.square(lengthscale) - - # psi1 - _psi1_denom = S / lengthscale2 + 1. 
# NxQ - _psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ - _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ - _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ - _psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ - _psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ - _psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ - _psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2) - _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ - _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM - _psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ - _psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ - _psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ - _psi1 = variance * np.exp(_psi1_exp_sum) # NxM - _dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1 - _dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ - _dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ - _dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ - _dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z)) - _dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z)) - - return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma - -def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma): - """ - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - dL_dpsi2 - MxM - """ - # here are the "statistics" for psi2 - # Produced the derivatives w.r.t. psi2: - # _dL_dvariance 1 - # _dL_dlengthscale Q - # _dL_dZ MxQ - # _dL_dgamma NxQ - # _dL_dmu NxQ - # _dL_dS NxQ + return _psi2 - lengthscale2 = np.square(lengthscale) + def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): + ARD = (len(lengthscale)!=1) + + dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + + dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 + + dL_dlengscale = dl_psi1 + dl_psi2 + if not ARD: + dL_dlengscale = dL_dlengscale.sum() + + dL_dgamma = dgamma_psi1 + dgamma_psi2 + dL_dmu = dmu_psi1 + dmu_psi2 + dL_dS = dS_psi1 + dS_psi2 + dL_dZ = dZ_psi1 + dZ_psi2 + + return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma - _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q - _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q - _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q - _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ - - # psi2 - _psi2_denom = 2.*S / lengthscale2 + 1. 
# NxQ - _psi2_denom_sqrt = np.sqrt(_psi2_denom) - _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q - _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:]) - _psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ - _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q - _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ - _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2) - _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) - _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM - _psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ - _psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ - _psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ - _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM - _dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance - _dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z)) - _dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq) - _dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq) - _dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z)) - _dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z)) - - return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma + def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma): + """ + dL_dpsi1 - NxM + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi1 + # Produced intermediate results: dL_dparams w.r.t. psi1 + # _dL_dvariance 1 + # _dL_dlengthscale Q + # _dL_dZ MxQ + # _dL_dgamma NxQ + # _dL_dmu NxQ + # _dL_dS NxQ + + lengthscale2 = np.square(lengthscale) + + # psi1 + _psi1_denom = S / lengthscale2 + 1. 
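+        # The derivative code first reconstructs the psi1 factors and then
+        # builds _psi1_q[n,m,q] = psi1[n,m] with the q-th factor divided out
+        # (a leave-one-out product), so each per-dimension gradient is
+        # obtained by multiplying the derivative of that single factor back
+        # in inside the einsum contractions.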
# NxQ + _psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ + _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ + _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ + _psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ + _psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ + _psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ + _psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2) + _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ + _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM + _psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ + _psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ + _psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ + _psi1 = variance * np.exp(_psi1_exp_sum) # NxM + _dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1 + _dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ + _dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ + _dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ + _dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z)) + _dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z)) + + return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma + + def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + dL_dpsi2 - MxM + """ + # here are the "statistics" for psi2 + # Produced the derivatives w.r.t. psi2: + # _dL_dvariance 1 + # _dL_dlengthscale Q + # _dL_dZ MxQ + # _dL_dgamma NxQ + # _dL_dmu NxQ + # _dL_dS NxQ + + lengthscale2 = np.square(lengthscale) + + _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q + _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q + _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q + _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ + + # psi2 + _psi2_denom = 2.*S / lengthscale2 + 1. 
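+        # Same leave-one-out construction as in _psi1compDer, one rank
+        # higher: _psi2_q[n,m,o,q] is psi2 with the q-th factor divided out,
+        # and the slab (_psi2_exp_dist_sq) and spike (_psi2_exp_Z) branch
+        # derivatives are added back per dimension in the einsums.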
# NxQ + _psi2_denom_sqrt = np.sqrt(_psi2_denom) + _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q + _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:]) + _psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ + _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q + _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ + _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2) + _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) + _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM + _psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ + _psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ + _psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ + _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM + _dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance + _dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z)) + _dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq) + _dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq) + _dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z)) + _dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z)) + + return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma From ca0523c6ee50f957eb237739511eba8265c43e80 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 11 Nov 2014 09:56:15 +0000 Subject: [PATCH 2/9] [setup] tweaks for submission --- setup.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/setup.py b/setup.py index e3593b3c..c4963bcc 100644 --- a/setup.py +++ b/setup.py @@ -20,6 +20,7 @@ setup(name = 'GPy', url = "http://sheffieldml.github.com/GPy/", packages = ["GPy.models", "GPy.inference.optimization", + "GPy.inference.mcmc", "GPy.inference", "GPy.inference.latent_function_inference", "GPy.likelihoods", "GPy.mappings", @@ -31,14 +32,20 @@ setup(name = 'GPy', "GPy.plotting.matplot_dep.latent_space_visualizations", "GPy.plotting.matplot_dep", "GPy.plotting"], package_dir={'GPy': 'GPy'}, - package_data = {'GPy': ['defaults.cfg', 'installation.cfg', 'util/data_resources.json', 'util/football_teams.json']}, + package_data = {'GPy': ['defaults.cfg', 'installation.cfg', + 'util/data_resources.json', + 'util/football_teams.json']}, + include_package_data = True, py_modules = ['GPy.__init__'], + test_suite = 'GPy.testing', long_description=read('README.md'), - install_requires=['numpy>=1.8', 'scipy>=0.14'], - extras_require = { - 'docs':['matplotlib>=1.4','Sphinx','ipython'], - }, - classifiers=[ - "License :: OSI Approved :: BSD License"], - zip_safe = False + install_requires=['numpy>=1.7', 'scipy>=0.12'], + extras_require = {'docs':['matplotlib >=1.3','Sphinx','IPython']}, + 
classifiers=['License :: OSI Approved :: BSD License', + 'Natural Language :: English', + 'Operating System :: MacOS :: MacOS X', + 'Operating System :: Microsoft :: Windows', + 'Operating System :: POSIX :: Linux', + 'Programming Language :: Python :: 2.7', + 'Topic :: Scientific/Engineering :: Artificial Intelligence'] ) From 1e6ed9f873f7518dee4490227f17dbe8e16a1bfd Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 11 Nov 2014 09:56:36 +0000 Subject: [PATCH 3/9] [linalg] ppca with missing data removed --- GPy/util/linalg.py | 99 ---------------------------------------------- 1 file changed, 99 deletions(-) diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index cef699cf..dffd438a 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -328,105 +328,6 @@ def ppca(Y, Q, iterations=100): pass return np.asarray_chkfinite(exp_x), np.asarray_chkfinite(W) -def ppca_missing_data_at_random(Y, Q, iters=100): - """ - EM implementation of Probabilistic pca for when there is missing data. - - Taken from - - .. math: - \\mathbf{Y} = \mathbf{XW} + \\epsilon \\text{, where} - \\epsilon = \\mathcal{N}(0, \\sigma^2 \mathbf{I}) - - :returns: X, W, sigma^2 - """ - from numpy.ma import dot as madot - import diag - from GPy.util.subarray_and_sorting import common_subarrays - import time - debug = 1 - # Initialise W randomly - N, D = Y.shape - W = np.random.randn(Q, D) * 1e-3 - Y = np.ma.masked_invalid(Y, copy=1) - nu = 1. - #num_obs_i = 1./Y.count() - Ycentered = Y - Y.mean(0) - - X = np.zeros((N,Q)) - cs = common_subarrays(Y.mask) - cr = common_subarrays(Y.mask, 1) - Sigma = np.zeros((N, Q, Q)) - Sigma2 = np.zeros((N, Q, Q)) - mu = np.zeros(D) - """ - if debug: - import matplotlib.pyplot as pylab - fig = pylab.figure("FIT MISSING DATA"); - ax = fig.gca() - ax.cla() - lines = pylab.plot(np.zeros((N,Q)).dot(W)) - """ - W2 = np.zeros((Q,D)) - - for i in range(iters): -# Sigma = np.linalg.solve(diag.add(madot(W,W.T), nu), diag.times(np.eye(Q),nu)) -# exp_x = madot(madot(Ycentered, W.T),Sigma)/nu -# Ycentered = (Y - exp_x.dot(W).mean(0)) -# #import ipdb;ipdb.set_trace() -# #Ycentered = mu -# W = np.linalg.solve(madot(exp_x.T,exp_x) + Sigma, madot(exp_x.T, Ycentered)) -# nu = (((Ycentered - madot(exp_x, W))**2).sum(0) + madot(W.T,madot(Sigma,W)).sum(0)).sum()/N - for csi, (mask, index) in enumerate(cs.iteritems()): - mask = ~np.array(mask) - Sigma2[index, :, :] = nu * np.linalg.inv(diag.add(W2[:,mask].dot(W2[:,mask].T), nu)) - #X[index,:] = madot((Sigma[csi]/nu),madot(W,Ycentered[index].T))[:,0] - X2 = ((Sigma2/nu) * (madot(Ycentered,W2.T).base)[:,:,None]).sum(-1) - mu2 = (Y - X.dot(W)).mean(0) - for n in range(N): - Sigma[n] = nu * np.linalg.inv(diag.add(W[:,~Y.mask[n]].dot(W[:,~Y.mask[n]].T), nu)) - X[n, :] = (Sigma[n]/nu).dot(W[:,~Y.mask[n]].dot(Ycentered[n,~Y.mask[n]].T)) - for d in range(D): - mu[d] = (Y[~Y.mask[:,d], d] - X[~Y.mask[:,d]].dot(W[:, d])).mean() - Ycentered = (Y - mu) - nu3 = 0. - for cri, (mask, index) in enumerate(cr.iteritems()): - mask = ~np.array(mask) - W2[:,index] = np.linalg.solve(X[mask].T.dot(X[mask]) + Sigma[mask].sum(0), madot(X[mask].T, Ycentered[mask,index]))[:,None] - W2[:,index] = np.linalg.solve(X.T.dot(X) + Sigma.sum(0), madot(X.T, Ycentered[:,index])) - #nu += (((Ycentered[mask,index] - X[mask].dot(W[:,index]))**2).sum(0) + W[:,index].T.dot(Sigma[mask].sum(0).dot(W[:,index])).sum(0)).sum() - nu3 += (((Ycentered[index] - X.dot(W[:,index]))**2).sum(0) + W[:,index].T.dot(Sigma.sum(0).dot(W[:,index])).sum(0)).sum() - nu3 /= N - nu = 0. - nu2 = 0. 
- W = np.zeros((Q,D)) - for j in range(D): - W[:,j] = np.linalg.solve(X[~Y.mask[:,j]].T.dot(X[~Y.mask[:,j]]) + Sigma[~Y.mask[:,j]].sum(0), madot(X[~Y.mask[:,j]].T, Ycentered[~Y.mask[:,j],j])) - nu2f = np.tensordot(W[:,j].T, Sigma[~Y.mask[:,j],:,:], [0,1]).dot(W[:,j]) - nu2s = W[:,j].T.dot(Sigma[~Y.mask[:,j],:,:].sum(0).dot(W[:,j])) - nu2 += (((Ycentered[~Y.mask[:,j],j] - X[~Y.mask[:,j],:].dot(W[:,j]))**2) + nu2f).sum() - for i in range(N): - if not Y.mask[i,j]: - nu += ((Ycentered[i,j] - X[i,:].dot(W[:,j]))**2) + W[:,j].T.dot(Sigma[i,:,:].dot(W[:,j])) - nu /= N - nu2 /= N - nu4 = (((Ycentered - X.dot(W))**2).sum(0) + W.T.dot(Sigma.sum(0).dot(W)).sum(0)).sum()/N - import ipdb;ipdb.set_trace() - """ - if debug: - #print Sigma[0] - print "nu:", nu, "sum(X):", X.sum() - pred_y = X.dot(W) - for x, l in zip(pred_y.T, lines): - l.set_ydata(x) - ax.autoscale_view() - ax.set_ylim(pred_y.min(), pred_y.max()) - fig.canvas.draw() - time.sleep(.3) - """ - return np.asarray_chkfinite(X), np.asarray_chkfinite(W), nu - - def tdot_numpy(mat, out=None): return np.dot(mat, mat.T, out) From 07a5e290dea76cafcd063c4f49affe4930cc34af Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 11 Nov 2014 10:20:34 +0000 Subject: [PATCH 4/9] [updates] starting to extract out standalone modules --- GPy/core/gp.py | 8 +- GPy/core/parameterization/observable.py | 69 ++++++++++ GPy/core/parameterization/observable_array.py | 5 +- GPy/core/parameterization/parameter_core.py | 124 ++---------------- GPy/core/parameterization/updateable.py | 63 +++++++++ GPy/util/caching.py | 4 +- 6 files changed, 149 insertions(+), 124 deletions(-) create mode 100644 GPy/core/parameterization/observable.py create mode 100644 GPy/core/parameterization/updateable.py diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 7835f8b9..0b59c0ea 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -96,7 +96,7 @@ class GP(Model): def set_XY(self, X=None, Y=None): """ Set the input / output of the model - + :param X: input observations :param Y: output observations """ @@ -384,16 +384,16 @@ class GP(Model): print "KeyboardInterrupt caught, calling on_optimization_end() to round things up" self.inference_method.on_optimization_end() raise - + def infer_newX(self, Y_new, optimize=True, ): """ Infer the distribution of X for the new observed data *Y_new*. - + :param Y_new: the new observed data for inference :type Y_new: numpy.ndarray :param optimize: whether to optimize the location of new X (True by default) :type optimize: boolean - :return: a tuple containing the posterior estimation of X and the model that optimize X + :return: a tuple containing the posterior estimation of X and the model that optimize X :rtype: (GPy.core.parameterization.variational.VariationalPosterior or numpy.ndarray, GPy.core.Model) """ from ..inference.latent_function_inference.inferenceX import infer_newX diff --git a/GPy/core/parameterization/observable.py b/GPy/core/parameterization/observable.py new file mode 100644 index 00000000..ad55b44c --- /dev/null +++ b/GPy/core/parameterization/observable.py @@ -0,0 +1,69 @@ +''' +Created on 30 Oct 2014 + +@author: maxz +''' + + +class Observable(object): + """ + Observable pattern for parameterization. + + This Object allows for observers to register with self and a (bound!) function + as an observer. Every time the observable changes, it sends a notification with + self as only argument to all its observers. 
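+
+    A minimal usage sketch (``listener.on_change`` stands for any bound
+    method accepting ``(changed, which)``; both names are placeholders)::
+
+        m.add_observer(listener, listener.on_change, priority=0)
+        m.notify_observers()  # invokes listener.on_change(m, which=m)
+        m.remove_observer(listener)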
+ """ + def __init__(self, *args, **kwargs): + super(Observable, self).__init__() + from lists_and_dicts import ObserverList + self.observers = ObserverList() + + def add_observer(self, observer, callble, priority=0): + """ + Add an observer `observer` with the callback `callble` + and priority `priority` to this observers list. + """ + self.observers.add(priority, observer, callble) + + def remove_observer(self, observer, callble=None): + """ + Either (if callble is None) remove all callables, + which were added alongside observer, + or remove callable `callble` which was added alongside + the observer `observer`. + """ + to_remove = [] + for poc in self.observers: + _, obs, clble = poc + if callble is not None: + if (obs is observer) and (callble == clble): + to_remove.append(poc) + else: + if obs is observer: + to_remove.append(poc) + for r in to_remove: + self.observers.remove(*r) + + def notify_observers(self, which=None, min_priority=None): + """ + Notifies all observers. Which is the element, which kicked off this + notification loop. The first argument will be self, the second `which`. + + NOTE: notifies only observers with priority p > min_priority! + ^^^^^^^^^^^^^^^^ + :param min_priority: only notify observers with priority > min_priority + if min_priority is None, notify all observers in order + """ + if which is None: + which = self + if min_priority is None: + [callble(self, which=which) for _, _, callble in self.observers] + else: + for p, _, callble in self.observers: + if p <= min_priority: + break + callble(self, which=which) + + def change_priority(self, observer, callble, priority): + self.remove_observer(observer, callble) + self.add_observer(observer, callble, priority) \ No newline at end of file diff --git a/GPy/core/parameterization/observable_array.py b/GPy/core/parameterization/observable_array.py index 1253caae..4a7bdf85 100644 --- a/GPy/core/parameterization/observable_array.py +++ b/GPy/core/parameterization/observable_array.py @@ -1,10 +1,11 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -__updated__ = '2014-10-29' +__updated__ = '2014-11-11' import numpy as np -from parameter_core import Observable, Pickleable +from parameter_core import Pickleable +from observable import Observable class ObsAr(np.ndarray, Pickleable, Observable): """ diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index f8f7f7cc..4048fbae 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -17,6 +17,7 @@ from transformations import Transformation,Logexp, NegativeLogexp, Logistic, __f import numpy as np import re import logging +from updateable import Updateable class HierarchyError(Exception): """ @@ -40,115 +41,6 @@ def adjust_name_for_printing(name): return '' -class Observable(object): - """ - Observable pattern for parameterization. - - This Object allows for observers to register with self and a (bound!) function - as an observer. Every time the observable changes, it sends a notification with - self as only argument to all its observers. 
- """ - _updates = True - def __init__(self, *args, **kwargs): - super(Observable, self).__init__() - from lists_and_dicts import ObserverList - self.observers = ObserverList() - - @property - def updates(self): - raise DeprecationWarning("updates is now a function, see update(True|False|None)") - - @updates.setter - def updates(self, ups): - raise DeprecationWarning("updates is now a function, see update(True|False|None)") - - def update_model(self, updates=None): - """ - Get or set, whether automatic updates are performed. When updates are - off, the model might be in a non-working state. To make the model work - turn updates on again. - - :param bool|None updates: - - bool: whether to do updates - None: get the current update state - """ - if updates is None: - p = getattr(self, '_highest_parent_', None) - if p is not None: - self._updates = p._updates - return self._updates - assert isinstance(updates, bool), "updates are either on (True) or off (False)" - p = getattr(self, '_highest_parent_', None) - if p is not None: - p._updates = updates - else: - self._updates = updates - self.trigger_update() - - def toggle_update(self): - self.update_model(not self.update()) - - def trigger_update(self, trigger_parent=True): - """ - Update the model from the current state. - Make sure that updates are on, otherwise this - method will do nothing - - :param bool trigger_parent: Whether to trigger the parent, after self has updated - """ - if not self.update_model() or (hasattr(self, "_in_init_") and self._in_init_): - #print "Warning: updates are off, updating the model will do nothing" - return - self._trigger_params_changed(trigger_parent) - - def add_observer(self, observer, callble, priority=0): - """ - Add an observer `observer` with the callback `callble` - and priority `priority` to this observers list. - """ - self.observers.add(priority, observer, callble) - - def remove_observer(self, observer, callble=None): - """ - Either (if callble is None) remove all callables, - which were added alongside observer, - or remove callable `callble` which was added alongside - the observer `observer`. - """ - to_remove = [] - for poc in self.observers: - _, obs, clble = poc - if callble is not None: - if (obs is observer) and (callble == clble): - to_remove.append(poc) - else: - if obs is observer: - to_remove.append(poc) - for r in to_remove: - self.observers.remove(*r) - - def notify_observers(self, which=None, min_priority=None): - """ - Notifies all observers. Which is the element, which kicked off this - notification loop. The first argument will be self, the second `which`. - - NOTE: notifies only observers with priority p > min_priority! - ^^^^^^^^^^^^^^^^ - :param min_priority: only notify observers with priority > min_priority - if min_priority is None, notify all observers in order - """ - if not self.update_model(): - return - if which is None: - which = self - if min_priority is None: - [callble(self, which=which) for _, _, callble in self.observers] - else: - for p, _, callble in self.observers: - if p <= min_priority: - break - callble(self, which=which) class Parentable(object): """ @@ -307,11 +199,11 @@ class Gradcheckable(Pickleable, Parentable): :param float step: the stepsize for the numerical three point gradient estimate. :param float tolerance: the tolerance for the gradient ratio or difference. :param float df_tolerance: the tolerance for df_tolerance - + Note:- - The *dF_ratio* indicates the limit of accuracy of numerical gradients. 
- If it is too small, e.g., smaller than 1e-12, the numerical gradients - are usually not accurate enough for the tests (shown with blue). + The *dF_ratio* indicates the limit of accuracy of numerical gradients. + If it is too small, e.g., smaller than 1e-12, the numerical gradients + are usually not accurate enough for the tests (shown with blue). """ if self.has_parent(): return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance, df_tolerance=df_tolerance) @@ -363,7 +255,7 @@ class Nameable(Gradcheckable): return adjust(self.name) -class Indexable(Nameable, Observable): +class Indexable(Nameable, Updateable): """ Make an object constrainable with Priors and Transformations. TODO: Mappings!! @@ -726,7 +618,7 @@ class OptimizationHandlable(Indexable): fixes[self.constraints[__fixed__]] = FIXED return self._optimizer_copy_[np.logical_and(fixes, self._highest_parent_.tie.getTieFlag(self))] elif self._has_fixes(): - return self._optimizer_copy_[self._fixes_] + return self._optimizer_copy_[self._fixes_] self._optimizer_copy_transformed = True @@ -757,7 +649,7 @@ class OptimizationHandlable(Indexable): #self._highest_parent_.tie.propagate_val() self._optimizer_copy_transformed = False - self._trigger_params_changed() + self.trigger_update() def _get_params_transformed(self): raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!" diff --git a/GPy/core/parameterization/updateable.py b/GPy/core/parameterization/updateable.py new file mode 100644 index 00000000..daf07d3c --- /dev/null +++ b/GPy/core/parameterization/updateable.py @@ -0,0 +1,63 @@ +''' +Created on 11 Nov 2014 + +@author: maxz +''' +from observable import Observable + + +class Updateable(Observable): + """ + A model can be updated or not. + Make sure updates can be switched on and off. + """ + _updates = True + def __init__(self, *args, **kwargs): + super(Updateable, self).__init__(*args, **kwargs) + + @property + def updates(self): + raise DeprecationWarning("updates is now a function, see update(True|False|None)") + + @updates.setter + def updates(self, ups): + raise DeprecationWarning("updates is now a function, see update(True|False|None)") + + def update_model(self, updates=None): + """ + Get or set, whether automatic updates are performed. When updates are + off, the model might be in a non-working state. To make the model work + turn updates on again. + + :param bool|None updates: + + bool: whether to do updates + None: get the current update state + """ + if updates is None: + p = getattr(self, '_highest_parent_', None) + if p is not None: + self._updates = p._updates + return self._updates + assert isinstance(updates, bool), "updates are either on (True) or off (False)" + p = getattr(self, '_highest_parent_', None) + if p is not None: + p._updates = updates + self._updates = updates + self.trigger_update() + + def toggle_update(self): + self.update_model(not self.update_model()) + + def trigger_update(self, trigger_parent=True): + """ + Update the model from the current state. 
+ Make sure that updates are on, otherwise this + method will do nothing + + :param bool trigger_parent: Whether to trigger the parent, after self has updated + """ + if not self.update_model() or (hasattr(self, "_in_init_") and self._in_init_): + #print "Warning: updates are off, updating the model will do nothing" + return + self._trigger_params_changed(trigger_parent) diff --git a/GPy/util/caching.py b/GPy/util/caching.py index 85c4caa8..6e954fc7 100644 --- a/GPy/util/caching.py +++ b/GPy/util/caching.py @@ -1,5 +1,5 @@ -from ..core.parameterization.parameter_core import Observable -import collections, weakref, logging +from ..core.parameterization.observable import Observable +import collections, weakref class Cacher(object): def __init__(self, operation, limit=5, ignore_args=(), force_kwargs=()): From 5736caa8fd4fa0eb5435a9e6d11ea0a165abc17e Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Tue, 11 Nov 2014 15:41:35 +0000 Subject: [PATCH 5/9] add the function of saving all the parameters into a HDF5 file --- GPy/core/parameterization/parameter_core.py | 25 +++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 4048fbae..a8f11606 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -1009,3 +1009,28 @@ class Parameterizable(OptimizationHandlable): updates get passed through. See :py:function:``GPy.core.param.Observable.add_observer`` """ pass + + def save_params_H5(self, filename): + """ + Save all the model parameters into a HDF5 file. + """ + from . import Param + from ...util.misc import param_to_array + def gather_params(self, plist): + if isinstance(self,Param): + plist.append(self) + plist = [] + self.traverse(gather_params, plist) + names = self.parameter_names(adjust_for_printing=True) + try: + import h5py + f = h5py.File(filename,'w') + for p,n in zip(plist,names): + n = n.replace('.','_') + p = param_to_array(p) + d = f.create_dataset(n,p.shape,dtype=p.dtype) + d[:] = p + f.close() + except: + raise 'Fails to write the parameters into a HDF5 file!' + From 0018b5e583f9b8ad661ab0650e3577489ee655d9 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Tue, 11 Nov 2014 15:44:50 +0000 Subject: [PATCH 6/9] rename the save_params_H5 function to be a general function save which can potentially support other file format --- GPy/core/parameterization/parameter_core.py | 27 +++++++++++---------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index a8f11606..6add95b0 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -1010,9 +1010,9 @@ class Parameterizable(OptimizationHandlable): """ pass - def save_params_H5(self, filename): + def save(self, filename, ftype='HDF5'): """ - Save all the model parameters into a HDF5 file. + Save all the model parameters into a file (HDF5 by default). """ from . 
import Param from ...util.misc import param_to_array @@ -1022,15 +1022,16 @@ class Parameterizable(OptimizationHandlable): plist = [] self.traverse(gather_params, plist) names = self.parameter_names(adjust_for_printing=True) - try: - import h5py - f = h5py.File(filename,'w') - for p,n in zip(plist,names): - n = n.replace('.','_') - p = param_to_array(p) - d = f.create_dataset(n,p.shape,dtype=p.dtype) - d[:] = p - f.close() - except: - raise 'Fails to write the parameters into a HDF5 file!' + if ftype=='HDF5': + try: + import h5py + f = h5py.File(filename,'w') + for p,n in zip(plist,names): + n = n.replace('.','_') + p = param_to_array(p) + d = f.create_dataset(n,p.shape,dtype=p.dtype) + d[:] = p + f.close() + except: + raise 'Fails to write the parameters into a HDF5 file!' From a5c3e88f83d618b9cd8bd828b1acafe6b0743ff8 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Wed, 12 Nov 2014 11:34:44 +0000 Subject: [PATCH 7/9] [MRD] init and sim nicer now --- GPy/examples/dimensionality_reduction.py | 88 ++++++++++++------------ GPy/models/bayesian_gplvm_minibatch.py | 13 ++-- GPy/models/mrd.py | 31 ++++----- GPy/models/sparse_gp_minibatch.py | 1 + 4 files changed, 67 insertions(+), 66 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 1da855bc..dc0b3dea 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as _np -#default_seed = _np.random.seed(123344) +# default_seed = _np.random.seed(123344) def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan=False): """ @@ -28,7 +28,7 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T # k = GPy.kern.RBF_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) - #k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) + # k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) # k = GPy.kern.RBF(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001) # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.RBF(input_dim, .3, _np.ones(input_dim) * .2, ARD=True) # k = GPy.kern.RBF(input_dim, .5, 2., ARD=0) + GPy.kern.RBF(input_dim, .3, .2, ARD=0) @@ -40,30 +40,30 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan if nan: m.inference_method = GPy.inference.latent_function_inference.var_dtc.VarDTCMissingData() - m.Y[_np.random.binomial(1,p,size=(Y.shape)).astype(bool)] = _np.nan + m.Y[_np.random.binomial(1, p, size=(Y.shape)).astype(bool)] = _np.nan m.parameters_changed() #=========================================================================== # randomly obstruct data with percentage p #=========================================================================== - #m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing) - #m.lengthscales = lengthscales + # m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing) + # m.lengthscales = lengthscales if plot: import matplotlib.pyplot as pb m.plot() pb.title('PCA initialisation') - #m2.plot() - #pb.title('PCA initialisation') + # m2.plot() + # pb.title('PCA initialisation') if optimize: 
m.optimize('scg', messages=verbose) - #m2.optimize('scg', messages=verbose) + # m2.optimize('scg', messages=verbose) if plot: m.plot() pb.title('After optimisation') - #m2.plot() - #pb.title('After optimisation') + # m2.plot() + # pb.title('After optimisation') return m @@ -169,7 +169,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, data = GPy.util.datasets.oil() - kernel = GPy.kern.RBF(Q, 1., 1./_np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2)) + kernel = GPy.kern.RBF(Q, 1., 1. / _np.random.uniform(0, 1, (Q,)), ARD=True) # + GPy.kern.Bias(Q, _np.exp(-2)) Y = data['X'][:N] m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k) m.data_labels = data['Y'][:N].argmax(axis=1) @@ -180,8 +180,8 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, if plot: fig, (latent_axes, sense_axes) = plt.subplots(1, 2) m.plot_latent(ax=latent_axes, labels=m.data_labels) - data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:])) - lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1,:], # @UnusedVariable + data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0, :])) + lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1, :], # @UnusedVariable m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels) raw_input('Press enter to finish') plt.close(fig) @@ -195,7 +195,7 @@ def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40 _np.random.seed(0) data = pods.datasets.oil() - kernel = GPy.kern.RBF(Q, 1., 1./_np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2)) + kernel = GPy.kern.RBF(Q, 1., 1. / _np.random.uniform(0, 1, (Q,)), ARD=True) # + GPy.kern.Bias(Q, _np.exp(-2)) Y = data['X'][:N] m = GPy.models.SSGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k) m.data_labels = data['Y'][:N].argmax(axis=1) @@ -206,8 +206,8 @@ def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40 if plot: fig, (latent_axes, sense_axes) = plt.subplots(1, 2) m.plot_latent(ax=latent_axes, labels=m.data_labels) - data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:])) - lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1,:], # @UnusedVariable + data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0, :])) + lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1, :], # @UnusedVariable m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels) raw_input('Press enter to finish') plt.close(fig) @@ -219,10 +219,12 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False): import numpy as np np.random.seed(3000) - k = GPy.kern.Matern32(Q_signal, 10., lengthscale=1+(np.random.uniform(1,6,Q_signal)), ARD=1) - t = np.c_[[np.linspace(-1,5,N) for _ in range(Q_signal)]].T + k = GPy.kern.Matern32(Q_signal, 1., lengthscale=(np.random.uniform(1, 6, Q_signal)), ARD=1) + for i in range(Q_signal): + k += GPy.kern.PeriodicExponential(1, variance=1., active_dims=[i], period=3., lower=-2, upper=6) + t = np.c_[[np.linspace(-1, 5, N) for _ in range(Q_signal)]].T K = k.K(t) - s2, s1, s3, sS = np.random.multivariate_normal(np.zeros(K.shape[0]), K, size=(4))[:,:,None] + s2, s1, s3, sS = np.random.multivariate_normal(np.zeros(K.shape[0]), K, size=(4))[:, :, None] Y1, Y2, Y3, S1, S2, S3 = _generate_high_dimensional_output(D1, D2, D3, s1, s2, 
s3, sS) @@ -243,7 +245,7 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False): ax.legend() for i, Y in enumerate(Ylist): ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i) - ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable + ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable ax.set_title("Y{}".format(i + 1)) plt.draw() plt.tight_layout() @@ -255,7 +257,7 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False): x = _np.linspace(0, 4 * _np.pi, N)[:, None] s1 = _np.vectorize(lambda x: _np.sin(x)) - s2 = _np.vectorize(lambda x: _np.cos(x)**2) + s2 = _np.vectorize(lambda x: _np.cos(x) ** 2) s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x))) sS = _np.vectorize(lambda x: _np.cos(x)) @@ -288,7 +290,7 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False): ax.legend() for i, Y in enumerate(Ylist): ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i) - ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable + ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable ax.set_title("Y{}".format(i + 1)) plt.draw() plt.tight_layout() @@ -323,10 +325,10 @@ def bgplvm_simulation(optimize=True, verbose=1, D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 3, 9 _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim) Y = Ylist[0] - k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) - #k = kern.RBF(Q, ARD=True, lengthscale=10.) + k = kern.Linear(Q, ARD=True) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) + # k = kern.RBF(Q, ARD=True, lengthscale=10.) m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k) - m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape) + m.X.variance[:] = _np.random.uniform(0, .01, m.X.shape) m.likelihood.variance = .1 if optimize: @@ -348,10 +350,10 @@ def ssgplvm_simulation(optimize=True, verbose=1, D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 3, 9 _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim) Y = Ylist[0] - k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) - #k = kern.RBF(Q, ARD=True, lengthscale=10.) + k = kern.Linear(Q, ARD=True) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) + # k = kern.RBF(Q, ARD=True, lengthscale=10.) 
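    # SSGPLVM places a spike-and-slab prior on the latent space; init="pca"
    # seeds the latent means with PCA, and the posterior binary probabilities
    # switch individual dimensions on or off.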
m = SSGPLVM(Y, Q, init="pca", num_inducing=num_inducing, kernel=k) - m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape) + m.X.variance[:] = _np.random.uniform(0, .01, m.X.shape) m.likelihood.variance = .1 if optimize: @@ -365,7 +367,7 @@ def ssgplvm_simulation(optimize=True, verbose=1, def bgplvm_simulation_missing_data(optimize=True, verbose=1, plot=True, plot_sim=False, - max_iters=2e4, + max_iters=2e4, percent_missing=.1, ): from GPy import kern from GPy.models.bayesian_gplvm_minibatch import BayesianGPLVMMiniBatch @@ -373,9 +375,9 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1, D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4 _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim) Y = Ylist[0] - k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) + k = kern.Linear(Q, ARD=True) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) - inan = _np.random.binomial(1, .2, size=Y.shape).astype(bool) # 80% missing data + inan = _np.random.binomial(1, percent_missing, size=Y.shape).astype(bool) # 80% missing data Ymissing = Y.copy() Ymissing[inan] = _np.nan @@ -401,11 +403,11 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim) - #Ylist = [Ylist[0]] + # Ylist = [Ylist[0]] k = kern.Linear(Q, ARD=True) m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="PCA_concat", initz='permute', **kw) - m['.*noise'] = [Y.var()/40. for Y in Ylist] + m['.*noise'] = [Y.var() / 40. for Y in Ylist] if optimize: print "Optimizing Model:" @@ -422,7 +424,7 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim) - #Ylist = [Ylist[0]] + # Ylist = [Ylist[0]] k = kern.Linear(Q, ARD=True) inanlist = [] @@ -553,7 +555,7 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True): data = pods.datasets.osu_run1() # optimize - back_kernel=GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.) + back_kernel = GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.) 
mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel) m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping) if optimize: m.optimize(messages=verbose, max_f_eval=10000) @@ -563,7 +565,7 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True): y = m.likelihood.Y[0, :] data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) - #raw_input('Press enter to finish') + # raw_input('Press enter to finish') return m @@ -627,7 +629,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose Y = data['Y'] Y_mean = Y.mean(0) Y_std = Y.std(0) - m = GPy.models.GPLVM((Y-Y_mean)/Y_std, 2) + m = GPy.models.GPLVM((Y - Y_mean) / Y_std, 2) if optimize: m.optimize(messages=verbose, max_f_eval=10000) if plot: @@ -651,17 +653,17 @@ def ssgplvm_simulation_linear(): x = np.empty(Q) dies = np.random.rand(Q) for q in xrange(Q): - if dies[q]'.format(hex(id(i)))) self.Z.gradient[:] += b.full_values['Zgrad'] - grad_dict = b.grad_dict + grad_dict = b.full_values - if isinstance(self.X, VariationalPosterior): - dL_dmean, dL_dS = b.kern.gradients_qX_expectations( - grad_dict['dL_dpsi0'], - grad_dict['dL_dpsi1'], - grad_dict['dL_dpsi2'], - self.Z, self.X) - self.X.mean.gradient += dL_dmean - self.X.variance.gradient += dL_dS - - else: - #gradients wrt kernel - self.X.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'], self.X, self.Z) + self.X.mean.gradient += grad_dict['meangrad'] + self.X.variance.gradient += grad_dict['vargrad'] if isinstance(self.X, VariationalPosterior): # update for the KL divergence @@ -238,7 +235,7 @@ class MRD(BayesianGPLVMMiniBatch): pass if axes is None: ax = fig.add_subplot(1, len(self.bgplvms), i + 1, sharex=sharex_ax, sharey=sharey_ax) - elif isinstance(axes, (tuple, list)): + elif isinstance(axes, (tuple, list, np.ndarray)): ax = axes[i] else: raise ValueError("Need one axes per latent dimension input_dim") @@ -286,7 +283,7 @@ class MRD(BayesianGPLVMMiniBatch): titles = [r'${}$'.format(name) for name in self.names] ymax = reduce(max, [np.ceil(max(g.kern.input_sensitivity())) for g in self.bgplvms]) def plotf(i, g, ax): - ax.set_ylim([0,ymax]) + #ax.set_ylim([0,ymax]) return g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs) fig = self._handle_plotting(fignum, ax, plotf, sharex=sharex, sharey=sharey) return fig diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index 00929462..ec2e28f5 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ -56,6 +56,7 @@ Created on 3 Nov 2014 raise NotImplementedError, "what to do what to do?" print "defaulting to ", inference_method, "for latent function inference" + self.kl_factr = 1. 
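+        # kl_factr scales the KL(q(X)||p(X)) contribution to the bound;
+        # the default 1. leaves the standard variational objective unchanged.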
From 3c642a5600d8dca0f9ea12c5b3c521f46e09e03d Mon Sep 17 00:00:00 2001
From: Max Zwiessele
Date: Wed, 12 Nov 2014 15:08:38 +0000
Subject: [PATCH 8/9] [MRD] updates and nicer plotting

---
 GPy/__init__.py                                 |  9 +++++++--
 GPy/core/model.py                               | 12 ++++++++----
 GPy/core/parameterization/transformations.py    |  7 +++++++
 GPy/models/bayesian_gplvm_minibatch.py          |  2 ++
 GPy/models/mrd.py                               |  2 +-
 GPy/plotting/matplot_dep/dim_reduction_plots.py |  2 +-
 6 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/GPy/__init__.py b/GPy/__init__.py
index 99a91fe8..5e091170 100644
--- a/GPy/__init__.py
+++ b/GPy/__init__.py
@@ -39,6 +39,11 @@ def load(file_path):
     :param file_name: path/to/file.pickle
     """
     import cPickle as pickle
-    with open(file_path, 'rb') as f:
-        m = pickle.load(f)
+    try:
+        with open(file_path, 'rb') as f:
+            m = pickle.load(f)
+    except Exception: # retry with the pure-Python pickle if cPickle cannot load the file
+        import pickle as pickle
+        with open(file_path, 'rb') as f:
+            m = pickle.load(f)
     return m

diff --git a/GPy/core/model.py b/GPy/core/model.py
index 029b8fc8..ac5a9732 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -238,6 +238,10 @@ class Model(Parameterized):
         if self.size == 0:
             print 'nothing to optimize'

+        if not self.update_model():
+            print "setting updates on again"
+            self.update_model(True)
+
         if start == None:
             start = self.optimizer_array

@@ -279,10 +283,10 @@ class Model(Parameterized):
         Note:-
            The gradient is considered correct if the ratio of the analytical and numerical gradients is within a small tolerance of unity.
-
-           The *dF_ratio* indicates the limit of numerical accuracy of numerical gradients.
-           If it is too small, e.g., smaller than 1e-12, the numerical gradients are usually
-           not accurate enough for the tests (shown with blue).
+
+           The *dF_ratio* indicates the limit of numerical accuracy of numerical gradients.
+           If it is too small, e.g., smaller than 1e-12, the numerical gradients are usually
+           not accurate enough for the tests (shown with blue).
         """
         x = self.optimizer_array.copy()

diff --git a/GPy/core/parameterization/transformations.py b/GPy/core/parameterization/transformations.py
index 01b0f324..291076a1 100644
--- a/GPy/core/parameterization/transformations.py
+++ b/GPy/core/parameterization/transformations.py
@@ -131,6 +131,13 @@ class NormalTheta(Transformation):
     def __str__(self):
         return "theta"

+    def __getstate__(self):
+        return [self.mu_indices, self.var_indices]
+
+    def __setstate__(self, state):
+        self.mu_indices = state[0]
+        self.var_indices = state[1]
+
 class NormalNaturalAntti(NormalTheta):
     _instances = []
     _logexp = Logexp()
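Aside: the __getstate__/__setstate__ pair above lets the transformation survive pickling by storing only its two index arrays. The same pattern in a self-contained toy (illustrative class, not GPy code):

    import pickle
    import numpy as np

    class IndexedTransform(object):
        def __init__(self, mu_indices, var_indices):
            self.mu_indices = mu_indices
            self.var_indices = var_indices

        def __getstate__(self):
            # pickle only what is needed to rebuild the object
            return [self.mu_indices, self.var_indices]

        def __setstate__(self, state):
            self.mu_indices = state[0]
            self.var_indices = state[1]

    t = pickle.loads(pickle.dumps(IndexedTransform(np.arange(3), np.arange(3, 6))))
    assert (t.var_indices == np.arange(3, 6)).all()  # round trip preserved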
diff --git a/GPy/models/bayesian_gplvm_minibatch.py b/GPy/models/bayesian_gplvm_minibatch.py
index 5b7d3162..80abba59 100644
--- a/GPy/models/bayesian_gplvm_minibatch.py
+++ b/GPy/models/bayesian_gplvm_minibatch.py
@@ -54,6 +54,8 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
             self.variational_prior = NormalPrior()
             X = NormalPosterior(X, X_variance)

+        self.kl_factr = 1.
+
         if inference_method is None:
             from ..inference.latent_function_inference.var_dtc import VarDTC
             self.logger.debug("creating inference_method var_dtc")
diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py
index 91d34aa2..645cdf88 100644
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@@ -100,7 +100,7 @@ class MRD(BayesianGPLVMMiniBatch):
         self.logger.info("building kernels")
         if kernel is None:
             from ..kern import RBF
-            kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))]
+            kernels = [RBF(input_dim, ARD=1, lengthscale=1./fracs[i]) for i in range(len(Ylist))]
         elif isinstance(kernel, Kern):
             kernels = []
             for i in range(len(Ylist)):
diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py
index 7ec0b59d..25a1166f 100644
--- a/GPy/plotting/matplot_dep/dim_reduction_plots.py
+++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py
@@ -162,7 +162,7 @@ def plot_latent(model, labels=None, which_indices=None,
         else:
             x = X[index, input_1]
             y = X[index, input_2]
-        ax.scatter(x, y, marker=m, s=s, c=Tango.nextMedium(), label=this_label, linewidth=.5, edgecolor='k', alpha=.9)
+        ax.scatter(x, y, marker=m, s=s, c=Tango.nextMedium(), label=this_label, linewidth=.2, edgecolor='k', alpha=.9)

     ax.set_xlabel('latent dimension %i' % input_1)
     ax.set_ylabel('latent dimension %i' % input_2)

From 7f7b0da7b9e6dd873323781c2497fc166a57c74c Mon Sep 17 00:00:00 2001
From: Zhenwen Dai
Date: Wed, 12 Nov 2014 20:19:56 +0000
Subject: [PATCH 9/9] spike and slab binary variable numerical enhancement

---
 GPy/core/parameterization/variational.py | 30 ++++++++++++++++++++----
 GPy/kern/_src/psi_comp/ssrbf_psi_comp.py | 20 +++++++---------
 GPy/models/ss_gplvm.py                   |  5 +---
 3 files changed, 34 insertions(+), 21 deletions(-)
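Aside: the core numerical device of this patch is to store the spike-and-slab probabilities as unconstrained logits and to evaluate sigmoid(x), sigmoid(-x) and their logarithms without overflow: np.exp overflows float64 near 709, and log sigmoid(x) tends to x as x goes to minus infinity, so outside a clipped range the logit itself is already the answer. A standalone sketch of the technique (illustrative helpers, not the GPy implementation; the actual cut-offs appear in gamma_probabilities and gamma_log_prob below):

    import numpy as np

    def sigmoid_pair(x):
        # (sigmoid(x), sigmoid(-x)) without overflowing np.exp
        p = np.zeros_like(x)
        m = x > -710.
        p[m] = 1. / (1. + np.exp(-x[m]))  # saturates to exactly 0 elsewhere
        p1 = np.zeros_like(x)
        m1 = x < 710.
        p1[m1] = 1. / (1. + np.exp(x[m1]))
        return p, p1

    def log_sigmoid_pair(x):
        # (log sigmoid(x), log sigmoid(-x)); log sigmoid(x) ~ x for x << 0
        lp = x.copy()
        m = x > -40.
        lp[m] = -np.log1p(np.exp(-x[m]))
        lp1 = -x
        m1 = lp1 > -40.
        lp1[m1] = -np.log1p(np.exp(-lp1[m1]))
        return lp, lp1

    x = np.array([-800., -5., 0., 5., 800.])
    p, p1 = sigmoid_pair(x)
    lp, lp1 = log_sigmoid_pair(x)
    assert np.allclose(p + p1, 1.)  # the pair always sums to one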
diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py
index e2a24008..bb7678ff 100644
--- a/GPy/core/parameterization/variational.py
+++ b/GPy/core/parameterization/variational.py
@@ -8,6 +8,8 @@ import numpy as np
 from parameterized import Parameterized
 from param import Param
 from transformations import Logexp, Logistic,__fixed__
+from GPy.util.misc import param_to_array
+from GPy.util.caching import Cache_this

 class VariationalPrior(Parameterized):
     def __init__(self, name='latent space', **kw):
@@ -48,7 +50,8 @@ class SpikeAndSlabPrior(VariationalPrior):
     def KL_divergence(self, variational_posterior):
         mu = variational_posterior.mean
         S = variational_posterior.variance
-        gamma = variational_posterior.binary_prob
+        gamma,gamma1 = variational_posterior.gamma_probabilities()
+        log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
         if len(self.pi.shape)==2:
             idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
             pi = self.pi[idx]
@@ -57,20 +60,21 @@ class SpikeAndSlabPrior(VariationalPrior):

         var_mean = np.square(mu)/self.variance
         var_S = (S/self.variance - np.log(S))
-        var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
+        var_gamma = (gamma*(log_gamma-np.log(pi))).sum()+(gamma1*(log_gamma1-np.log(1-pi))).sum()
         return var_gamma+ (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.

     def update_gradients_KL(self, variational_posterior):
         mu = variational_posterior.mean
         S = variational_posterior.variance
-        gamma = variational_posterior.binary_prob
+        gamma,gamma1 = variational_posterior.gamma_probabilities()
+        log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
         if len(self.pi.shape)==2:
             idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
             pi = self.pi[idx]
         else:
             pi = self.pi
-        gamma.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
+        variational_posterior.binary_prob.gradient -= (np.log((1-pi)/pi)+log_gamma-log_gamma1+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.)*gamma*gamma1
         mu.gradient -= gamma*mu/self.variance
         S.gradient -= (1./self.variance - 1./S) * gamma /2.
         if self.learnPi:
@@ -158,8 +162,24 @@ class SpikeAndSlabPosterior(VariationalPosterior):
         binary_prob : the probability of the distribution on the slab part.
         """
         super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
-        self.gamma = Param("binary_prob",binary_prob, Logistic(1e-10,1.-1e-10))
+        self.gamma = Param("binary_prob",binary_prob)
         self.link_parameter(self.gamma)
+
+    @Cache_this(limit=5)
+    def gamma_probabilities(self):
+        prob = np.zeros_like(param_to_array(self.gamma))
+        prob[self.gamma>-710] = 1./(1.+np.exp(-self.gamma[self.gamma>-710]))
+        prob1 = np.zeros_like(param_to_array(self.gamma))
+        prob1[self.gamma<710] = 1./(1.+np.exp(self.gamma[self.gamma<710]))
+        return prob, prob1
+
+    @Cache_this(limit=5)
+    def gamma_log_prob(self):
+        loggamma = param_to_array(self.gamma).copy()
+        loggamma[loggamma>-40] = -np.log1p(np.exp(-loggamma[loggamma>-40]))
+        loggamma1 = -param_to_array(self.gamma)
+        loggamma1[loggamma1>-40] = -np.log1p(np.exp(-loggamma1[loggamma1>-40]))
+        return loggamma,loggamma1

     def set_gradients(self, grad):
         self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad
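Aside: the gradient updates in the kernel code below change because gamma is now a logit. With p = sigmoid(x), the chain rule gives dp/dx = p*(1-p) = gamma*gamma1, which is why update_gradients_KL multiplies by gamma*gamma1 and why d_exp1/gnq-d_exp2/(1.-gnq) becomes d_exp1*g1nq-d_exp2*gnq in the weave kernels (the same expression multiplied through by gnq*g1nq). A quick numerical check of that identity:

    import numpy as np

    def sigmoid(x):
        return 1. / (1. + np.exp(-x))

    x = np.linspace(-4., 4., 9)
    eps = 1e-6
    numeric = (sigmoid(x + eps) - sigmoid(x - eps)) / (2. * eps)
    analytic = sigmoid(x) * sigmoid(-x)  # p * (1 - p)
    assert np.allclose(numeric, analytic, atol=1e-8)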
diff --git a/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py b/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py
index f6a24c86..18a4d751 100644
--- a/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py
+++ b/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py
@@ -22,14 +22,12 @@ try:
         # _psi1 NxM
         mu = variational_posterior.mean
         S = variational_posterior.variance
-        gamma = variational_posterior.binary_prob

         N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
         l2 = np.square(lengthscale)
         log_denom1 = np.log(S/l2+1)
         log_denom2 = np.log(2*S/l2+1)
-        log_gamma = np.log(gamma)
-        log_gamma1 = np.log(1.-gamma)
+        log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
         variance = float(variance)
         psi0 = np.empty(N)
         psi0[:] = variance
@@ -39,7 +37,6 @@ try:
         from ....util.misc import param_to_array
         S = param_to_array(S)
         mu = param_to_array(mu)
-        gamma = param_to_array(gamma)
         Z = param_to_array(Z)

         support_code = """
@@ -82,7 +79,7 @@ try:
             }
         }
         """
-        weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)
+        weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)

         psi2 = psi2n.sum(axis=0)
         return psi0,psi1,psi2,psi2n
@@ -97,13 +94,12 @@ try:

         mu = variational_posterior.mean
         S = variational_posterior.variance
-        gamma = variational_posterior.binary_prob

         N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
         l2 = np.square(lengthscale)
         log_denom1 = np.log(S/l2+1)
         log_denom2 = np.log(2*S/l2+1)
-        log_gamma = np.log(gamma)
-        log_gamma1 = np.log(1.-gamma)
+        log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
+        gamma, gamma1 = variational_posterior.gamma_probabilities()
         variance = float(variance)

         dvar = np.zeros(1)
@@ -117,7 +113,6 @@ try:
         from ....util.misc import param_to_array
         S = param_to_array(S)
         mu = param_to_array(mu)
-        gamma = param_to_array(gamma)
         Z = param_to_array(Z)

         support_code = """
@@ -135,6 +130,7 @@ try:
                     double Zm1q = Z(m1,q);
                     double Zm2q = Z(m2,q);
                     double gnq = gamma(n,q);
+                    double g1nq = gamma1(n,q);
                     double mu_nq = mu(n,q);

                     if(m2==0) {
@@ -160,7 +156,7 @@ try:

                         dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum);
                         dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.;
-                        dgamma(n,q) += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
+                        dgamma(n,q) += lpsi1*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
                         dl(q) += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zm1q*Zm1q/(lq*lq)*d_exp2)/(2.*exp_sum);
                         dZ(m1,q) += lpsi1*(-Zmu/denom*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
                     }
@@ -188,7 +184,7 @@ try:

                     dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum;
                     dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
-                    dgamma(n,q) += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
+                    dgamma(n,q) += lpsi2*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
                     dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
                     dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
                 }
             }
         }
         """
-        weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)
+        weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','gamma1','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)

         dl *= 2.*lengthscale
         if not ARD:
diff --git a/GPy/models/ss_gplvm.py b/GPy/models/ss_gplvm.py
index 04006d84..a61ad2a0 100644
--- a/GPy/models/ss_gplvm.py
+++ b/GPy/models/ss_gplvm.py
@@ -39,10 +39,7 @@ class SSGPLVM(SparseGP_MPI):
             X_variance = np.random.uniform(0,.1,X.shape)

         if Gamma is None:
-            gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
-            gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
-            gamma[gamma>1.-1e-9] = 1.-1e-9
-            gamma[gamma<1e-9] = 1e-9
+            gamma = np.random.randn(X.shape[0], input_dim)
         else:
             gamma = Gamma.copy()
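Aside: with the new parameterization the gamma array above holds logits, so an unconstrained N(0,1) initialization places the inclusion probabilities softly around 0.5 and the old clipping into (0, 1) is no longer needed. A probability p corresponds to the logit log(p/(1-p)), so a Gamma array supplied in probability space would need mapping through the logit first. An illustration of the correspondence (helper names are made up):

    import numpy as np

    def sigmoid(x):
        return 1. / (1. + np.exp(-x))

    def logit(p):
        return np.log(p) - np.log1p(-p)  # log(p / (1 - p))

    # N(0,1) logits -> probabilities strictly inside (0, 1), centred on 0.5
    gamma = np.random.randn(400, 4)
    probs = sigmoid(gamma)
    assert probs.min() > 0. and probs.max() < 1.

    # round trip between probability and logit space
    p = np.array([.1, .5, .9])
    assert np.allclose(sigmoid(logit(p)), p)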