Overwrite my changes with James's.

This commit is contained in:
Neil Lawrence 2013-05-14 09:26:20 +01:00
commit 784f7349b9
16 changed files with 408 additions and 325 deletions

View file

@ -92,7 +92,7 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=20, max_f_eval=300, plot=False):
plt.sca(latent_axes)
m.plot_latent()
data_show = GPy.util.visualize.vector_show(y)
lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], m, data_show, latent_axes=latent_axes) # , sense_axes=sense_axes)
lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes) # , sense_axes=sense_axes)
raw_input('Press enter to finish')
plt.close('all')
# # plot
@ -376,7 +376,7 @@ def brendan_faces():
ax = m.plot_latent()
y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :], m, data_show, ax)
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')
plt.close('all')
@ -389,11 +389,12 @@ def stick():
# optimize
m.ensure_default_constraints()
m.optimize(messages=1, max_f_eval=10000)
m._set_params(m._get_params())
ax = m.plot_latent()
y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :], m, data_show, ax)
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')
plt.close('all')
@ -415,7 +416,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True):
ax = m.plot_latent()
y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel'])
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :], m, data_show, ax)
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')
plt.close('all')

View file

@ -97,51 +97,66 @@ class opt_SGD(Optimizer):
return subset
def shift_constraints(self, j):
# back them up
bounded_i = copy.deepcopy(self.model.constrained_bounded_indices)
bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers)
bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers)
for b in range(len(bounded_i)): # for each group of constraints
for bc in range(len(bounded_i[b])):
pos = np.where(j == bounded_i[b][bc])[0]
constrained_indices = copy.deepcopy(self.model.constrained_indices)
for c, constraint in enumerate(constrained_indices):
mask = (np.ones_like(constrained_indices[c]) == 1)
for i in range(len(constrained_indices[c])):
pos = np.where(j == constrained_indices[c][i])[0]
if len(pos) == 1:
pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0]
self.model.constrained_bounded_indices[b][pos2] = pos[0]
self.model.constrained_indices[c][i] = pos
else:
if len(self.model.constrained_bounded_indices[b]) == 1:
# if it's the last index to be removed
# the logic here is just a mess. If we remove the last one, then all the
# b-indices change and we have to iterate through everything to find our
# current index. Can't deal with this right now.
raise NotImplementedError
mask[i] = False
else: # just remove it from the indices
mask = self.model.constrained_bounded_indices[b] != bc
self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask]
self.model.constrained_indices[c] = self.model.constrained_indices[c][mask]
return constrained_indices
# back them up
# bounded_i = copy.deepcopy(self.model.constrained_bounded_indices)
# bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers)
# bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers)
# for b in range(len(bounded_i)): # for each group of constraints
# for bc in range(len(bounded_i[b])):
# pos = np.where(j == bounded_i[b][bc])[0]
# if len(pos) == 1:
# pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0]
# self.model.constrained_bounded_indices[b][pos2] = pos[0]
# else:
# if len(self.model.constrained_bounded_indices[b]) == 1:
# # if it's the last index to be removed
# # the logic here is just a mess. If we remove the last one, then all the
# # b-indices change and we have to iterate through everything to find our
# # current index. Can't deal with this right now.
# raise NotImplementedError
# else: # just remove it from the indices
# mask = self.model.constrained_bounded_indices[b] != bc
# self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask]
# here we shif the positive constraints. We cycle through each positive
# constraint
positive = self.model.constrained_positive_indices.copy()
mask = (np.ones_like(positive) == 1)
for p in range(len(positive)):
# we now check whether the constrained index appears in the j vector
# (the vector of the "active" indices)
pos = np.where(j == self.model.constrained_positive_indices[p])[0]
if len(pos) == 1:
self.model.constrained_positive_indices[p] = pos
else:
mask[p] = False
self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask]
# # here we shif the positive constraints. We cycle through each positive
# # constraint
# positive = self.model.constrained_positive_indices.copy()
# mask = (np.ones_like(positive) == 1)
# for p in range(len(positive)):
# # we now check whether the constrained index appears in the j vector
# # (the vector of the "active" indices)
# pos = np.where(j == self.model.constrained_positive_indices[p])[0]
# if len(pos) == 1:
# self.model.constrained_positive_indices[p] = pos
# else:
# mask[p] = False
# self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask]
return (bounded_i, bounded_l, bounded_u), positive
# return (bounded_i, bounded_l, bounded_u), positive
def restore_constraints(self, b, p):
self.model.constrained_bounded_indices = b[0]
self.model.constrained_bounded_lowers = b[1]
self.model.constrained_bounded_uppers = b[2]
self.model.constrained_positive_indices = p
def restore_constraints(self, c):#b, p):
# self.model.constrained_bounded_indices = b[0]
# self.model.constrained_bounded_lowers = b[1]
# self.model.constrained_bounded_uppers = b[2]
# self.model.constrained_positive_indices = p
self.model.constrained_indices = c
def get_param_shapes(self, N = None, Q = None):
model_name = self.model.__class__.__name__
@ -168,9 +183,15 @@ class opt_SGD(Optimizer):
if self.model.N == 0 or Y.std() == 0.0:
return 0, step, self.model.N
self.model.likelihood._mean = Y.mean()
self.model.likelihood._std = Y.std()
self.model.likelihood._bias = Y.mean()
self.model.likelihood._scale = Y.std()
self.model.likelihood.set_data(Y)
# self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision
sigma = self.model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma)
j = self.subset_parameter_vector(self.x_opt, samples, shapes)
self.model.X = X[samples]
@ -181,27 +202,30 @@ class opt_SGD(Optimizer):
self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T)
self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT)
b, p = self.shift_constraints(j)
ci = self.shift_constraints(j)
f, fp = f_fp(self.x_opt[j])
step[j] = self.momentum * step[j] + self.learning_rate[j] * fp
self.x_opt[j] -= step[j]
self.restore_constraints(ci)
self.restore_constraints(b, p)
# restore likelihood _mean and _std, otherwise when we call set_data(y) on
self.model.grads[j] = fp
# restore likelihood _bias and _scale, otherwise when we call set_data(y) on
# the next feature, it will get normalized with the mean and std of this one.
self.model.likelihood._mean = 0
self.model.likelihood._std = 1
self.model.likelihood._bias = 0
self.model.likelihood._scale = 1
return f, step, self.model.N
def opt(self, f_fp=None, f=None, fp=None):
self.x_opt = self.model._get_params_transformed()
self.model.grads = np.zeros_like(self.x_opt)
X, Y = self.model.X.copy(), self.model.likelihood.Y.copy()
self.model.likelihood.YYT = None
self.model.likelihood.trYYT = None
self.model.likelihood._mean = 0.0
self.model.likelihood._std = 1.0
self.model.likelihood._bias = 0.0
self.model.likelihood._scale = 1.0
N, Q = self.model.X.shape
D = self.model.likelihood.Y.shape[1]
@ -225,6 +249,11 @@ class opt_SGD(Optimizer):
self.model.D = len(j)
self.model.likelihood.D = len(j)
self.model.likelihood.set_data(Y[:, j])
# self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision
sigma = self.model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma)
if missing_data:
shapes = self.get_param_shapes(N, Q)
@ -250,7 +279,6 @@ class opt_SGD(Optimizer):
# plt.clf()
# plt.plot(self.param_traces['noise'])
# import pdb; pdb.set_trace()
# for k in self.param_traces.keys():
# self.param_traces[k].append(self.model.get(k)[0])
@ -262,7 +290,10 @@ class opt_SGD(Optimizer):
self.model.likelihood.N = N
self.model.likelihood.D = D
self.model.likelihood.Y = Y
sigma = self.model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma)
self.trace.append(self.f_opt)
if self.iteration_file is not None:
f = open(self.iteration_file + "iteration%d.pickle" % it, 'w')

View file

@ -38,7 +38,6 @@ class bias(kernpart):
def dK_dtheta(self,dL_dKdiag,X,X2,target):
target += dL_dKdiag.sum()
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += dL_dKdiag.sum()

View file

@ -252,7 +252,7 @@ class kern(parameterised):
which_parts = [True]*self.Nparts
assert X.shape[1] == self.D
target = np.zeros(X.shape[0])
[p.Kdiag(X[:, i_s], target=target) for p, i_s in zip(self.parts, self.input_slices)]
[p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on]
return target
def dKdiag_dtheta(self, dL_dKdiag, X):

View file

@ -203,7 +203,7 @@ class linear(kernpart):
target_mu(n,q) += factor*tmp;
target_S(n,q) += factor*AZZA_2(q,m,mm,q);
}
}
}
}
}
"""

View file

@ -1,6 +1,6 @@
import numpy as np
from scipy import stats, linalg
from ..util.linalg import pdinv,mdot,jitchol
from ..util.linalg import pdinv,mdot,jitchol,DSYR
from likelihood import likelihood
class EP(likelihood):
@ -113,11 +113,12 @@ class EP(likelihood):
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
si=Sigma[:,i].reshape(self.N,1)
Sigma = Sigma - Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T)
DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
#si=Sigma[:,i:i+1]
#Sigma -= Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T)#DSYR
mu = np.dot(Sigma,self.v_tilde)
self.iterations += 1
#Sigma recomptutation with Cholesky decompositon

View file

@ -7,6 +7,7 @@ from scipy import stats
import scipy as sp
import pylab as pb
from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
class likelihood_function:
"""
@ -37,11 +38,11 @@ class probit(likelihood_function):
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}.
#if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}.
# TODO: some version of assert
z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = stats.norm.cdf(z)
phi = stats.norm.pdf(z)
Z_hat = std_norm_cdf(z)
phi = std_norm_pdf(z)
mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
return Z_hat, mu_hat, sigma2_hat

View file

@ -16,36 +16,6 @@ def backsub_both_sides(L,X):
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
class FITC(sparse_GP):
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
self.Z = Z
self.M = self.Z.shape[0]
self.true_precision = likelihood.precision
#ERASEME
N = likelihood.Y.size
self.beta_star = np.random.rand(N,1)
self.Kmm_ = kernel.K(self.Z).copy()
self.Kmmi_,a,b,c = pdinv(self.Kmm_)
self.psi1_ = kernel.K(self.Z,X).copy()
Haux = np.random.rand(self.M,self.M)
self.matA = np.dot(Haux,Haux.T) + np.eye(self.M)*100.
self.matV = np.random.rand(N,1)
self.H_ = np.dot(Haux,Haux.T) + np.eye(self.M)*3.
self.Hi_, l1,l2,l3 = pdinv(self.H_)
#self.V_star_ = np.random.rand(N,1)
sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False)
def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
self._compute_kernel_matrices()
self.scale_factor = 1.
self._computations()
def update_likelihood_approximation(self):
"""
@ -63,35 +33,16 @@ class FITC(sparse_GP):
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
@profile
def _computations(self):
#factor Kmm
self.Lm = jitchol(self.Kmm)
self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy()
self.Diag0 = self.psi0 - np.diag(self.Qnn)
#TODO eraseme
self.psi1 = self.psi1_
self.Lm = jitchol(self.Kmm_)
self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.true_psi1 = self.kern.K(self.Z,self.X)
#self.Qnn = mdot(self.true_psi1.T,self.Lmi.T,self.Lmi,self.true_psi1)
#self.Kmmi, a,b,c = pdinv(self.Kmm)
#self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1)
#self.Diag0 = self.psi0 #- np.diag(self.Qnn)
#self.Diag0 = - np.diag(self.Qnn)
#Kmmi,Lm,Lmi,logdetK = pdinv(self.Kmm)
#self.Lambda = self.Kmmi_ + mdot(self.Kmmi_,self.psi1_,self.beta_star*self.psi1_.T,self.Kmmi_) + np.eye(self.M)*100
#self.Lambdai, LLm, LLmi, self.logdetLambda = pdinv(self.Lambda)
#TODO uncomment
#self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #Includes Diag0 in the precision
self.true_beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #Includes Diag0 in the precision
self.true_V_star = self.true_beta_star * self.likelihood.Y
self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #Includes Diag0 in the precision
self.V_star = self.beta_star * self.likelihood.Y
# The rather complex computations of self.A
@ -99,7 +50,7 @@ class FITC(sparse_GP):
raise NotImplementedError
else:
if self.likelihood.is_heteroscedastic:
assert self.likelihood.D == 1 # TODO: what if the likelihood is heterscedatic and there are multiple independent outputs?
assert self.likelihood.D == 1
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N)))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
@ -114,143 +65,121 @@ class FITC(sparse_GP):
tmp, info1 = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
# dlogbeta_dtheta
Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
b_psi1_Ki = self.beta_star * Kmmipsi1.T
Ki_pbp_Ki = np.dot(Kmmipsi1,b_psi1_Ki)
dlogB_dpsi0 = -.5*self.kern.dKdiag_dtheta(self.beta_star,X=self.X)
dlogB_dpsi1 = self.kern.dK_dtheta(b_psi1_Ki,self.X,self.Z)
dlogB_dKmm = -.5*self.kern.dK_dtheta(Ki_pbp_Ki,X=self.Z)
self.dlogbeta_dtheta = dlogB_dpsi0 + dlogB_dpsi1 + dlogB_dKmm
# dyby_dtheta
Kmmi = np.dot(self.Lmi.T,self.Lmi)
LBiLmi = np.dot(self.LBi,self.Lmi)
LBL_inv = np.dot(LBiLmi.T,LBiLmi)
VVT = np.outer(self.V_star,self.V_star)
VV_p_Ki = np.dot(VVT,Kmmipsi1.T)
Ki_pVVp_Ki = np.dot(Kmmipsi1,VV_p_Ki)
dyby_dpsi0 = .5 * self.kern.dKdiag_dtheta(self.V_star**2,self.X)
dyby_dpsi1 = 0
dyby_dKmm = 0
dyby_dtheta = dyby_dpsi0
for psi1_n,V_n,X_n in zip(self.psi1.T,self.V_star,self.X):
dyby_dpsi1 = -V_n**2 * np.dot(psi1_n[None,:],Kmmi)
dyby_dtheta += self.kern.dK_dtheta(dyby_dpsi1,X_n[:,None],self.Z)
for psi1_n,V_n,X_n in zip(self.psi1.T,self.V_star,self.X):
psin_K = np.dot(psi1_n[None,:],Kmmi)
tmp = np.dot(psin_K.T,psin_K)
dyby_dKmm = .5*V_n**2 * tmp
dyby_dtheta += self.kern.dK_dtheta(dyby_dKmm,self.Z)
self.dyby_dtheta = dyby_dtheta
# dlogB_dtheta : C
#C_B
dC_B = -.5*Kmmi
C_B = self.kern.dK_dtheta(dC_B,self.Z) #check
#C_A
LBiLmi = np.dot(self.LBi,self.Lmi)
LBL_inv = np.dot(LBiLmi.T,LBiLmi)
dC_AA = .5*LBL_inv
C_AA = self.kern.dK_dtheta(dC_AA,self.Z) #check
#C_AB
psi1beta = self.psi1*self.beta_star.T
dC_ABA = mdot(LBL_inv,psi1beta,Kmmipsi1.T)
C_ABA = self.kern.dK_dtheta(dC_ABA,self.Z)
dC_ABB = -np.dot(psi1beta.T,LBL_inv) #check
C_ABB = self.kern.dK_dtheta(dC_ABB,self.X,self.Z) #check
# C_ABC
H = self.Kmm + mdot(self.psi1,self.beta_star*self.psi1.T)
Hi, LH, LHi, logdetH = pdinv(H)
betapsi1TLmiLBi = np.dot(psi1beta.T,LBiLmi.T)
alpha = np.array([np.dot(a.T,a) for a in betapsi1TLmiLBi])[:,None]
dC_ABCA = .5 *alpha
C_ABCA = self.kern.dKdiag_dtheta(dC_ABCA,self.X) #check
C_ABCB = 0
for psi1_n,alpha_n,X_n in zip(self.psi1.T,alpha,self.X):
dC_ABCB_n = - alpha_n * np.dot(psi1_n[None,:],Kmmi)
C_ABCB += self.kern.dK_dtheta(dC_ABCB_n,X_n[:,None],self.Z) #check
C_ABCC = 0
for psi1_n,alpha_n,X_n in zip(self.psi1.T,alpha,self.X):
psin_K = np.dot(psi1_n[None,:],Kmmi)
tmp = np.dot(psin_K.T,psin_K)
dC_ABCC = .5 * alpha_n * tmp
C_ABCC += self.kern.dK_dtheta(dC_ABCC,self.Z) #check
self.dlogB_dtheta = C_B + C_AA + C_ABA + C_ABB + C_ABCA + C_ABCB + C_ABCC
# dD_dtheta
#FIXME
H = self.Kmm + mdot(self.psi1,self.beta_star*self.psi1.T)
H = self.Kmm_ + mdot(self.psi1,self.beta_star*self.psi1.T)
H = self.Kmm_ + mdot(self.psi1,self.true_beta_star*self.psi1.T)
Hi, LH, LHi, logdetH = pdinv(H)
self.q1 = .5* mdot(self.V_star.T,self.true_psi1.T,Hi,self.true_psi1,self.V_star)
self.q1 = .5* mdot(self.true_V_star.T,self.psi1.T,Hi,self.psi1,self.true_V_star)
#self.q1 = .5* mdot(self.true_V_star.T,self.true_psi1.T,Hi,self.true_psi1,self.true_V_star)
#self.q2 = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
# D_B
gamma_1 = mdot(VVT,self.psi1.T,Hi) #TODO restore
#gamma_1 = mdot(VVT,self.true_psi1.T,Hi)
dD_B = gamma_1
D_B = self.kern.dK_dtheta(dD_B,self.X,self.Z) #check
# D_C
#dD_CA = -.5 * mdot(Hi,self.psi1,gamma_1) #TODO restore
dD_CA = -.5 * mdot(Hi,self.true_psi1,gamma_1)
D_CA = self.kern.dK_dtheta(dD_CA,self.Z) #check
# D_CB
dD_CBA = - mdot(psi1beta.T,Hi,self.psi1,gamma_1)
D_CBA = self.kern.dK_dtheta(dD_CBA,self.X,self.Z)
# D_CBB
gamma_1 = mdot(VVT,self.psi1.T,Hi)
pHip = mdot(self.psi1.T,Hi,self.psi1)
gamma_2 = mdot(self.beta_star*pHip,self.V_star)
D_CBBA = .5 * self.kern.dKdiag_dtheta(gamma_2**2,self.X)
gamma_3 = self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T
D_CBBB = 0
for psi1_n,gamma_n,X_n in zip(self.psi1.T,gamma_2,self.X):
dD_CBBB = - gamma_n**2 * np.dot(psi1_n[None,:],Kmmi)
D_CBBB += self.kern.dK_dtheta(dD_CBBB,X_n[:,None],self.Z)
dA_dpsi0_1 = -0.5 * self.beta_star
dA_dpsi0 = .5 * self.V_star**2
dC_dpsi0 = .5 *alpha
dD_dpsi0 = 0.5*mdot(self.beta_star*pHip,self.V_star)**2
dD_dpsi1 = gamma_1
dD_dpsi1 += -mdot(psi1beta.T,Hi,self.psi1,gamma_1)
dD_dpsi0 += -self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T
dA_dpsi1 = b_psi1_Ki
dC_dpsi1 = -np.dot(psi1beta.T,LBL_inv)
dA_dKmm = -0.5 * np.dot(Kmmipsi1,b_psi1_Ki)
dC_dKmm = -.5*Kmmi
dC_dKmm += .5*LBL_inv
dC_dKmm += mdot(LBL_inv,psi1beta,Kmmipsi1.T)
dD_dKmm = -.5 * mdot(Hi,self.psi1,gamma_1)
dA_dpsi0_theta = self.kern.dKdiag_dtheta(dA_dpsi0,X=self.X)
dA_dpsi1_theta = 0
dA_dpsi1_X = 0
dA_dKmm_theta = 0
dA_dKmm_X = 0
_dC_dpsi1_dtheta = 0
_dC_dpsi1_dX = 0
_dC_dKmm_dtheta = 0
_dC_dKmm_dX = 0
_dD_dpsi1_dtheta_1 = 0
_dD_dpsi1_dX_1 = 0
_dD_dKmm_dtheta_1 = 0
_dD_dKmm_dX_1 = 0
_dD_dpsi1_dtheta_2 = 0
_dD_dpsi1_dX_2 = 0
_dD_dKmm_dtheta_2 = 0
_dD_dKmm_dX_2 = 0
for psi1_n,V_n,X_n,alpha_n,gamma_n,gamma_k in zip(self.psi1.T,self.V_star,self.X,alpha,gamma_2,gamma_3):
D_CBBC = 0
for psi1_n,gamma_n,X_n in zip(self.psi1.T,gamma_2,self.X):
psin_K = np.dot(psi1_n[None,:],Kmmi)
tmp = np.dot(psin_K.T,psin_K)
dD_CBBC = .5*gamma_n**2 * tmp
D_CBBC += self.kern.dK_dtheta(dD_CBBC,self.Z)
# D_A
pHip = mdot(self.psi1.T,Hi,self.psi1) #TODO remove defined above
gamma_3 = self.true_V_star * mdot(self.true_V_star.T,pHip*self.true_beta_star).T
#gamma_3 = self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T
dD_AA = - gamma_3
D_AA = self.kern.dKdiag_dtheta(dD_AA,self.X) #check
_dA_dpsi1 = -V_n**2 * np.dot(psi1_n[None,:],Kmmi)
_dC_dpsi1 = - alpha_n * np.dot(psi1_n[None,:],Kmmi)
_dD_dpsi1_1 = - gamma_n**2 * np.dot(psi1_n[None,:],Kmmi)
_dD_dpsi1_2 = 2. * gamma_k * np.dot(psi1_n[None,:],Kmmi)
D_AB = 0
#for psi1_n,gamma_n,X_n in zip(self.psi1.T,gamma_3,self.X):
for psi1_n,gamma_n,X_n in zip(self.true_psi1.T,gamma_3,self.X):
dD_AB = 2. * gamma_n * np.dot(psi1_n[None,:],Kmmi)
D_AB += self.kern.dK_dtheta(dD_AB,X_n[:,None],self.Z) #check
_dA_dKmm = .5*V_n**2 * np.dot(psin_K.T,psin_K)
_dC_dKmm = .5 * alpha_n * np.dot(psin_K.T,psin_K)
_dD_dKmm_1 = .5*gamma_n**2 * np.dot(psin_K.T,psin_K)
_dD_dKmm_2 = - gamma_n * np.dot(psin_K.T,psin_K)
D_AC = 0
#for psi1_n,gamma_n,X_n in zip(self.psi1.T,gamma_3,self.X):
for psi1_n,gamma_n,X_n in zip(self.true_psi1.T,gamma_3,self.X):
psin_K = np.dot(psi1_n[None,:],Kmmi)
tmp = np.dot(psin_K.T,psin_K)
dD_AC = - gamma_n * tmp
D_AC += self.kern.dK_dtheta(dD_AC,self.Z) #check
dA_dpsi1_theta += self.kern.dK_dtheta(_dA_dpsi1,X_n[None,:],self.Z)
_dC_dpsi1_dtheta += self.kern.dK_dtheta(_dC_dpsi1,X_n[None,:],self.Z)
_dD_dpsi1_dtheta_1 += self.kern.dK_dtheta(_dD_dpsi1_1,X_n[None,:],self.Z)
_dD_dpsi1_dtheta_2 += self.kern.dK_dtheta(_dD_dpsi1_2,X_n[None,:],self.Z)
dA_dKmm_theta += self.kern.dK_dtheta(_dA_dKmm,self.Z)
_dC_dKmm_dtheta += self.kern.dK_dtheta(_dC_dKmm,self.Z)
_dD_dKmm_dtheta_1 += self.kern.dK_dtheta(_dD_dKmm_1,self.Z)
_dD_dKmm_dtheta_2 += self.kern.dK_dtheta(_dD_dKmm_2,self.Z)
dA_dpsi1_X += self.kern.dK_dX(_dA_dpsi1.T,self.Z,X_n[None,:])
_dC_dpsi1_dX += self.kern.dK_dX(_dC_dpsi1.T,self.Z,X_n[None,:])
_dD_dpsi1_dX_1 += self.kern.dK_dX(_dD_dpsi1_1.T,self.Z,X_n[None,:])
_dD_dpsi1_dX_2 += self.kern.dK_dX(_dD_dpsi1_2.T,self.Z,X_n[None,:])
dA_dKmm_X += 2.*self.kern.dK_dX(_dA_dKmm,self.Z)
_dC_dKmm_dX += 2.*self.kern.dK_dX(_dC_dKmm,self.Z)
_dD_dKmm_dX_1 += 2.*self.kern.dK_dX(_dD_dKmm_1,self.Z)
_dD_dKmm_dX_2 += 2.*self.kern.dK_dX(_dD_dKmm_2,self.Z)
dA_dX_1 = self.kern.dK_dX(dA_dpsi1.T,self.Z,self.X) + 2. * self.kern.dK_dX(dA_dKmm,X=self.Z)
dA_dtheta_1 = self.kern.dKdiag_dtheta(dA_dpsi0_1,X=self.X) + self.kern.dK_dtheta(dA_dpsi1,self.X,self.Z) + self.kern.dK_dtheta(dA_dKmm,X=self.Z)
dA_dtheta_2 = dA_dpsi0_theta + dA_dpsi1_theta + dA_dKmm_theta
dA_dX_2 = dA_dpsi1_X + dA_dKmm_X
self.dA_dtheta = dA_dtheta_1 + dA_dtheta_2
self.dA_dX = dA_dX_1 + dA_dX_2
self.dlogB_dtheta = self.kern.dK_dtheta(dC_dKmm,self.Z) + self.kern.dK_dtheta(dC_dpsi1,self.X,self.Z) + self.kern.dKdiag_dtheta(dC_dpsi0,self.X) + _dC_dpsi1_dtheta + _dC_dKmm_dtheta
self.dlogB_dX = 2.*self.kern.dK_dX(dC_dKmm,self.Z) + self.kern.dK_dX(dC_dpsi1.T,self.Z,self.X) + _dC_dpsi1_dX + _dC_dKmm_dX
self.dD_dtheta = self.kern.dKdiag_dtheta(dD_dpsi0,self.X) + self.kern.dK_dtheta(dD_dKmm,self.Z) + self.kern.dK_dtheta(dD_dpsi1,self.X,self.Z) + _dD_dpsi1_dtheta_2 + _dD_dKmm_dtheta_2 + _dD_dpsi1_dtheta_1 + _dD_dKmm_dtheta_1
self.dD_dX = 2.*self.kern.dK_dX(dD_dKmm,self.Z) + self.kern.dK_dX(dD_dpsi1.T,self.Z,self.X) + _dD_dpsi1_dX_2 + _dD_dKmm_dX_2 + _dD_dpsi1_dX_1 + _dD_dKmm_dX_1
#self.dD_dtheta = D_AA + D_AB + D_AC + D_B + D_CA + D_CBA + D_CBBA + D_CBBB + D_CBBC
self.dD_dtheta = D_AA + D_AB + D_AC
#self.q1 = .5* mdot(self.V_star_.T,self.true_psi1.T,self.Hi_,self.true_psi1,self.V_star_)
@ -262,68 +191,117 @@ class FITC(sparse_GP):
raise NotImplementedError, "heteroscedatic derivates not implemented"
else:
# likelihood is not heterscedatic
self.partial_for_likelihood = 0 #FIXME
#self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
#self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
#self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
dbstar_dnoise = self.likelihood.precision * (self.beta_star**2 * self.Diag0[:,None] - self.beta_star)
Lmi_psi1 = mdot(self.Lmi,self.psi1)
LBiLmipsi1 = np.dot(self.LBi,Lmi_psi1)
aux_0 = np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V)
dA_dnoise = 0.5 * self.D * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.D * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
alpha = mdot(LBiLmipsi1,self.V_star)
alpha_ = mdot(LBiLmipsi1.T,alpha)
dD_dnoise_2 = -0.5 * self.D * np.sum(alpha_**2 * dbstar_dnoise )
dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y)
dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star)
dD_dnoise = dD_dnoise_1 + dD_dnoise_2
self.partial_for_likelihood = dA_dnoise + dC_dnoise + dD_dnoise
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
#A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
#C = -self.D * (np.sum(np.log(np.diag(self.LB))))
#D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
D = self.q1
"""
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
#B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
C = -self.D * (np.sum(np.log(np.diag(self.LB))))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + C + D # +B
"""
#return A+C
return D
return A + C + D
def _log_likelihood_gradients(self):
pass
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self):
#dL_dtheta = self.dlogB_dtheta
#dL_dtheta = self.dyby_dtheta
#dL_dtheta = self.dlogbeta_dtheta + self.dyby_dtheta
#dL_dtheta = self.dlogbeta_dtheta + self.dyby_dtheta + self.dlogB_dtheta
dL_dtheta = self.dD_dtheta
"""
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance)
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.Z, self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
"""
dL_dtheta = self.dA_dtheta + self.dlogB_dtheta + self.dD_dtheta
return dL_dtheta
def dL_dZ(self):
dL_dZ = np.zeros(self.M)
"""
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
dL_dZ += self.kern.dK_dX(self.dL_dpsi1, self.Z, self.X)
"""
dL_dZ = self.dA_dX + self.dlogB_dX + self.dD_dX
return dL_dZ
def _raw_predict(self, Xnew, which_parts, full_cov=False):
if self.likelihood.is_heteroscedastic:
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.likelihood.precision.flatten())
self.Diag = self.Diag0 * Iplus_Dprod_i
self.P = Iplus_Dprod_i[:,None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi,self.psi1)
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
self.w = self.Diag * self.likelihood.v_tilde
self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde))
self.mu = self.w + np.dot(self.P,self.gamma)
"""
Make a prediction for the generalized FITC model
Arguments
---------
X : Input prediction data - Nx1 numpy array (floats)
"""
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1)
C = np.eye(self.M) - np.dot(V.T,V)
mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
#self.C = C
#self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
#self.mu_u = mu_u
#self.U = U
# q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
mu_H = np.dot(mu_u,self.mu)
self.mu_H = mu_H
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
KR0T = np.dot(Kx.T,self.Lmi.T)
mu_star = np.dot(KR0T,mu_H)
if full_cov:
Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed?
var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) # TODO: RA, is this line needed?
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
return mu_star[:,None],var
else:
raise NotImplementedError, "homoscedastic fitc not implemented"
"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]
"""

View file

@ -3,10 +3,11 @@
import numpy as np
from scipy import linalg
import pylab as pb
from .. import kern
from ..core import model
from ..util.linalg import pdinv, mdot
from ..util.linalg import pdinv, mdot, tdot
from ..util.plot import gpplot, x_frame1D, x_frame2D, Tango
from ..likelihoods import EP
@ -58,13 +59,12 @@ class GP(model):
"""
TODO: one day we might like to learn Z by gradient methods?
"""
#FIXME: this doesn;t live here.
return np.zeros_like(self.Z)
def _set_params(self, p):
self.kern._set_params_transformed(p[:self.kern.Nparam_transformed()])
# self.likelihood._set_params(p[self.kern.Nparam:]) # test by Nicolas
self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas
self.likelihood._set_params(p[self.kern.Nparam_transformed():])
self.K = self.kern.K(self.X)
self.K += self.likelihood.covariance_matrix
@ -73,10 +73,14 @@ class GP(model):
# the gradient of the likelihood wrt the covariance matrix
if self.likelihood.YYT is None:
alpha = np.dot(self.Ki, self.likelihood.Y)
self.dL_dK = 0.5 * (np.dot(alpha, alpha.T) - self.D * self.Ki)
#alpha = np.dot(self.Ki, self.likelihood.Y)
alpha,_ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y,lower=1)
self.dL_dK = 0.5 * (tdot(alpha) - self.D * self.Ki)
else:
tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
#tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
self.dL_dK = 0.5 * (tmp - self.D * self.Ki)
def _get_params(self):
@ -100,7 +104,9 @@ class GP(model):
Computes the model fit using YYT if it's available
"""
if self.likelihood.YYT is None:
return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.L, np.asfortranarray(self.likelihood.Y), lower=1)
return -0.5 * np.sum(np.square(tmp))
#return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
else:
return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT))
@ -123,17 +129,14 @@ class GP(model):
"""
return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False):
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False,stop=False):
"""
Internal helper function for making predictions, does not account
for normalization or likelihood
#TODO: which_parts does nothing
"""
Kx = self.kern.K(self.X, _Xnew,which_parts=which_parts)
KiKx = np.dot(self.Ki, Kx)
Kx = self.kern.K(_Xnew,self.X,which_parts=which_parts).T
#KiKx = np.dot(self.Ki, Kx)
KiKx, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(Kx), lower=1)
mu = np.dot(KiKx.T, self.likelihood.Y)
if full_cov:
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
@ -142,6 +145,8 @@ class GP(model):
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
var = var[:, None]
if stop:
debug_this
return mu, var
@ -178,7 +183,8 @@ class GP(model):
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False):
"""
Plot the GP's view of the world, where the data is normalized and the likelihood is Gaussian
Plot the GP's view of the world, where the data is normalized and the
likelihood is Gaussian.
:param samples: the number of a posteriori samples to plot
:param which_data: which if the training data to plot (default all)
@ -193,8 +199,8 @@ class GP(model):
- In two dimsensions, a contour-plot shows the mean predicted function
- In higher dimensions, we've no implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions using which_data and which_functions
Plot the data's view of the world, with non-normalized values and GP predictions passed through the likelihood
Can plot only part of the data and part of the posterior functions
using which_data and which_functions
"""
if which_data == 'all':
which_data = slice(None)

View file

@ -45,7 +45,7 @@ class GPLVM(GP):
return np.hstack((self.X.flatten(), GP._get_params(self)))
def _set_params(self,x):
self.X = x[:self.X.size].reshape(self.N,self.Q).copy()
self.X = x[:self.N*self.Q].reshape(self.N,self.Q).copy()
GP._set_params(self, x[self.X.size:])
def _log_likelihood_gradients(self):

View file

@ -3,17 +3,12 @@
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, tdot, symmetrify
from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides
from ..util.plot import gpplot
from .. import kern
from GP import GP
from scipy import linalg
def backsub_both_sides(L, X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
class sparse_GP(GP):
"""
Variational sparse GP model

View file

@ -23,6 +23,7 @@ class warpedGP(GP):
self.warping_function = TanhWarpingFunction_d(warping_terms)
self.warping_params = (np.random.randn(self.warping_function.n_terms*3+1,) * 1)
Y = self._scale_data(Y)
self.has_uncertain_inputs = False
self.Y_untransformed = Y.copy()
self.predict_in_warped_space = False
@ -30,6 +31,14 @@ class warpedGP(GP):
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
def _scale_data(self, Y):
self._Ymax = Y.max()
self._Ymin = Y.min()
return (Y-self._Ymin)/(self._Ymax-self._Ymin) - 0.5
def _unscale_data(self, Y):
return (Y + 0.5)*(self._Ymax - self._Ymin) + self._Ymin
def _set_params(self, x):
self.warping_params = x[:self.warping_function.num_parameters]
Y = self.transform_data()
@ -79,5 +88,5 @@ class warpedGP(GP):
if self.predict_in_warped_space:
mu = self.warping_function.f_inv(mu, self.warping_params)
var = self.warping_function.f_inv(var, self.warping_params)
mu = self._unscale_data(mu)
return mu, var

View file

@ -124,8 +124,9 @@ def pdinv(A, *args):
L = jitchol(A, *args)
logdet = 2.*np.sum(np.log(np.diag(L)))
Li = chol_inv(L)
Ai = linalg.lapack.flapack.dpotri(L)[0]
Ai = np.tril(Ai) + np.tril(Ai,-1).T
Ai, _ = linalg.lapack.flapack.dpotri(L)
#Ai = np.tril(Ai) + np.tril(Ai,-1).T
symmetrify(Ai)
return Ai, L, Li, logdet
@ -235,6 +236,18 @@ def tdot(*args, **kwargs):
else:
return tdot_numpy(*args,**kwargs)
def DSYR(A,x,alpha=1.):
N = c_int(A.shape[0])
LDA = c_int(A.shape[0])
UPLO = c_char('l')
ALPHA = c_double(alpha)
A_ = A.ctypes.data_as(ctypes.c_void_p)
x_ = x.ctypes.data_as(ctypes.c_void_p)
INCX = c_int(1)
_blaslib.dsyr_(byref(UPLO), byref(N), byref(ALPHA),
x_, byref(INCX), A_, byref(LDA))
symmetrify(A,upper=True)
def symmetrify(A,upper=False):
"""
Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper
@ -244,33 +257,38 @@ def symmetrify(A,upper=False):
N,M = A.shape
assert N==M
c_contig_code = """
int iN;
for (int i=1; i<N; i++){
iN = i*N;
for (int j=0; j<i; j++){
A[i+j*N] = A[i*N+j];
A[i+j*N] = A[iN+j];
}
}
"""
f_contig_code = """
int iN;
for (int i=1; i<N; i++){
iN = i*N;
for (int j=0; j<i; j++){
A[i*N+j] = A[i+j*N];
A[iN+j] = A[i+j*N];
}
}
"""
if A.flags['C_CONTIGUOUS'] and upper:
weave.inline(f_contig_code,['A','N'])
weave.inline(f_contig_code,['A','N'], extra_compile_args=['-O3'])
elif A.flags['C_CONTIGUOUS'] and not upper:
weave.inline(c_contig_code,['A','N'])
weave.inline(c_contig_code,['A','N'], extra_compile_args=['-O3'])
elif A.flags['F_CONTIGUOUS'] and upper:
weave.inline(c_contig_code,['A','N'])
weave.inline(c_contig_code,['A','N'], extra_compile_args=['-O3'])
elif A.flags['F_CONTIGUOUS'] and not upper:
weave.inline(f_contig_code,['A','N'])
weave.inline(f_contig_code,['A','N'], extra_compile_args=['-O3'])
else:
tmp = np.tril(A)
A[:] = 0.0
A += tmp
A += np.tril(tmp,-1).T
def symmetrify_murray(A):
A += A.T
nn = A.shape[0]
@ -303,3 +321,8 @@ def cholupdate(L,x):
x = x.copy()
N = x.size
weave.inline(code, support_code=support_code, arg_names=['N','L','x'], type_converters=weave.converters.blitz)
def backsub_both_sides(L, X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T

View file

@ -0,0 +1,35 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import weave
def std_norm_pdf(x):
"""Standard Gaussian density function"""
return 1./np.sqrt(2.*np.pi)*np.exp(-.5*x**2)
def std_norm_cdf(x):
"""
Cumulative standard Gaussian distribution
Based on Abramowitz, M. and Stegun, I. (1970)
"""
support_code = "#include <math.h>"
code = """
double sign = 1.0;
if (x < 0.0){
sign = -1.0;
x = -x;
}
x = x/sqrt(2.0);
double t = 1.0/(1.0 + 0.3275911*x);
double erf = 1. - exp(-x*x)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429)))));
return_val = 0.5*(1.0 + sign*erf);
"""
x = float(x)
return weave.inline(code,arg_names=['x'],support_code=support_code)

View file

@ -14,7 +14,7 @@ class data_show:
"""
def __init__(self, vals, axes=None):
self.vals = vals
self.vals = vals.copy()
# If no axes are defined, create some.
if axes==None:
fig = plt.figure()
@ -32,12 +32,12 @@ class vector_show(data_show):
"""
def __init__(self, vals, axes=None):
data_show.__init__(self, vals, axes)
self.vals = vals.T
self.vals = vals.T.copy()
self.handle = self.axes.plot(np.arange(0, len(vals))[:, None], self.vals)[0]
def modify(self, vals):
xdata, ydata = self.handle.get_data()
self.vals = vals.T
self.vals = vals.T.copy()
self.handle.set_data(xdata, self.vals)
self.axes.figure.canvas.draw()
@ -52,7 +52,7 @@ class lvm(data_show):
:param latent_axes: the axes where the latent visualization should be plotted.
"""
if vals == None:
vals = model.X[0]
vals = model.X[0].copy()
data_show.__init__(self, vals, axes=latent_axes)
@ -76,7 +76,7 @@ class lvm(data_show):
self.latent_index = latent_index
self.latent_dim = model.Q
# The red cross which shows current latent point.
# The red cross which shows current latent point.
self.latent_values = vals
self.latent_handle = self.latent_axes.plot([0],[0],'rx',mew=2)[0]
self.modify(vals)
@ -84,7 +84,6 @@ class lvm(data_show):
def modify(self, vals):
"""When latent values are modified update the latent representation and ulso update the output visualization."""
y = self.model.predict(vals)[0]
print y
self.data_visualize.modify(y)
@ -107,7 +106,7 @@ class lvm(data_show):
if self.called and self.move_on:
# Call modify code on move
self.latent_values[self.latent_index[0]]=event.xdata
self.latent_values[self.latent_index[1]]=event.ydata
self.latent_values[self.latent_index[1]]=event.ydata
self.modify(self.latent_values)
def show_sensitivities(self):
@ -230,11 +229,11 @@ class image_show(data_show):
plt.show()
def set_image(self, vals):
self.vals = np.reshape(vals, self.dimensions, order='F')
self.vals = np.reshape(vals, self.dimensions, order='F').copy()
if self.transpose:
self.vals = self.vals.T
self.vals = self.vals.T.copy()
if not self.scale:
self.vals = self.vals
self.vals = self.vals.copy()
#if self.invert:
# self.vals = -self.vals
@ -318,8 +317,7 @@ class stick_show(mocap_data_show):
mocap_data_show.__init__(self, vals, axes, connect)
def process_values(self, vals):
self.vals = vals.reshape((3, vals.shape[1]/3)).T
print vals
self.vals = vals.reshape((3, vals.shape[1]/3)).T.copy()
class skeleton_show(mocap_data_show):
"""data_show class for visualizing motion capture data encoded as a skeleton with angles."""

View file

@ -81,7 +81,7 @@ class TanhWarpingFunction(WarpingFunction):
iterations: number of N.R. iterations
"""
y = y.copy()
z = np.ones_like(y)
@ -176,7 +176,7 @@ class TanhWarpingFunction_d(WarpingFunction):
mpsi = psi.copy()
d = psi[-1]
mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
#3. transform data
z = d*y.copy()
for i in range(len(mpsi)):
@ -185,7 +185,7 @@ class TanhWarpingFunction_d(WarpingFunction):
return z
def f_inv(self, y, psi, iterations = 30):
def f_inv(self, z, psi, max_iterations = 1000):
"""
calculate the numerical inverse of f
@ -194,13 +194,19 @@ class TanhWarpingFunction_d(WarpingFunction):
"""
y = y.copy()
z = np.ones_like(y)
z = z.copy()
y = np.ones_like(z)
it = 0
update = np.inf
for i in range(iterations):
z -= (self.f(z, psi) - y)/self.fgrad_y(z,psi)
return z
while it == 0 or (np.abs(update).sum() > 1e-10 and it < max_iterations):
update = (self.f(y, psi) - z)/self.fgrad_y(y, psi)
y -= update
it += 1
if it == max_iterations:
print "WARNING!!! Maximum number of iterations reached in f_inv "
return y
def fgrad_y(self, y, psi, return_precalc = False):