mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-08 15:05:15 +02:00
Merge branch 'master' of github.com:SheffieldML/GPy
This commit is contained in:
commit
dcbe8e405b
3 changed files with 61 additions and 82 deletions
|
|
@ -121,9 +121,6 @@ class model(parameterised):
|
|||
else:
|
||||
raise AttributeError, "no parameter matches %s"%name
|
||||
|
||||
|
||||
|
||||
|
||||
def log_prior(self):
|
||||
"""evaluate the prior"""
|
||||
return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self._get_params()) if p is not None])
|
||||
|
|
@ -135,12 +132,11 @@ class model(parameterised):
|
|||
[np.put(ret,i,p.lnpdf_grad(xx)) for i,(p,xx) in enumerate(zip(self.priors,x)) if not p is None]
|
||||
return ret
|
||||
|
||||
def _log_likelihood_gradients_transformed(self):
|
||||
def _transform_gradients(self, g):
|
||||
"""
|
||||
Use self.log_likelihood_gradients and self.prior_gradients to get the gradients of the model.
|
||||
Adjust the gradient for constraints and ties, return.
|
||||
Takes a list of gradients and return an array of transformed gradients (positive/negative/tied/and so on)
|
||||
"""
|
||||
g = self._log_likelihood_gradients() + self._log_prior_gradients()
|
||||
|
||||
x = self._get_params()
|
||||
g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
|
||||
g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices]
|
||||
|
|
@ -152,6 +148,7 @@ class model(parameterised):
|
|||
else:
|
||||
return g
|
||||
|
||||
|
||||
def randomize(self):
|
||||
"""
|
||||
Randomize the model.
|
||||
|
|
@ -241,6 +238,27 @@ class model(parameterised):
|
|||
print "Warning! constraining %s postive"%name
|
||||
|
||||
|
||||
def objective_function(self, x):
|
||||
"""
|
||||
The objective function passed to the optimizer. It combines the likelihood and the priors.
|
||||
"""
|
||||
self._set_params_transformed(x)
|
||||
return -self.log_likelihood() - self.log_prior()
|
||||
|
||||
def objective_function_gradients(self, x):
|
||||
"""
|
||||
Gets the gradients from the likelihood and the priors.
|
||||
"""
|
||||
self._set_params_transformed(x)
|
||||
LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
|
||||
prior_gradients = self._transform_gradients(self._log_prior_gradients())
|
||||
return -LL_gradients - prior_gradients
|
||||
|
||||
def objective_and_gradients(self, x):
|
||||
obj_f = self.objective_function(x)
|
||||
obj_grads = self.objective_function_gradients(x)
|
||||
return obj_f, obj_grads
|
||||
|
||||
def optimize(self, optimizer=None, start=None, **kwargs):
|
||||
"""
|
||||
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
|
||||
|
|
@ -254,22 +272,12 @@ class model(parameterised):
|
|||
if optimizer is None:
|
||||
optimizer = self.preferred_optimizer
|
||||
|
||||
def f(x):
|
||||
self._set_params_transformed(x)
|
||||
return -self.log_likelihood()-self.log_prior()
|
||||
def fp(x):
|
||||
self._set_params_transformed(x)
|
||||
return -self._log_likelihood_gradients_transformed()
|
||||
def f_fp(x):
|
||||
self._set_params_transformed(x)
|
||||
return -self.log_likelihood()-self.log_prior(),-self._log_likelihood_gradients_transformed()
|
||||
|
||||
if start == None:
|
||||
start = self._get_params_transformed()
|
||||
|
||||
optimizer = optimization.get_optimizer(optimizer)
|
||||
opt = optimizer(start, model = self, **kwargs)
|
||||
opt.run(f_fp=f_fp, f=f, fp=fp)
|
||||
opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients)
|
||||
self.optimization_runs.append(opt)
|
||||
|
||||
self._set_params_transformed(opt.x_opt)
|
||||
|
|
@ -357,12 +365,9 @@ class model(parameterised):
|
|||
dx = step*np.sign(np.random.uniform(-1,1,x.size))
|
||||
|
||||
#evaulate around the point x
|
||||
self._set_params_transformed(x+dx)
|
||||
f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
|
||||
self._set_params_transformed(x-dx)
|
||||
f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
|
||||
self._set_params_transformed(x)
|
||||
gradient = self._log_likelihood_gradients_transformed()
|
||||
f1, g1 = self.objective_and_gradients(x+dx)
|
||||
f2, g2 = self.objective_and_gradients(x-dx)
|
||||
gradient = self.objective_function_gradients(x)
|
||||
|
||||
numerical_gradient = (f1-f2)/(2*dx)
|
||||
global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
|
||||
|
|
@ -398,14 +403,10 @@ class model(parameterised):
|
|||
for i in param_list:
|
||||
xx = x.copy()
|
||||
xx[i] += step
|
||||
self._set_params_transformed(xx)
|
||||
f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()[i]
|
||||
f1, g1 = self.objective_and_gradients(xx)
|
||||
xx[i] -= 2.*step
|
||||
self._set_params_transformed(xx)
|
||||
f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()[i]
|
||||
self._set_params_transformed(x)
|
||||
gradient = self._log_likelihood_gradients_transformed()[i]
|
||||
|
||||
f2, g2 = self.objective_and_gradients(xx)
|
||||
gradient = self.objective_function_gradients(x)[i]
|
||||
|
||||
numerical_gradient = (f1-f2)/(2*step)
|
||||
ratio = (f1-f2)/(2*step*gradient)
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ m.constrain_positive('(rbf|bias|S|linear|white|noise)')
|
|||
# m.unconstrain('white')
|
||||
# m.constrain_bounded('white', 1e-6, 10.0)
|
||||
# plot_oil(m.X, np.array([1,1]), labels, 'PCA initialization')
|
||||
m.optimize(messages = True)
|
||||
#m.optimize(messages = True)
|
||||
# m.optimize('tnc', messages = True)
|
||||
# plot_oil(m.X, m.kern.parts[0].lengthscale, labels, 'B-GPLVM')
|
||||
# # pb.figure()
|
||||
|
|
|
|||
|
|
@ -371,16 +371,17 @@ class kern(parameterised):
|
|||
|
||||
def psi2(self,Z,mu,S,slices1=None,slices2=None):
|
||||
"""
|
||||
:Z: np.ndarray of inducing inputs (M x Q)
|
||||
: mu, S: np.ndarrays of means and variacnes (each N x Q)
|
||||
:returns psi2: np.ndarray (N,M,M,Q) """
|
||||
:param Z: np.ndarray of inducing inputs (M x Q)
|
||||
:param mu, S: np.ndarrays of means and variances (each N x Q)
|
||||
:returns psi2: np.ndarray (N,M,M)
|
||||
"""
|
||||
target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[0]))
|
||||
slices1, slices2 = self._process_slices(slices1,slices2)
|
||||
[p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s1,s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
|
||||
|
||||
#compute the "cross" terms
|
||||
for p1, p2 in itertools.combinations(self.parts,2):
|
||||
#white doesn;t compine with anything
|
||||
#white doesn;t combine with anything
|
||||
if p1.name=='white' or p2.name=='white':
|
||||
pass
|
||||
#rbf X bias
|
||||
|
|
@ -396,28 +397,9 @@ class kern(parameterised):
|
|||
else:
|
||||
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# "crossterms". Here we are recomputing psi1 for white (we don't need to), but it's
|
||||
# not really expensive, since it's just a matrix of zeroes.
|
||||
# psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
|
||||
# [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
|
||||
|
||||
crossterms = 0.0
|
||||
# for 3 kernels this returns something like
|
||||
# [(0,1), (0,2), (1,2)]
|
||||
# in theory, we should also account for (1,0), (2,0) and so on, but
|
||||
# the transpose deals exactly with that
|
||||
# for a,b in itertools.combinations(psi1_matrices, 2):
|
||||
# tmp = np.multiply(a,b)
|
||||
# crossterms += tmp[:,None,:] + tmp[:, :,None]
|
||||
|
||||
return target + crossterms
|
||||
return target
|
||||
|
||||
def dpsi2_dtheta(self,partial,partial1,Z,mu,S,slices1=None,slices2=None):
|
||||
"""Returns shape (N,M,M,Ntheta)"""
|
||||
slices1, slices2 = self._process_slices(slices1,slices2)
|
||||
target = np.zeros(self.Nparam)
|
||||
[p.dpsi2_dtheta(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
|
||||
|
|
@ -429,7 +411,7 @@ class kern(parameterised):
|
|||
ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
|
||||
ps1, ps2 = self.param_slices[i1], self.param_slices[i2]
|
||||
|
||||
#white doesn;t compine with anything
|
||||
#white doesn;t combine with anything
|
||||
if p1.name=='white' or p2.name=='white':
|
||||
pass
|
||||
#rbf X bias
|
||||
|
|
@ -447,26 +429,6 @@ class kern(parameterised):
|
|||
else:
|
||||
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||
|
||||
# # "crossterms"
|
||||
# # 1. get all the psi1 statistics
|
||||
# psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
|
||||
# [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
|
||||
|
||||
# partial1 = np.ones_like(partial1)
|
||||
# # 2. get all the dpsi1/dtheta gradients
|
||||
# psi1_gradients = [np.zeros(self.Nparam) for p in self.parts]
|
||||
# [p.dpsi1_dtheta(partial1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],psi1g_target[ps]) for p,ps,s1,s2,i_s,psi1g_target in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices,psi1_gradients)]
|
||||
|
||||
|
||||
# # 3. multiply them somehow
|
||||
# for a,b in itertools.combinations(range(len(psi1_matrices)), 2):
|
||||
|
||||
# tmp = (psi1_gradients[a][None, None] * psi1_matrices[b][:,:, None])
|
||||
# # target += (tmp[None] + tmp[:,None]).sum(0).sum(0).sum(0)
|
||||
# # gne = (psi1_gradients[a].sum()*psi1_matrices[b].sum())
|
||||
# # target += gne
|
||||
# #target += (gne[None] + gne[:, None]).sum(0)
|
||||
# target += (partial.sum(0)[:,:,None] * (tmp[:, None] + tmp[:,:,None]).sum(0)).sum(0).sum(0)
|
||||
return self._transform_gradients(target)
|
||||
|
||||
def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None):
|
||||
|
|
@ -475,16 +437,15 @@ class kern(parameterised):
|
|||
[p.dpsi2_dZ(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
|
||||
|
||||
#compute the "cross" terms
|
||||
#TODO: slices (need to iterate around the input slices also...)
|
||||
for p1, p2 in itertools.combinations(self.parts,2):
|
||||
#white doesn;t compine with anything
|
||||
#white doesn;t combine with anything
|
||||
if p1.name=='white' or p2.name=='white':
|
||||
pass
|
||||
#rbf X bias
|
||||
elif p1.name=='bias' and p2.name=='rbf':
|
||||
target += p2.dpsi1_dX(partial.sum(1)*p1.variance,Z,mu,S)
|
||||
target += p2.dpsi1_dX(partial.sum(1)*p1.variance,Z,mu,S,target)
|
||||
elif p2.name=='bias' and p1.name=='rbf':
|
||||
target += p1.dpsi1_dZ(partial.sum(2)*p2.variance,Z,mu,S)
|
||||
target += p1.dpsi1_dZ(partial.sum(2)*p2.variance,Z,mu,S,target)
|
||||
#rbf X linear
|
||||
elif p1.name=='linear' and p2.name=='rbf':
|
||||
raise NotImplementedError #TODO
|
||||
|
|
@ -502,7 +463,24 @@ class kern(parameterised):
|
|||
target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
|
||||
[p.dpsi2_dmuS(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
|
||||
|
||||
#TODO: there are some extra terms to compute here!
|
||||
#compute the "cross" terms
|
||||
for p1, p2 in itertools.combinations(self.parts,2):
|
||||
#white doesn;t combine with anything
|
||||
if p1.name=='white' or p2.name=='white':
|
||||
pass
|
||||
#rbf X bias
|
||||
elif p1.name=='bias' and p2.name=='rbf':
|
||||
target += p2.dpsi1_dmuS(partial.sum(1)*p1.variance,Z,mu,S,target_mu,target_S)
|
||||
elif p2.name=='bias' and p1.name=='rbf':
|
||||
target += p1.dpsi1_dmuS(partial.sum(2)*p2.variance,Z,mu,S,target_mu,target_S)
|
||||
#rbf X linear
|
||||
elif p1.name=='linear' and p2.name=='rbf':
|
||||
raise NotImplementedError #TODO
|
||||
elif p2.name=='linear' and p1.name=='rbf':
|
||||
raise NotImplementedError #TODO
|
||||
else:
|
||||
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||
|
||||
return target_mu, target_S
|
||||
|
||||
def plot(self, x = None, plot_limits=None,which_functions='all',resolution=None,*args,**kwargs):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue