Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
James Hensman 2014-08-08 11:07:23 +01:00
commit 3651374617
9 changed files with 209 additions and 37 deletions

View file

@ -349,7 +349,7 @@ class Model(Parameterized):
numerical_gradient = (f1 - f2) / (2 * step) numerical_gradient = (f1 - f2) / (2 * step)
if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind] if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind]
else: ratio = (f1 - f2) / (2 * step * gradient[xind]) else: ratio = (f1 - f2) / (2 * step * gradient[xind])
difference = np.abs((f1 - f2) / 2 / step - gradient[xind]) difference = np.abs(numerical_gradient - gradient[xind])
if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance: if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
formatted_name = "\033[92m {0} \033[0m".format(names[nind]) formatted_name = "\033[92m {0} \033[0m".format(names[nind])

View file

@ -281,7 +281,7 @@ class Gradcheckable(Pickleable, Parentable):
""" """
if self.has_parent(): if self.has_parent():
return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance) return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
return self._checkgrad(self[''], verbose=verbose, step=step, tolerance=tolerance) return self._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
def _checkgrad(self, param, verbose=0, step=1e-6, tolerance=1e-3): def _checkgrad(self, param, verbose=0, step=1e-6, tolerance=1e-3):
""" """
@ -372,8 +372,9 @@ class Indexable(Nameable, Observable):
basically just sums up the parameter sizes which come before param. basically just sums up the parameter sizes which come before param.
""" """
if param.has_parent(): if param.has_parent():
if param._parent_._get_original(param) in self.parameters: p = param._parent_._get_original(param)
return self._param_slices_[param._parent_._get_original(param)._parent_index_].start if p in self.parameters:
return reduce(lambda a,b: a + b.size, self.parameters[:p._parent_index_], 0)
return self._offset_for(param._parent_) + param._parent_._offset_for(param) return self._offset_for(param._parent_) + param._parent_._offset_for(param)
return 0 return 0
@ -699,36 +700,10 @@ class OptimizationHandlable(Indexable):
def _get_params_transformed(self): def _get_params_transformed(self):
raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!" raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
# # transformed parameters (apply un-transformation rules)
# p = self.param_array.copy()
# [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
# if self.has_parent() and self.constraints[__fixed__].size != 0:
# fixes = np.ones(self.size).astype(bool)
# fixes[self.constraints[__fixed__]] = FIXED
# return p[fixes]
# elif self._has_fixes():
# return p[self._fixes_]
# return p
# #
def _set_params_transformed(self, p): def _set_params_transformed(self, p):
raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!" raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
# """
# Set parameters p, but make sure they get transformed before setting.
# This means, the optimizer sees p, whereas the model sees transformed(p),
# such that, the parameters the model sees are in the right domain.
# """
# if not(p is self.param_array):
# if self.has_parent() and self.constraints[__fixed__].size != 0:
# fixes = np.ones(self.size).astype(bool)
# fixes[self.constraints[__fixed__]] = FIXED
# self.param_array.flat[fixes] = p
# elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
# else: self.param_array.flat = p
# [np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
# for c, ind in self.constraints.iteritems() if c != __fixed__]
# self._trigger_params_changed()
def _trigger_params_changed(self, trigger_parent=True): def _trigger_params_changed(self, trigger_parent=True):
""" """
First tell all children to update, First tell all children to update,
@ -736,7 +711,7 @@ class OptimizationHandlable(Indexable):
If trigger_parent is True, we will tell the parent, otherwise not. If trigger_parent is True, we will tell the parent, otherwise not.
""" """
[p._trigger_params_changed(trigger_parent=False) for p in self.parameters] [p._trigger_params_changed(trigger_parent=False) for p in self.parameters if not p.is_fixed]
self.notify_observers(None, None if trigger_parent else -np.inf) self.notify_observers(None, None if trigger_parent else -np.inf)
def _size_transformed(self): def _size_transformed(self):
@ -888,6 +863,25 @@ class Parameterizable(OptimizationHandlable):
self._param_array_ = np.empty(self.size, dtype=np.float64) self._param_array_ = np.empty(self.size, dtype=np.float64)
return self._param_array_ return self._param_array_
@property
def unfixed_param_array(self):
"""
Array representing the parameters of this class.
There is only one copy of all parameters in memory, two during optimization.
!WARNING!: setting the parameter array MUST always be done in memory:
m.param_array[:] = m_copy.param_array
"""
if self.__dict__.get('_param_array_', None) is None:
self._param_array_ = np.empty(self.size, dtype=np.float64)
if self.constraints[__fixed__].size !=0:
fixes = np.ones(self.size).astype(bool)
fixes[self.constraints[__fixed__]] = FIXED
return self._param_array_[fixes]
else:
return self._param_array_
@param_array.setter @param_array.setter
def param_array(self, arr): def param_array(self, arr):
self._param_array_ = arr self._param_array_ = arr

View file

@ -56,7 +56,7 @@ class EPDTC(LatentFunctionInference):
self._ep_approximation = None self._ep_approximation = None
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
num_data, output_dim = X.shape num_data, output_dim = Y.shape
assert output_dim ==1, "ep in 1D only (for now!)" assert output_dim ==1, "ep in 1D only (for now!)"
Kmm = kern.K(Z) Kmm = kern.K(Z)

View file

@ -1,2 +1,3 @@
from scg import SCG from scg import SCG
from optimization import * from optimization import *
from hmc import HMC,HMC_shortcut

View file

@ -0,0 +1,157 @@
"""HMC implementation"""
import numpy as np
class HMC:
def __init__(self,model,M=None,stepsize=1e-1):
self.model = model
self.stepsize = stepsize
self.p = np.empty_like(model.optimizer_array.copy())
if M is None:
self.M = np.eye(self.p.size)
else:
self.M = M
self.Minv = np.linalg.inv(self.M)
def sample(self, m_iters=1000, hmc_iters=20):
params = np.empty((m_iters,self.p.size))
for i in xrange(m_iters):
self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
H_old = self._computeH()
theta_old = self.model.optimizer_array.copy()
params[i] = self.model.unfixed_param_array
#Matropolis
self._update(hmc_iters)
H_new = self._computeH()
if H_old>H_new:
k = 1.
else:
k = np.exp(H_old-H_new)
if np.random.rand()<k:
params[i] = self.model.unfixed_param_array
else:
self.model.optimizer_array = theta_old
return params
def _update(self, hmc_iters):
for i in xrange(hmc_iters):
self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
self.model.optimizer_array = self.model.optimizer_array + self.stepsize*np.dot(self.Minv, self.p)
self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
def _computeH(self,):
return self.model.objective_function()+self.p.size*np.log(2*np.pi)/2.+np.log(np.linalg.det(self.M))/2.+np.dot(self.p, np.dot(self.Minv,self.p[:,None]))/2.
class HMC_shortcut:
def __init__(self,model,M=None,stepsize_range=[1e-6, 1e-1],groupsize=5, Hstd_th=[1e-5, 3.]):
self.model = model
self.stepsize_range = np.log(stepsize_range)
self.p = np.empty_like(model.optimizer_array.copy())
self.groupsize = groupsize
self.Hstd_th = Hstd_th
if M is None:
self.M = np.eye(self.p.size)
else:
self.M = M
self.Minv = np.linalg.inv(self.M)
def sample(self, m_iters=1000, hmc_iters=20):
params = np.empty((m_iters,self.p.size))
for i in xrange(m_iters):
# sample a stepsize from the uniform distribution
stepsize = np.exp(np.random.rand()*(self.stepsize_range[1]-self.stepsize_range[0])+self.stepsize_range[0])
self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
H_old = self._computeH()
params[i] = self.model.unfixed_param_array
theta_old = self.model.optimizer_array.copy()
#Matropolis
self._update(hmc_iters, stepsize)
H_new = self._computeH()
if H_old>H_new:
k = 1.
else:
k = np.exp(H_old-H_new)
if np.random.rand()<k:
params[i] = self.model.unfixed_param_array
else:
self.model.optimizer_array = theta_old
return params
def _update(self, hmc_iters, stepsize):
theta_buf = np.empty((2*hmc_iters+1,self.model.optimizer_array.size))
p_buf = np.empty((2*hmc_iters+1,self.p.size))
H_buf = np.empty((2*hmc_iters+1,))
# Set initial position
theta_buf[hmc_iters] = self.model.optimizer_array
p_buf[hmc_iters] = self.p
H_buf[hmc_iters] = self._computeH()
reversal = []
pos = 1
i=0
while i<hmc_iters:
self.p[:] += -stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
self.model.optimizer_array = self.model.optimizer_array + stepsize*np.dot(self.Minv, self.p)
self.p[:] += -stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
theta_buf[hmc_iters+pos] = self.model.optimizer_array
p_buf[hmc_iters+pos] = self.p
H_buf[hmc_iters+pos] = self._computeH()
i+=1
if i<self.groupsize:
pos += 1
continue
else:
if len(reversal)==0:
Hlist = range(hmc_iters+pos,hmc_iters+pos-self.groupsize,-1)
if self._testH(H_buf[Hlist]):
pos += 1
else:
# Reverse the trajectory for the 1st time
reversal.append(pos)
if hmc_iters-i>pos:
pos = -1
i += pos
self.model.optimizer_array = theta_buf[hmc_iters]
self.p[:] = -p_buf[hmc_iters]
else:
pos_new = pos-hmc_iters+i
self.model.optimizer_array = theta_buf[hmc_iters+pos_new]
self.p[:] = -p_buf[hmc_iters+pos_new]
break
else:
Hlist = range(hmc_iters+pos,hmc_iters+pos+self.groupsize)
# print Hlist
# print self._testH(H_buf[Hlist])
if self._testH(H_buf[Hlist]):
pos += -1
else:
# Reverse the trajectory for the 2nd time
r = (hmc_iters - i)%((reversal[0]-pos)*2)
if r>(reversal[0]-pos):
pos_new = 2*reversal[0] - r - pos
else:
pos_new = pos + r
self.model.optimizer_array = theta_buf[hmc_iters+pos_new]
self.p[:] = p_buf[hmc_iters+pos_new] # the sign of momentum might be wrong!
# print reversal[0],pos,pos_new
# print H_buf
break
def _testH(self, Hlist):
Hstd = np.std(Hlist)
# print Hlist
# print Hstd
if Hstd<self.Hstd_th[0] or Hstd>self.Hstd_th[1]:
return False
else:
return True
def _computeH(self,):
return self.model.objective_function()+self.p.size*np.log(2*np.pi)/2.+np.log(np.linalg.det(self.M))/2.+np.dot(self.p, np.dot(self.Minv,self.p[:,None]))/2.

View file

@ -10,7 +10,7 @@ class Add(CombinationKernel):
""" """
Add given list of kernels together. Add given list of kernels together.
propagates gradients through. propagates gradients through.
This kernel will take over the active dims of it's subkernels passed in. This kernel will take over the active dims of it's subkernels passed in.
""" """
def __init__(self, subkerns, name='add'): def __init__(self, subkerns, name='add'):
@ -40,7 +40,7 @@ class Add(CombinationKernel):
return reduce(np.add, (p.Kdiag(X) for p in which_parts)) return reduce(np.add, (p.Kdiag(X) for p in which_parts))
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts] [p.update_gradients_full(dL_dK, X, X2) for p in self.parts if not p.is_fixed]
def update_gradients_diag(self, dL_dK, X): def update_gradients_diag(self, dL_dK, X):
[p.update_gradients_diag(dL_dK, X) for p in self.parts] [p.update_gradients_diag(dL_dK, X) for p in self.parts]

View file

@ -84,6 +84,22 @@ class BayesianGPLVM(SparseGP):
self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2']) self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
# This is testing code -------------------------
# i = np.random.randint(self.X.shape[0])
# X_ = self.X.mean
# which = np.sqrt(((X_ - X_[i:i+1])**2).sum(1)).argsort()>(max(0, self.X.shape[0]-51))
# _, _, grad_dict = self.inference_method.inference(self.kern, self.X[which], self.Z, self.likelihood, self.Y[which], self.Y_metadata)
# grad = self.kern.gradients_qX_expectations(variational_posterior=self.X[which], Z=self.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
#
# self.X.mean.gradient[:] = 0
# self.X.variance.gradient[:] = 0
# self.X.mean.gradient[which] = grad[0]
# self.X.variance.gradient[which] = grad[1]
# update for the KL divergence
# self.variational_prior.update_gradients_KL(self.X, which)
# -----------------------------------------------
# update for the KL divergence # update for the KL divergence
self.variational_prior.update_gradients_KL(self.X) self.variational_prior.update_gradients_KL(self.X)

View file

@ -150,7 +150,11 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
if plot_raw: if plot_raw:
m, _ = model._raw_predict(Xgrid) m, _ = model._raw_predict(Xgrid)
else: else:
m, _ = model.predict(Xgrid) if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
else:
meta = None
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=meta)
for d in which_data_ycols: for d in which_data_ycols:
m_d = m[:,d].reshape(resolution, resolution).T m_d = m[:,d].reshape(resolution, resolution).T
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)

View file

@ -1,13 +1,13 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
#import os import os
from setuptools import setup from setuptools import setup
# Version number # Version number
version = '0.4.6' version = '0.4.6'
from pkg_resources import Requirement, resource_string from pkg_resources import Requirement
def read(fname): def read(fname):
return open(os.path.join(os.path.dirname(__file__), fname)).read() return open(os.path.join(os.path.dirname(__file__), fname)).read()