mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-24 14:15:14 +02:00
Merge branch 'devel' of github.com:SheffieldML/GPy into devel
This commit is contained in:
commit
3651374617
9 changed files with 209 additions and 37 deletions
|
|
@ -349,7 +349,7 @@ class Model(Parameterized):
|
||||||
numerical_gradient = (f1 - f2) / (2 * step)
|
numerical_gradient = (f1 - f2) / (2 * step)
|
||||||
if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind]
|
if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind]
|
||||||
else: ratio = (f1 - f2) / (2 * step * gradient[xind])
|
else: ratio = (f1 - f2) / (2 * step * gradient[xind])
|
||||||
difference = np.abs((f1 - f2) / 2 / step - gradient[xind])
|
difference = np.abs(numerical_gradient - gradient[xind])
|
||||||
|
|
||||||
if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
|
if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
|
||||||
formatted_name = "\033[92m {0} \033[0m".format(names[nind])
|
formatted_name = "\033[92m {0} \033[0m".format(names[nind])
|
||||||
|
|
|
||||||
|
|
@ -281,7 +281,7 @@ class Gradcheckable(Pickleable, Parentable):
|
||||||
"""
|
"""
|
||||||
if self.has_parent():
|
if self.has_parent():
|
||||||
return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
|
return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
|
||||||
return self._checkgrad(self[''], verbose=verbose, step=step, tolerance=tolerance)
|
return self._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
|
||||||
|
|
||||||
def _checkgrad(self, param, verbose=0, step=1e-6, tolerance=1e-3):
|
def _checkgrad(self, param, verbose=0, step=1e-6, tolerance=1e-3):
|
||||||
"""
|
"""
|
||||||
|
|
@ -372,8 +372,9 @@ class Indexable(Nameable, Observable):
|
||||||
basically just sums up the parameter sizes which come before param.
|
basically just sums up the parameter sizes which come before param.
|
||||||
"""
|
"""
|
||||||
if param.has_parent():
|
if param.has_parent():
|
||||||
if param._parent_._get_original(param) in self.parameters:
|
p = param._parent_._get_original(param)
|
||||||
return self._param_slices_[param._parent_._get_original(param)._parent_index_].start
|
if p in self.parameters:
|
||||||
|
return reduce(lambda a,b: a + b.size, self.parameters[:p._parent_index_], 0)
|
||||||
return self._offset_for(param._parent_) + param._parent_._offset_for(param)
|
return self._offset_for(param._parent_) + param._parent_._offset_for(param)
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
@ -699,36 +700,10 @@ class OptimizationHandlable(Indexable):
|
||||||
|
|
||||||
def _get_params_transformed(self):
|
def _get_params_transformed(self):
|
||||||
raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
|
raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
|
||||||
# # transformed parameters (apply un-transformation rules)
|
|
||||||
# p = self.param_array.copy()
|
|
||||||
# [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
|
|
||||||
# if self.has_parent() and self.constraints[__fixed__].size != 0:
|
|
||||||
# fixes = np.ones(self.size).astype(bool)
|
|
||||||
# fixes[self.constraints[__fixed__]] = FIXED
|
|
||||||
# return p[fixes]
|
|
||||||
# elif self._has_fixes():
|
|
||||||
# return p[self._fixes_]
|
|
||||||
# return p
|
|
||||||
#
|
#
|
||||||
def _set_params_transformed(self, p):
|
def _set_params_transformed(self, p):
|
||||||
raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
|
raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
|
||||||
|
|
||||||
# """
|
|
||||||
# Set parameters p, but make sure they get transformed before setting.
|
|
||||||
# This means, the optimizer sees p, whereas the model sees transformed(p),
|
|
||||||
# such that, the parameters the model sees are in the right domain.
|
|
||||||
# """
|
|
||||||
# if not(p is self.param_array):
|
|
||||||
# if self.has_parent() and self.constraints[__fixed__].size != 0:
|
|
||||||
# fixes = np.ones(self.size).astype(bool)
|
|
||||||
# fixes[self.constraints[__fixed__]] = FIXED
|
|
||||||
# self.param_array.flat[fixes] = p
|
|
||||||
# elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
|
|
||||||
# else: self.param_array.flat = p
|
|
||||||
# [np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
|
|
||||||
# for c, ind in self.constraints.iteritems() if c != __fixed__]
|
|
||||||
# self._trigger_params_changed()
|
|
||||||
|
|
||||||
def _trigger_params_changed(self, trigger_parent=True):
|
def _trigger_params_changed(self, trigger_parent=True):
|
||||||
"""
|
"""
|
||||||
First tell all children to update,
|
First tell all children to update,
|
||||||
|
|
@ -736,7 +711,7 @@ class OptimizationHandlable(Indexable):
|
||||||
|
|
||||||
If trigger_parent is True, we will tell the parent, otherwise not.
|
If trigger_parent is True, we will tell the parent, otherwise not.
|
||||||
"""
|
"""
|
||||||
[p._trigger_params_changed(trigger_parent=False) for p in self.parameters]
|
[p._trigger_params_changed(trigger_parent=False) for p in self.parameters if not p.is_fixed]
|
||||||
self.notify_observers(None, None if trigger_parent else -np.inf)
|
self.notify_observers(None, None if trigger_parent else -np.inf)
|
||||||
|
|
||||||
def _size_transformed(self):
|
def _size_transformed(self):
|
||||||
|
|
@ -888,6 +863,25 @@ class Parameterizable(OptimizationHandlable):
|
||||||
self._param_array_ = np.empty(self.size, dtype=np.float64)
|
self._param_array_ = np.empty(self.size, dtype=np.float64)
|
||||||
return self._param_array_
|
return self._param_array_
|
||||||
|
|
||||||
|
@property
|
||||||
|
def unfixed_param_array(self):
|
||||||
|
"""
|
||||||
|
Array representing the parameters of this class.
|
||||||
|
There is only one copy of all parameters in memory, two during optimization.
|
||||||
|
|
||||||
|
!WARNING!: setting the parameter array MUST always be done in memory:
|
||||||
|
m.param_array[:] = m_copy.param_array
|
||||||
|
"""
|
||||||
|
if self.__dict__.get('_param_array_', None) is None:
|
||||||
|
self._param_array_ = np.empty(self.size, dtype=np.float64)
|
||||||
|
|
||||||
|
if self.constraints[__fixed__].size !=0:
|
||||||
|
fixes = np.ones(self.size).astype(bool)
|
||||||
|
fixes[self.constraints[__fixed__]] = FIXED
|
||||||
|
return self._param_array_[fixes]
|
||||||
|
else:
|
||||||
|
return self._param_array_
|
||||||
|
|
||||||
@param_array.setter
|
@param_array.setter
|
||||||
def param_array(self, arr):
|
def param_array(self, arr):
|
||||||
self._param_array_ = arr
|
self._param_array_ = arr
|
||||||
|
|
|
||||||
|
|
@ -56,7 +56,7 @@ class EPDTC(LatentFunctionInference):
|
||||||
self._ep_approximation = None
|
self._ep_approximation = None
|
||||||
|
|
||||||
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
|
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
|
||||||
num_data, output_dim = X.shape
|
num_data, output_dim = Y.shape
|
||||||
assert output_dim ==1, "ep in 1D only (for now!)"
|
assert output_dim ==1, "ep in 1D only (for now!)"
|
||||||
|
|
||||||
Kmm = kern.K(Z)
|
Kmm = kern.K(Z)
|
||||||
|
|
|
||||||
|
|
@ -1,2 +1,3 @@
|
||||||
from scg import SCG
|
from scg import SCG
|
||||||
from optimization import *
|
from optimization import *
|
||||||
|
from hmc import HMC,HMC_shortcut
|
||||||
|
|
|
||||||
157
GPy/inference/optimization/hmc.py
Normal file
157
GPy/inference/optimization/hmc.py
Normal file
|
|
@ -0,0 +1,157 @@
|
||||||
|
"""HMC implementation"""
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
class HMC:
|
||||||
|
def __init__(self,model,M=None,stepsize=1e-1):
|
||||||
|
self.model = model
|
||||||
|
self.stepsize = stepsize
|
||||||
|
self.p = np.empty_like(model.optimizer_array.copy())
|
||||||
|
if M is None:
|
||||||
|
self.M = np.eye(self.p.size)
|
||||||
|
else:
|
||||||
|
self.M = M
|
||||||
|
self.Minv = np.linalg.inv(self.M)
|
||||||
|
|
||||||
|
def sample(self, m_iters=1000, hmc_iters=20):
|
||||||
|
params = np.empty((m_iters,self.p.size))
|
||||||
|
for i in xrange(m_iters):
|
||||||
|
self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
|
||||||
|
H_old = self._computeH()
|
||||||
|
theta_old = self.model.optimizer_array.copy()
|
||||||
|
params[i] = self.model.unfixed_param_array
|
||||||
|
#Matropolis
|
||||||
|
self._update(hmc_iters)
|
||||||
|
H_new = self._computeH()
|
||||||
|
|
||||||
|
if H_old>H_new:
|
||||||
|
k = 1.
|
||||||
|
else:
|
||||||
|
k = np.exp(H_old-H_new)
|
||||||
|
if np.random.rand()<k:
|
||||||
|
params[i] = self.model.unfixed_param_array
|
||||||
|
else:
|
||||||
|
self.model.optimizer_array = theta_old
|
||||||
|
return params
|
||||||
|
|
||||||
|
def _update(self, hmc_iters):
|
||||||
|
for i in xrange(hmc_iters):
|
||||||
|
self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
|
||||||
|
self.model.optimizer_array = self.model.optimizer_array + self.stepsize*np.dot(self.Minv, self.p)
|
||||||
|
self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
|
||||||
|
|
||||||
|
def _computeH(self,):
|
||||||
|
return self.model.objective_function()+self.p.size*np.log(2*np.pi)/2.+np.log(np.linalg.det(self.M))/2.+np.dot(self.p, np.dot(self.Minv,self.p[:,None]))/2.
|
||||||
|
|
||||||
|
class HMC_shortcut:
|
||||||
|
def __init__(self,model,M=None,stepsize_range=[1e-6, 1e-1],groupsize=5, Hstd_th=[1e-5, 3.]):
|
||||||
|
self.model = model
|
||||||
|
self.stepsize_range = np.log(stepsize_range)
|
||||||
|
self.p = np.empty_like(model.optimizer_array.copy())
|
||||||
|
self.groupsize = groupsize
|
||||||
|
self.Hstd_th = Hstd_th
|
||||||
|
if M is None:
|
||||||
|
self.M = np.eye(self.p.size)
|
||||||
|
else:
|
||||||
|
self.M = M
|
||||||
|
self.Minv = np.linalg.inv(self.M)
|
||||||
|
|
||||||
|
def sample(self, m_iters=1000, hmc_iters=20):
|
||||||
|
params = np.empty((m_iters,self.p.size))
|
||||||
|
for i in xrange(m_iters):
|
||||||
|
# sample a stepsize from the uniform distribution
|
||||||
|
stepsize = np.exp(np.random.rand()*(self.stepsize_range[1]-self.stepsize_range[0])+self.stepsize_range[0])
|
||||||
|
self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
|
||||||
|
H_old = self._computeH()
|
||||||
|
params[i] = self.model.unfixed_param_array
|
||||||
|
theta_old = self.model.optimizer_array.copy()
|
||||||
|
#Matropolis
|
||||||
|
self._update(hmc_iters, stepsize)
|
||||||
|
H_new = self._computeH()
|
||||||
|
|
||||||
|
if H_old>H_new:
|
||||||
|
k = 1.
|
||||||
|
else:
|
||||||
|
k = np.exp(H_old-H_new)
|
||||||
|
if np.random.rand()<k:
|
||||||
|
params[i] = self.model.unfixed_param_array
|
||||||
|
else:
|
||||||
|
self.model.optimizer_array = theta_old
|
||||||
|
return params
|
||||||
|
|
||||||
|
def _update(self, hmc_iters, stepsize):
|
||||||
|
theta_buf = np.empty((2*hmc_iters+1,self.model.optimizer_array.size))
|
||||||
|
p_buf = np.empty((2*hmc_iters+1,self.p.size))
|
||||||
|
H_buf = np.empty((2*hmc_iters+1,))
|
||||||
|
# Set initial position
|
||||||
|
theta_buf[hmc_iters] = self.model.optimizer_array
|
||||||
|
p_buf[hmc_iters] = self.p
|
||||||
|
H_buf[hmc_iters] = self._computeH()
|
||||||
|
|
||||||
|
reversal = []
|
||||||
|
pos = 1
|
||||||
|
i=0
|
||||||
|
while i<hmc_iters:
|
||||||
|
self.p[:] += -stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
|
||||||
|
self.model.optimizer_array = self.model.optimizer_array + stepsize*np.dot(self.Minv, self.p)
|
||||||
|
self.p[:] += -stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
|
||||||
|
|
||||||
|
theta_buf[hmc_iters+pos] = self.model.optimizer_array
|
||||||
|
p_buf[hmc_iters+pos] = self.p
|
||||||
|
H_buf[hmc_iters+pos] = self._computeH()
|
||||||
|
i+=1
|
||||||
|
|
||||||
|
if i<self.groupsize:
|
||||||
|
pos += 1
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
if len(reversal)==0:
|
||||||
|
Hlist = range(hmc_iters+pos,hmc_iters+pos-self.groupsize,-1)
|
||||||
|
if self._testH(H_buf[Hlist]):
|
||||||
|
pos += 1
|
||||||
|
else:
|
||||||
|
# Reverse the trajectory for the 1st time
|
||||||
|
reversal.append(pos)
|
||||||
|
if hmc_iters-i>pos:
|
||||||
|
pos = -1
|
||||||
|
i += pos
|
||||||
|
self.model.optimizer_array = theta_buf[hmc_iters]
|
||||||
|
self.p[:] = -p_buf[hmc_iters]
|
||||||
|
else:
|
||||||
|
pos_new = pos-hmc_iters+i
|
||||||
|
self.model.optimizer_array = theta_buf[hmc_iters+pos_new]
|
||||||
|
self.p[:] = -p_buf[hmc_iters+pos_new]
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
Hlist = range(hmc_iters+pos,hmc_iters+pos+self.groupsize)
|
||||||
|
# print Hlist
|
||||||
|
# print self._testH(H_buf[Hlist])
|
||||||
|
|
||||||
|
if self._testH(H_buf[Hlist]):
|
||||||
|
pos += -1
|
||||||
|
else:
|
||||||
|
# Reverse the trajectory for the 2nd time
|
||||||
|
r = (hmc_iters - i)%((reversal[0]-pos)*2)
|
||||||
|
if r>(reversal[0]-pos):
|
||||||
|
pos_new = 2*reversal[0] - r - pos
|
||||||
|
else:
|
||||||
|
pos_new = pos + r
|
||||||
|
self.model.optimizer_array = theta_buf[hmc_iters+pos_new]
|
||||||
|
self.p[:] = p_buf[hmc_iters+pos_new] # the sign of momentum might be wrong!
|
||||||
|
# print reversal[0],pos,pos_new
|
||||||
|
# print H_buf
|
||||||
|
break
|
||||||
|
|
||||||
|
def _testH(self, Hlist):
|
||||||
|
Hstd = np.std(Hlist)
|
||||||
|
# print Hlist
|
||||||
|
# print Hstd
|
||||||
|
if Hstd<self.Hstd_th[0] or Hstd>self.Hstd_th[1]:
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _computeH(self,):
|
||||||
|
return self.model.objective_function()+self.p.size*np.log(2*np.pi)/2.+np.log(np.linalg.det(self.M))/2.+np.dot(self.p, np.dot(self.Minv,self.p[:,None]))/2.
|
||||||
|
|
||||||
|
|
@ -40,7 +40,7 @@ class Add(CombinationKernel):
|
||||||
return reduce(np.add, (p.Kdiag(X) for p in which_parts))
|
return reduce(np.add, (p.Kdiag(X) for p in which_parts))
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
|
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts if not p.is_fixed]
|
||||||
|
|
||||||
def update_gradients_diag(self, dL_dK, X):
|
def update_gradients_diag(self, dL_dK, X):
|
||||||
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
|
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
|
||||||
|
|
|
||||||
|
|
@ -84,6 +84,22 @@ class BayesianGPLVM(SparseGP):
|
||||||
|
|
||||||
self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
|
self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
|
||||||
|
|
||||||
|
# This is testing code -------------------------
|
||||||
|
# i = np.random.randint(self.X.shape[0])
|
||||||
|
# X_ = self.X.mean
|
||||||
|
# which = np.sqrt(((X_ - X_[i:i+1])**2).sum(1)).argsort()>(max(0, self.X.shape[0]-51))
|
||||||
|
# _, _, grad_dict = self.inference_method.inference(self.kern, self.X[which], self.Z, self.likelihood, self.Y[which], self.Y_metadata)
|
||||||
|
# grad = self.kern.gradients_qX_expectations(variational_posterior=self.X[which], Z=self.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||||
|
#
|
||||||
|
# self.X.mean.gradient[:] = 0
|
||||||
|
# self.X.variance.gradient[:] = 0
|
||||||
|
# self.X.mean.gradient[which] = grad[0]
|
||||||
|
# self.X.variance.gradient[which] = grad[1]
|
||||||
|
|
||||||
|
# update for the KL divergence
|
||||||
|
# self.variational_prior.update_gradients_KL(self.X, which)
|
||||||
|
# -----------------------------------------------
|
||||||
|
|
||||||
# update for the KL divergence
|
# update for the KL divergence
|
||||||
self.variational_prior.update_gradients_KL(self.X)
|
self.variational_prior.update_gradients_KL(self.X)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -150,7 +150,11 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||||
if plot_raw:
|
if plot_raw:
|
||||||
m, _ = model._raw_predict(Xgrid)
|
m, _ = model._raw_predict(Xgrid)
|
||||||
else:
|
else:
|
||||||
m, _ = model.predict(Xgrid)
|
if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
|
||||||
|
meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
|
||||||
|
else:
|
||||||
|
meta = None
|
||||||
|
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=meta)
|
||||||
for d in which_data_ycols:
|
for d in which_data_ycols:
|
||||||
m_d = m[:,d].reshape(resolution, resolution).T
|
m_d = m[:,d].reshape(resolution, resolution).T
|
||||||
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
|
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
|
||||||
|
|
|
||||||
4
setup.py
4
setup.py
|
|
@ -1,13 +1,13 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
#import os
|
import os
|
||||||
from setuptools import setup
|
from setuptools import setup
|
||||||
|
|
||||||
# Version number
|
# Version number
|
||||||
version = '0.4.6'
|
version = '0.4.6'
|
||||||
|
|
||||||
from pkg_resources import Requirement, resource_string
|
from pkg_resources import Requirement
|
||||||
def read(fname):
|
def read(fname):
|
||||||
return open(os.path.join(os.path.dirname(__file__), fname)).read()
|
return open(os.path.join(os.path.dirname(__file__), fname)).read()
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue