Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
James Hensman 2014-08-08 11:07:23 +01:00
commit 3651374617
9 changed files with 209 additions and 37 deletions

View file

@ -349,7 +349,7 @@ class Model(Parameterized):
numerical_gradient = (f1 - f2) / (2 * step)
if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind]
else: ratio = (f1 - f2) / (2 * step * gradient[xind])
difference = np.abs((f1 - f2) / 2 / step - gradient[xind])
difference = np.abs(numerical_gradient - gradient[xind])
if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
formatted_name = "\033[92m {0} \033[0m".format(names[nind])

View file

@ -281,7 +281,7 @@ class Gradcheckable(Pickleable, Parentable):
"""
if self.has_parent():
return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
return self._checkgrad(self[''], verbose=verbose, step=step, tolerance=tolerance)
return self._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
def _checkgrad(self, param, verbose=0, step=1e-6, tolerance=1e-3):
"""
@ -372,8 +372,9 @@ class Indexable(Nameable, Observable):
basically just sums up the parameter sizes which come before param.
"""
if param.has_parent():
if param._parent_._get_original(param) in self.parameters:
return self._param_slices_[param._parent_._get_original(param)._parent_index_].start
p = param._parent_._get_original(param)
if p in self.parameters:
return reduce(lambda a,b: a + b.size, self.parameters[:p._parent_index_], 0)
return self._offset_for(param._parent_) + param._parent_._offset_for(param)
return 0
@ -699,36 +700,10 @@ class OptimizationHandlable(Indexable):
def _get_params_transformed(self):
raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
# # transformed parameters (apply un-transformation rules)
# p = self.param_array.copy()
# [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
# if self.has_parent() and self.constraints[__fixed__].size != 0:
# fixes = np.ones(self.size).astype(bool)
# fixes[self.constraints[__fixed__]] = FIXED
# return p[fixes]
# elif self._has_fixes():
# return p[self._fixes_]
# return p
#
def _set_params_transformed(self, p):
raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
# """
# Set parameters p, but make sure they get transformed before setting.
# This means, the optimizer sees p, whereas the model sees transformed(p),
# such that, the parameters the model sees are in the right domain.
# """
# if not(p is self.param_array):
# if self.has_parent() and self.constraints[__fixed__].size != 0:
# fixes = np.ones(self.size).astype(bool)
# fixes[self.constraints[__fixed__]] = FIXED
# self.param_array.flat[fixes] = p
# elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
# else: self.param_array.flat = p
# [np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
# for c, ind in self.constraints.iteritems() if c != __fixed__]
# self._trigger_params_changed()
def _trigger_params_changed(self, trigger_parent=True):
"""
First tell all children to update,
@ -736,7 +711,7 @@ class OptimizationHandlable(Indexable):
If trigger_parent is True, we will tell the parent, otherwise not.
"""
[p._trigger_params_changed(trigger_parent=False) for p in self.parameters]
[p._trigger_params_changed(trigger_parent=False) for p in self.parameters if not p.is_fixed]
self.notify_observers(None, None if trigger_parent else -np.inf)
def _size_transformed(self):
@ -888,6 +863,25 @@ class Parameterizable(OptimizationHandlable):
self._param_array_ = np.empty(self.size, dtype=np.float64)
return self._param_array_
@property
def unfixed_param_array(self):
"""
Array representing the parameters of this class.
There is only one copy of all parameters in memory, two during optimization.
!WARNING!: setting the parameter array MUST always be done in memory:
m.param_array[:] = m_copy.param_array
"""
if self.__dict__.get('_param_array_', None) is None:
self._param_array_ = np.empty(self.size, dtype=np.float64)
if self.constraints[__fixed__].size !=0:
fixes = np.ones(self.size).astype(bool)
fixes[self.constraints[__fixed__]] = FIXED
return self._param_array_[fixes]
else:
return self._param_array_
@param_array.setter
def param_array(self, arr):
self._param_array_ = arr

View file

@ -56,7 +56,7 @@ class EPDTC(LatentFunctionInference):
self._ep_approximation = None
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
num_data, output_dim = X.shape
num_data, output_dim = Y.shape
assert output_dim ==1, "ep in 1D only (for now!)"
Kmm = kern.K(Z)

View file

@ -1,2 +1,3 @@
from scg import SCG
from optimization import *
from hmc import HMC,HMC_shortcut

View file

@ -0,0 +1,157 @@
"""HMC implementation"""
import numpy as np
class HMC:
def __init__(self,model,M=None,stepsize=1e-1):
self.model = model
self.stepsize = stepsize
self.p = np.empty_like(model.optimizer_array.copy())
if M is None:
self.M = np.eye(self.p.size)
else:
self.M = M
self.Minv = np.linalg.inv(self.M)
def sample(self, m_iters=1000, hmc_iters=20):
params = np.empty((m_iters,self.p.size))
for i in xrange(m_iters):
self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
H_old = self._computeH()
theta_old = self.model.optimizer_array.copy()
params[i] = self.model.unfixed_param_array
#Matropolis
self._update(hmc_iters)
H_new = self._computeH()
if H_old>H_new:
k = 1.
else:
k = np.exp(H_old-H_new)
if np.random.rand()<k:
params[i] = self.model.unfixed_param_array
else:
self.model.optimizer_array = theta_old
return params
def _update(self, hmc_iters):
for i in xrange(hmc_iters):
self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
self.model.optimizer_array = self.model.optimizer_array + self.stepsize*np.dot(self.Minv, self.p)
self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
def _computeH(self,):
return self.model.objective_function()+self.p.size*np.log(2*np.pi)/2.+np.log(np.linalg.det(self.M))/2.+np.dot(self.p, np.dot(self.Minv,self.p[:,None]))/2.
class HMC_shortcut:
def __init__(self,model,M=None,stepsize_range=[1e-6, 1e-1],groupsize=5, Hstd_th=[1e-5, 3.]):
self.model = model
self.stepsize_range = np.log(stepsize_range)
self.p = np.empty_like(model.optimizer_array.copy())
self.groupsize = groupsize
self.Hstd_th = Hstd_th
if M is None:
self.M = np.eye(self.p.size)
else:
self.M = M
self.Minv = np.linalg.inv(self.M)
def sample(self, m_iters=1000, hmc_iters=20):
params = np.empty((m_iters,self.p.size))
for i in xrange(m_iters):
# sample a stepsize from the uniform distribution
stepsize = np.exp(np.random.rand()*(self.stepsize_range[1]-self.stepsize_range[0])+self.stepsize_range[0])
self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
H_old = self._computeH()
params[i] = self.model.unfixed_param_array
theta_old = self.model.optimizer_array.copy()
#Matropolis
self._update(hmc_iters, stepsize)
H_new = self._computeH()
if H_old>H_new:
k = 1.
else:
k = np.exp(H_old-H_new)
if np.random.rand()<k:
params[i] = self.model.unfixed_param_array
else:
self.model.optimizer_array = theta_old
return params
def _update(self, hmc_iters, stepsize):
theta_buf = np.empty((2*hmc_iters+1,self.model.optimizer_array.size))
p_buf = np.empty((2*hmc_iters+1,self.p.size))
H_buf = np.empty((2*hmc_iters+1,))
# Set initial position
theta_buf[hmc_iters] = self.model.optimizer_array
p_buf[hmc_iters] = self.p
H_buf[hmc_iters] = self._computeH()
reversal = []
pos = 1
i=0
while i<hmc_iters:
self.p[:] += -stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
self.model.optimizer_array = self.model.optimizer_array + stepsize*np.dot(self.Minv, self.p)
self.p[:] += -stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
theta_buf[hmc_iters+pos] = self.model.optimizer_array
p_buf[hmc_iters+pos] = self.p
H_buf[hmc_iters+pos] = self._computeH()
i+=1
if i<self.groupsize:
pos += 1
continue
else:
if len(reversal)==0:
Hlist = range(hmc_iters+pos,hmc_iters+pos-self.groupsize,-1)
if self._testH(H_buf[Hlist]):
pos += 1
else:
# Reverse the trajectory for the 1st time
reversal.append(pos)
if hmc_iters-i>pos:
pos = -1
i += pos
self.model.optimizer_array = theta_buf[hmc_iters]
self.p[:] = -p_buf[hmc_iters]
else:
pos_new = pos-hmc_iters+i
self.model.optimizer_array = theta_buf[hmc_iters+pos_new]
self.p[:] = -p_buf[hmc_iters+pos_new]
break
else:
Hlist = range(hmc_iters+pos,hmc_iters+pos+self.groupsize)
# print Hlist
# print self._testH(H_buf[Hlist])
if self._testH(H_buf[Hlist]):
pos += -1
else:
# Reverse the trajectory for the 2nd time
r = (hmc_iters - i)%((reversal[0]-pos)*2)
if r>(reversal[0]-pos):
pos_new = 2*reversal[0] - r - pos
else:
pos_new = pos + r
self.model.optimizer_array = theta_buf[hmc_iters+pos_new]
self.p[:] = p_buf[hmc_iters+pos_new] # the sign of momentum might be wrong!
# print reversal[0],pos,pos_new
# print H_buf
break
def _testH(self, Hlist):
Hstd = np.std(Hlist)
# print Hlist
# print Hstd
if Hstd<self.Hstd_th[0] or Hstd>self.Hstd_th[1]:
return False
else:
return True
def _computeH(self,):
return self.model.objective_function()+self.p.size*np.log(2*np.pi)/2.+np.log(np.linalg.det(self.M))/2.+np.dot(self.p, np.dot(self.Minv,self.p[:,None]))/2.

View file

@ -10,7 +10,7 @@ class Add(CombinationKernel):
"""
Add given list of kernels together.
propagates gradients through.
This kernel will take over the active dims of it's subkernels passed in.
"""
def __init__(self, subkerns, name='add'):
@ -40,7 +40,7 @@ class Add(CombinationKernel):
return reduce(np.add, (p.Kdiag(X) for p in which_parts))
def update_gradients_full(self, dL_dK, X, X2=None):
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts if not p.is_fixed]
def update_gradients_diag(self, dL_dK, X):
[p.update_gradients_diag(dL_dK, X) for p in self.parts]

View file

@ -84,6 +84,22 @@ class BayesianGPLVM(SparseGP):
self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
# This is testing code -------------------------
# i = np.random.randint(self.X.shape[0])
# X_ = self.X.mean
# which = np.sqrt(((X_ - X_[i:i+1])**2).sum(1)).argsort()>(max(0, self.X.shape[0]-51))
# _, _, grad_dict = self.inference_method.inference(self.kern, self.X[which], self.Z, self.likelihood, self.Y[which], self.Y_metadata)
# grad = self.kern.gradients_qX_expectations(variational_posterior=self.X[which], Z=self.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
#
# self.X.mean.gradient[:] = 0
# self.X.variance.gradient[:] = 0
# self.X.mean.gradient[which] = grad[0]
# self.X.variance.gradient[which] = grad[1]
# update for the KL divergence
# self.variational_prior.update_gradients_KL(self.X, which)
# -----------------------------------------------
# update for the KL divergence
self.variational_prior.update_gradients_KL(self.X)

View file

@ -150,7 +150,11 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
if plot_raw:
m, _ = model._raw_predict(Xgrid)
else:
m, _ = model.predict(Xgrid)
if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
else:
meta = None
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=meta)
for d in which_data_ycols:
m_d = m[:,d].reshape(resolution, resolution).T
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)

View file

@ -1,13 +1,13 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#import os
import os
from setuptools import setup
# Version number
version = '0.4.6'
from pkg_resources import Requirement, resource_string
from pkg_resources import Requirement
def read(fname):
return open(os.path.join(os.path.dirname(__file__), fname)).read()