mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-13 14:03:20 +02:00
debug HMC shortcut
This commit is contained in:
parent
e0a7884270
commit
c3bb7b28a1
2 changed files with 57 additions and 31 deletions
|
|
@ -888,6 +888,25 @@ class Parameterizable(OptimizationHandlable):
|
||||||
self._param_array_ = np.empty(self.size, dtype=np.float64)
|
self._param_array_ = np.empty(self.size, dtype=np.float64)
|
||||||
return self._param_array_
|
return self._param_array_
|
||||||
|
|
||||||
|
@property
|
||||||
|
def unfixed_param_array(self):
|
||||||
|
"""
|
||||||
|
Array representing the parameters of this class.
|
||||||
|
There is only one copy of all parameters in memory, two during optimization.
|
||||||
|
|
||||||
|
!WARNING!: setting the parameter array MUST always be done in memory:
|
||||||
|
m.param_array[:] = m_copy.param_array
|
||||||
|
"""
|
||||||
|
if self.__dict__.get('_param_array_', None) is None:
|
||||||
|
self._param_array_ = np.empty(self.size, dtype=np.float64)
|
||||||
|
|
||||||
|
if self.constraints[__fixed__].size !=0:
|
||||||
|
fixes = np.ones(self.size).astype(bool)
|
||||||
|
fixes[self.constraints[__fixed__]] = FIXED
|
||||||
|
return self._param_array_[fixes]
|
||||||
|
else:
|
||||||
|
return self._param_array_
|
||||||
|
|
||||||
@param_array.setter
|
@param_array.setter
|
||||||
def param_array(self, arr):
|
def param_array(self, arr):
|
||||||
self._param_array_ = arr
|
self._param_array_ = arr
|
||||||
|
|
|
||||||
|
|
@ -15,13 +15,12 @@ class HMC:
|
||||||
self.Minv = np.linalg.inv(self.M)
|
self.Minv = np.linalg.inv(self.M)
|
||||||
|
|
||||||
def sample(self, m_iters=1000, hmc_iters=20):
|
def sample(self, m_iters=1000, hmc_iters=20):
|
||||||
thetas = np.empty((m_iters,self.p.size))
|
params = np.empty((m_iters,self.p.size))
|
||||||
ps = np.empty((m_iters,self.p.size))
|
|
||||||
for i in xrange(m_iters):
|
for i in xrange(m_iters):
|
||||||
self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
|
self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
|
||||||
H_old = self._computeH()
|
H_old = self._computeH()
|
||||||
p_old = self.p.copy()
|
|
||||||
theta_old = self.model.optimizer_array.copy()
|
theta_old = self.model.optimizer_array.copy()
|
||||||
|
params[i] = self.model.unfixed_param_array
|
||||||
#Matropolis
|
#Matropolis
|
||||||
self._update(hmc_iters)
|
self._update(hmc_iters)
|
||||||
H_new = self._computeH()
|
H_new = self._computeH()
|
||||||
|
|
@ -31,13 +30,10 @@ class HMC:
|
||||||
else:
|
else:
|
||||||
k = np.exp(H_old-H_new)
|
k = np.exp(H_old-H_new)
|
||||||
if np.random.rand()<k:
|
if np.random.rand()<k:
|
||||||
thetas[i] = self.model.optimizer_array
|
params[i] = self.model.unfixed_param_array
|
||||||
ps[i] = self.p
|
|
||||||
else:
|
else:
|
||||||
thetas[i] = theta_old
|
|
||||||
ps[i] = p_old
|
|
||||||
self.model.optimizer_array = theta_old
|
self.model.optimizer_array = theta_old
|
||||||
return thetas, ps
|
return params
|
||||||
|
|
||||||
def _update(self, hmc_iters):
|
def _update(self, hmc_iters):
|
||||||
for i in xrange(hmc_iters):
|
for i in xrange(hmc_iters):
|
||||||
|
|
@ -49,9 +45,9 @@ class HMC:
|
||||||
return self.model.objective_function()+self.p.size*np.log(2*np.pi)/2.+np.log(np.linalg.det(self.M))/2.+np.dot(self.p, np.dot(self.Minv,self.p[:,None]))/2.
|
return self.model.objective_function()+self.p.size*np.log(2*np.pi)/2.+np.log(np.linalg.det(self.M))/2.+np.dot(self.p, np.dot(self.Minv,self.p[:,None]))/2.
|
||||||
|
|
||||||
class HMC_shortcut:
|
class HMC_shortcut:
|
||||||
def __init__(self,model,M=None,stepsize_range=[1e-6, 1e-1],groupsize=5, Hstd_th=[1e-3, 20.]):
|
def __init__(self,model,M=None,stepsize_range=[1e-6, 1e-1],groupsize=5, Hstd_th=[1e-5, 3.]):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.stepsize_range = np.log10(stepsize_range)
|
self.stepsize_range = np.log(stepsize_range)
|
||||||
self.p = np.empty_like(model.optimizer_array.copy())
|
self.p = np.empty_like(model.optimizer_array.copy())
|
||||||
self.groupsize = groupsize
|
self.groupsize = groupsize
|
||||||
self.Hstd_th = Hstd_th
|
self.Hstd_th = Hstd_th
|
||||||
|
|
@ -62,14 +58,13 @@ class HMC_shortcut:
|
||||||
self.Minv = np.linalg.inv(self.M)
|
self.Minv = np.linalg.inv(self.M)
|
||||||
|
|
||||||
def sample(self, m_iters=1000, hmc_iters=20):
|
def sample(self, m_iters=1000, hmc_iters=20):
|
||||||
thetas = np.empty((m_iters,self.p.size))
|
params = np.empty((m_iters,self.p.size))
|
||||||
ps = np.empty((m_iters,self.p.size))
|
|
||||||
for i in xrange(m_iters):
|
for i in xrange(m_iters):
|
||||||
# sample a stepsize from the uniform distribution
|
# sample a stepsize from the uniform distribution
|
||||||
stepsize = np.exp10(np.random.rand()*(self.stepsize_range[1]-self.stepsize_range[0])+self.stepsize_range[0])
|
stepsize = np.exp(np.random.rand()*(self.stepsize_range[1]-self.stepsize_range[0])+self.stepsize_range[0])
|
||||||
self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
|
self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
|
||||||
H_old = self._computeH()
|
H_old = self._computeH()
|
||||||
p_old = self.p.copy()
|
params[i] = self.model.unfixed_param_array
|
||||||
theta_old = self.model.optimizer_array.copy()
|
theta_old = self.model.optimizer_array.copy()
|
||||||
#Matropolis
|
#Matropolis
|
||||||
self._update(hmc_iters, stepsize)
|
self._update(hmc_iters, stepsize)
|
||||||
|
|
@ -80,13 +75,10 @@ class HMC_shortcut:
|
||||||
else:
|
else:
|
||||||
k = np.exp(H_old-H_new)
|
k = np.exp(H_old-H_new)
|
||||||
if np.random.rand()<k:
|
if np.random.rand()<k:
|
||||||
thetas[i] = self.model.optimizer_array
|
params[i] = self.model.unfixed_param_array
|
||||||
ps[i] = self.p
|
|
||||||
else:
|
else:
|
||||||
thetas[i] = theta_old
|
|
||||||
ps[i] = p_old
|
|
||||||
self.model.optimizer_array = theta_old
|
self.model.optimizer_array = theta_old
|
||||||
return thetas, ps
|
return params
|
||||||
|
|
||||||
def _update(self, hmc_iters, stepsize):
|
def _update(self, hmc_iters, stepsize):
|
||||||
theta_buf = np.empty((2*hmc_iters+1,self.model.optimizer_array.size))
|
theta_buf = np.empty((2*hmc_iters+1,self.model.optimizer_array.size))
|
||||||
|
|
@ -99,31 +91,43 @@ class HMC_shortcut:
|
||||||
|
|
||||||
reversal = []
|
reversal = []
|
||||||
pos = 1
|
pos = 1
|
||||||
for i in xrange(hmc_iters):
|
i=0
|
||||||
self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
|
while i<hmc_iters:
|
||||||
self.model.optimizer_array = self.model.optimizer_array + self.stepsize*np.dot(self.Minv, self.p)
|
self.p[:] += -stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
|
||||||
self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
|
self.model.optimizer_array = self.model.optimizer_array + stepsize*np.dot(self.Minv, self.p)
|
||||||
|
self.p[:] += -stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
|
||||||
|
|
||||||
theta_buf[hmc_iters+pos] = self.model.optimizer_array
|
theta_buf[hmc_iters+pos] = self.model.optimizer_array
|
||||||
p_buf[hmc_iters+pos] = self.p
|
p_buf[hmc_iters+pos] = self.p
|
||||||
H_buf[hmc_iters+pos] = self._computeH()
|
H_buf[hmc_iters+pos] = self._computeH()
|
||||||
|
i+=1
|
||||||
|
|
||||||
if i<self.groupsize:
|
if i<self.groupsize:
|
||||||
pos += 1
|
pos += 1
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
if len(reversal)==0:
|
if len(reversal)==0:
|
||||||
Hlist = range(pos,pos-self.groupsize,-1)
|
Hlist = range(hmc_iters+pos,hmc_iters+pos-self.groupsize,-1)
|
||||||
if self._testH(H_buf[Hlist]):
|
if self._testH(H_buf[Hlist]):
|
||||||
pos += 1
|
pos += 1
|
||||||
else:
|
else:
|
||||||
# Reverse the trajectory for the 1st time
|
# Reverse the trajectory for the 1st time
|
||||||
reversal.add(pos)
|
reversal.append(pos)
|
||||||
pos = -1
|
if hmc_iters-i>pos:
|
||||||
self.model.optimizer_array = theta_buf[hmc_iters]
|
pos = -1
|
||||||
self.p[:] = -p_buf[hmc_iters]
|
i += pos
|
||||||
|
self.model.optimizer_array = theta_buf[hmc_iters]
|
||||||
|
self.p[:] = -p_buf[hmc_iters]
|
||||||
|
else:
|
||||||
|
pos_new = pos-hmc_iters+i
|
||||||
|
self.model.optimizer_array = theta_buf[hmc_iters+pos_new]
|
||||||
|
self.p[:] = -p_buf[hmc_iters+pos_new]
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
Hlist = range(pos,pos+self.groupsize)
|
Hlist = range(hmc_iters+pos,hmc_iters+pos+self.groupsize)
|
||||||
|
# print Hlist
|
||||||
|
# print self._testH(H_buf[Hlist])
|
||||||
|
|
||||||
if self._testH(H_buf[Hlist]):
|
if self._testH(H_buf[Hlist]):
|
||||||
pos += -1
|
pos += -1
|
||||||
else:
|
else:
|
||||||
|
|
@ -132,14 +136,17 @@ class HMC_shortcut:
|
||||||
if r>(reversal[0]-pos):
|
if r>(reversal[0]-pos):
|
||||||
pos_new = 2*reversal[0] - r - pos
|
pos_new = 2*reversal[0] - r - pos
|
||||||
else:
|
else:
|
||||||
pos_new = 2*pos + r - reversal[0]
|
pos_new = pos + r
|
||||||
self.model.optimizer_array = theta_buf[hmc_iters+pos_new]
|
self.model.optimizer_array = theta_buf[hmc_iters+pos_new]
|
||||||
self.p[:] = p_buf[hmc_iters+pos_new] # the sign of momentum might be wrong!
|
self.p[:] = p_buf[hmc_iters+pos_new] # the sign of momentum might be wrong!
|
||||||
|
# print reversal[0],pos,pos_new
|
||||||
|
# print H_buf
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
def _testH(self, Hlist):
|
def _testH(self, Hlist):
|
||||||
Hstd = np.std(Hlist)
|
Hstd = np.std(Hlist)
|
||||||
|
# print Hlist
|
||||||
|
# print Hstd
|
||||||
if Hstd<self.Hstd_th[0] or Hstd>self.Hstd_th[1]:
|
if Hstd<self.Hstd_th[0] or Hstd>self.Hstd_th[1]:
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue