mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-14 22:42:37 +02:00
Merge branch 'devel' of github.com:SheffieldML/GPy into devel
This commit is contained in:
commit
225c35d55d
7 changed files with 118 additions and 68 deletions
|
|
@ -97,51 +97,66 @@ class opt_SGD(Optimizer):
|
||||||
return subset
|
return subset
|
||||||
|
|
||||||
def shift_constraints(self, j):
|
def shift_constraints(self, j):
|
||||||
|
|
||||||
|
constrained_indices = copy.deepcopy(self.model.constrained_indices)
|
||||||
|
|
||||||
|
for c, constraint in enumerate(constrained_indices):
|
||||||
|
mask = (np.ones_like(constrained_indices[c]) == 1)
|
||||||
|
for i in range(len(constrained_indices[c])):
|
||||||
|
pos = np.where(j == constrained_indices[c][i])[0]
|
||||||
|
if len(pos) == 1:
|
||||||
|
self.model.constrained_indices[c][i] = pos
|
||||||
|
else:
|
||||||
|
mask[i] = False
|
||||||
|
|
||||||
|
self.model.constrained_indices[c] = self.model.constrained_indices[c][mask]
|
||||||
|
return constrained_indices
|
||||||
# back them up
|
# back them up
|
||||||
bounded_i = copy.deepcopy(self.model.constrained_bounded_indices)
|
# bounded_i = copy.deepcopy(self.model.constrained_bounded_indices)
|
||||||
bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers)
|
# bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers)
|
||||||
bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers)
|
# bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers)
|
||||||
|
|
||||||
for b in range(len(bounded_i)): # for each group of constraints
|
# for b in range(len(bounded_i)): # for each group of constraints
|
||||||
for bc in range(len(bounded_i[b])):
|
# for bc in range(len(bounded_i[b])):
|
||||||
pos = np.where(j == bounded_i[b][bc])[0]
|
# pos = np.where(j == bounded_i[b][bc])[0]
|
||||||
if len(pos) == 1:
|
# if len(pos) == 1:
|
||||||
pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0]
|
# pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0]
|
||||||
self.model.constrained_bounded_indices[b][pos2] = pos[0]
|
# self.model.constrained_bounded_indices[b][pos2] = pos[0]
|
||||||
else:
|
# else:
|
||||||
if len(self.model.constrained_bounded_indices[b]) == 1:
|
# if len(self.model.constrained_bounded_indices[b]) == 1:
|
||||||
# if it's the last index to be removed
|
# # if it's the last index to be removed
|
||||||
# the logic here is just a mess. If we remove the last one, then all the
|
# # the logic here is just a mess. If we remove the last one, then all the
|
||||||
# b-indices change and we have to iterate through everything to find our
|
# # b-indices change and we have to iterate through everything to find our
|
||||||
# current index. Can't deal with this right now.
|
# # current index. Can't deal with this right now.
|
||||||
raise NotImplementedError
|
# raise NotImplementedError
|
||||||
|
|
||||||
else: # just remove it from the indices
|
# else: # just remove it from the indices
|
||||||
mask = self.model.constrained_bounded_indices[b] != bc
|
# mask = self.model.constrained_bounded_indices[b] != bc
|
||||||
self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask]
|
# self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask]
|
||||||
|
|
||||||
|
|
||||||
# here we shif the positive constraints. We cycle through each positive
|
# # here we shif the positive constraints. We cycle through each positive
|
||||||
# constraint
|
# # constraint
|
||||||
positive = self.model.constrained_positive_indices.copy()
|
# positive = self.model.constrained_positive_indices.copy()
|
||||||
mask = (np.ones_like(positive) == 1)
|
# mask = (np.ones_like(positive) == 1)
|
||||||
for p in range(len(positive)):
|
# for p in range(len(positive)):
|
||||||
# we now check whether the constrained index appears in the j vector
|
# # we now check whether the constrained index appears in the j vector
|
||||||
# (the vector of the "active" indices)
|
# # (the vector of the "active" indices)
|
||||||
pos = np.where(j == self.model.constrained_positive_indices[p])[0]
|
# pos = np.where(j == self.model.constrained_positive_indices[p])[0]
|
||||||
if len(pos) == 1:
|
# if len(pos) == 1:
|
||||||
self.model.constrained_positive_indices[p] = pos
|
# self.model.constrained_positive_indices[p] = pos
|
||||||
else:
|
# else:
|
||||||
mask[p] = False
|
# mask[p] = False
|
||||||
self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask]
|
# self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask]
|
||||||
|
|
||||||
return (bounded_i, bounded_l, bounded_u), positive
|
# return (bounded_i, bounded_l, bounded_u), positive
|
||||||
|
|
||||||
def restore_constraints(self, b, p):
|
def restore_constraints(self, c):#b, p):
|
||||||
self.model.constrained_bounded_indices = b[0]
|
# self.model.constrained_bounded_indices = b[0]
|
||||||
self.model.constrained_bounded_lowers = b[1]
|
# self.model.constrained_bounded_lowers = b[1]
|
||||||
self.model.constrained_bounded_uppers = b[2]
|
# self.model.constrained_bounded_uppers = b[2]
|
||||||
self.model.constrained_positive_indices = p
|
# self.model.constrained_positive_indices = p
|
||||||
|
self.model.constrained_indices = c
|
||||||
|
|
||||||
def get_param_shapes(self, N = None, Q = None):
|
def get_param_shapes(self, N = None, Q = None):
|
||||||
model_name = self.model.__class__.__name__
|
model_name = self.model.__class__.__name__
|
||||||
|
|
@ -168,9 +183,15 @@ class opt_SGD(Optimizer):
|
||||||
if self.model.N == 0 or Y.std() == 0.0:
|
if self.model.N == 0 or Y.std() == 0.0:
|
||||||
return 0, step, self.model.N
|
return 0, step, self.model.N
|
||||||
|
|
||||||
self.model.likelihood._mean = Y.mean()
|
self.model.likelihood._bias = Y.mean()
|
||||||
self.model.likelihood._std = Y.std()
|
self.model.likelihood._scale = Y.std()
|
||||||
self.model.likelihood.set_data(Y)
|
self.model.likelihood.set_data(Y)
|
||||||
|
# self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision
|
||||||
|
|
||||||
|
sigma = self.model.likelihood._variance
|
||||||
|
self.model.likelihood._variance = None # invalidate cache
|
||||||
|
self.model.likelihood._set_params(sigma)
|
||||||
|
|
||||||
|
|
||||||
j = self.subset_parameter_vector(self.x_opt, samples, shapes)
|
j = self.subset_parameter_vector(self.x_opt, samples, shapes)
|
||||||
self.model.X = X[samples]
|
self.model.X = X[samples]
|
||||||
|
|
@ -181,27 +202,30 @@ class opt_SGD(Optimizer):
|
||||||
self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T)
|
self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T)
|
||||||
self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT)
|
self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT)
|
||||||
|
|
||||||
b, p = self.shift_constraints(j)
|
ci = self.shift_constraints(j)
|
||||||
f, fp = f_fp(self.x_opt[j])
|
f, fp = f_fp(self.x_opt[j])
|
||||||
step[j] = self.momentum * step[j] + self.learning_rate[j] * fp
|
step[j] = self.momentum * step[j] + self.learning_rate[j] * fp
|
||||||
self.x_opt[j] -= step[j]
|
self.x_opt[j] -= step[j]
|
||||||
|
self.restore_constraints(ci)
|
||||||
|
|
||||||
self.restore_constraints(b, p)
|
self.model.grads[j] = fp
|
||||||
# restore likelihood _mean and _std, otherwise when we call set_data(y) on
|
# restore likelihood _bias and _scale, otherwise when we call set_data(y) on
|
||||||
# the next feature, it will get normalized with the mean and std of this one.
|
# the next feature, it will get normalized with the mean and std of this one.
|
||||||
self.model.likelihood._mean = 0
|
self.model.likelihood._bias = 0
|
||||||
self.model.likelihood._std = 1
|
self.model.likelihood._scale = 1
|
||||||
|
|
||||||
return f, step, self.model.N
|
return f, step, self.model.N
|
||||||
|
|
||||||
def opt(self, f_fp=None, f=None, fp=None):
|
def opt(self, f_fp=None, f=None, fp=None):
|
||||||
self.x_opt = self.model._get_params_transformed()
|
self.x_opt = self.model._get_params_transformed()
|
||||||
|
self.model.grads = np.zeros_like(self.x_opt)
|
||||||
|
|
||||||
X, Y = self.model.X.copy(), self.model.likelihood.Y.copy()
|
X, Y = self.model.X.copy(), self.model.likelihood.Y.copy()
|
||||||
|
|
||||||
self.model.likelihood.YYT = None
|
self.model.likelihood.YYT = None
|
||||||
self.model.likelihood.trYYT = None
|
self.model.likelihood.trYYT = None
|
||||||
self.model.likelihood._mean = 0.0
|
self.model.likelihood._bias = 0.0
|
||||||
self.model.likelihood._std = 1.0
|
self.model.likelihood._scale = 1.0
|
||||||
|
|
||||||
N, Q = self.model.X.shape
|
N, Q = self.model.X.shape
|
||||||
D = self.model.likelihood.Y.shape[1]
|
D = self.model.likelihood.Y.shape[1]
|
||||||
|
|
@ -225,6 +249,11 @@ class opt_SGD(Optimizer):
|
||||||
self.model.D = len(j)
|
self.model.D = len(j)
|
||||||
self.model.likelihood.D = len(j)
|
self.model.likelihood.D = len(j)
|
||||||
self.model.likelihood.set_data(Y[:, j])
|
self.model.likelihood.set_data(Y[:, j])
|
||||||
|
# self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision
|
||||||
|
|
||||||
|
sigma = self.model.likelihood._variance
|
||||||
|
self.model.likelihood._variance = None # invalidate cache
|
||||||
|
self.model.likelihood._set_params(sigma)
|
||||||
|
|
||||||
if missing_data:
|
if missing_data:
|
||||||
shapes = self.get_param_shapes(N, Q)
|
shapes = self.get_param_shapes(N, Q)
|
||||||
|
|
@ -250,7 +279,6 @@ class opt_SGD(Optimizer):
|
||||||
# plt.clf()
|
# plt.clf()
|
||||||
# plt.plot(self.param_traces['noise'])
|
# plt.plot(self.param_traces['noise'])
|
||||||
|
|
||||||
# import pdb; pdb.set_trace()
|
|
||||||
# for k in self.param_traces.keys():
|
# for k in self.param_traces.keys():
|
||||||
# self.param_traces[k].append(self.model.get(k)[0])
|
# self.param_traces[k].append(self.model.get(k)[0])
|
||||||
|
|
||||||
|
|
@ -262,6 +290,9 @@ class opt_SGD(Optimizer):
|
||||||
self.model.likelihood.N = N
|
self.model.likelihood.N = N
|
||||||
self.model.likelihood.D = D
|
self.model.likelihood.D = D
|
||||||
self.model.likelihood.Y = Y
|
self.model.likelihood.Y = Y
|
||||||
|
sigma = self.model.likelihood._variance
|
||||||
|
self.model.likelihood._variance = None # invalidate cache
|
||||||
|
self.model.likelihood._set_params(sigma)
|
||||||
|
|
||||||
self.trace.append(self.f_opt)
|
self.trace.append(self.f_opt)
|
||||||
if self.iteration_file is not None:
|
if self.iteration_file is not None:
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,7 @@ class GPLVM(GP):
|
||||||
return np.hstack((self.X.flatten(), GP._get_params(self)))
|
return np.hstack((self.X.flatten(), GP._get_params(self)))
|
||||||
|
|
||||||
def _set_params(self,x):
|
def _set_params(self,x):
|
||||||
self.X = x[:self.X.size].reshape(self.N,self.Q).copy()
|
self.X = x[:self.N*self.Q].reshape(self.N,self.Q).copy()
|
||||||
GP._set_params(self, x[self.X.size:])
|
GP._set_params(self, x[self.X.size:])
|
||||||
|
|
||||||
def _log_likelihood_gradients(self):
|
def _log_likelihood_gradients(self):
|
||||||
|
|
|
||||||
|
|
@ -3,17 +3,12 @@
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pylab as pb
|
import pylab as pb
|
||||||
from ..util.linalg import mdot, jitchol, tdot, symmetrify
|
from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides
|
||||||
from ..util.plot import gpplot
|
from ..util.plot import gpplot
|
||||||
from .. import kern
|
from .. import kern
|
||||||
from GP import GP
|
from GP import GP
|
||||||
from scipy import linalg
|
from scipy import linalg
|
||||||
|
|
||||||
def backsub_both_sides(L, X):
|
|
||||||
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
|
|
||||||
tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
|
|
||||||
return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
|
|
||||||
|
|
||||||
class sparse_GP(GP):
|
class sparse_GP(GP):
|
||||||
"""
|
"""
|
||||||
Variational sparse GP model
|
Variational sparse GP model
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,7 @@ class warpedGP(GP):
|
||||||
self.warping_function = TanhWarpingFunction_d(warping_terms)
|
self.warping_function = TanhWarpingFunction_d(warping_terms)
|
||||||
self.warping_params = (np.random.randn(self.warping_function.n_terms*3+1,) * 1)
|
self.warping_params = (np.random.randn(self.warping_function.n_terms*3+1,) * 1)
|
||||||
|
|
||||||
|
Y = self._scale_data(Y)
|
||||||
self.has_uncertain_inputs = False
|
self.has_uncertain_inputs = False
|
||||||
self.Y_untransformed = Y.copy()
|
self.Y_untransformed = Y.copy()
|
||||||
self.predict_in_warped_space = False
|
self.predict_in_warped_space = False
|
||||||
|
|
@ -30,6 +31,14 @@ class warpedGP(GP):
|
||||||
|
|
||||||
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
||||||
|
|
||||||
|
def _scale_data(self, Y):
|
||||||
|
self._Ymax = Y.max()
|
||||||
|
self._Ymin = Y.min()
|
||||||
|
return (Y-self._Ymin)/(self._Ymax-self._Ymin) - 0.5
|
||||||
|
|
||||||
|
def _unscale_data(self, Y):
|
||||||
|
return (Y + 0.5)*(self._Ymax - self._Ymin) + self._Ymin
|
||||||
|
|
||||||
def _set_params(self, x):
|
def _set_params(self, x):
|
||||||
self.warping_params = x[:self.warping_function.num_parameters]
|
self.warping_params = x[:self.warping_function.num_parameters]
|
||||||
Y = self.transform_data()
|
Y = self.transform_data()
|
||||||
|
|
@ -79,5 +88,5 @@ class warpedGP(GP):
|
||||||
if self.predict_in_warped_space:
|
if self.predict_in_warped_space:
|
||||||
mu = self.warping_function.f_inv(mu, self.warping_params)
|
mu = self.warping_function.f_inv(mu, self.warping_params)
|
||||||
var = self.warping_function.f_inv(var, self.warping_params)
|
var = self.warping_function.f_inv(var, self.warping_params)
|
||||||
|
mu = self._unscale_data(mu)
|
||||||
return mu, var
|
return mu, var
|
||||||
|
|
|
||||||
|
|
@ -257,16 +257,20 @@ def symmetrify(A,upper=False):
|
||||||
N,M = A.shape
|
N,M = A.shape
|
||||||
assert N==M
|
assert N==M
|
||||||
c_contig_code = """
|
c_contig_code = """
|
||||||
|
int iN;
|
||||||
for (int i=1; i<N; i++){
|
for (int i=1; i<N; i++){
|
||||||
|
iN = i*N;
|
||||||
for (int j=0; j<i; j++){
|
for (int j=0; j<i; j++){
|
||||||
A[i+j*N] = A[i*N+j];
|
A[i+j*N] = A[iN+j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
f_contig_code = """
|
f_contig_code = """
|
||||||
|
int iN;
|
||||||
for (int i=1; i<N; i++){
|
for (int i=1; i<N; i++){
|
||||||
|
iN = i*N;
|
||||||
for (int j=0; j<i; j++){
|
for (int j=0; j<i; j++){
|
||||||
A[i*N+j] = A[i+j*N];
|
A[iN+j] = A[i+j*N];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
|
@ -317,3 +321,8 @@ def cholupdate(L,x):
|
||||||
x = x.copy()
|
x = x.copy()
|
||||||
N = x.size
|
N = x.size
|
||||||
weave.inline(code, support_code=support_code, arg_names=['N','L','x'], type_converters=weave.converters.blitz)
|
weave.inline(code, support_code=support_code, arg_names=['N','L','x'], type_converters=weave.converters.blitz)
|
||||||
|
|
||||||
|
def backsub_both_sides(L, X):
|
||||||
|
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
|
||||||
|
tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
|
||||||
|
return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
|
||||||
|
|
|
||||||
|
|
@ -185,7 +185,7 @@ class TanhWarpingFunction_d(WarpingFunction):
|
||||||
return z
|
return z
|
||||||
|
|
||||||
|
|
||||||
def f_inv(self, y, psi, iterations = 30):
|
def f_inv(self, z, psi, max_iterations = 1000):
|
||||||
"""
|
"""
|
||||||
calculate the numerical inverse of f
|
calculate the numerical inverse of f
|
||||||
|
|
||||||
|
|
@ -194,13 +194,19 @@ class TanhWarpingFunction_d(WarpingFunction):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
y = y.copy()
|
z = z.copy()
|
||||||
z = np.ones_like(y)
|
y = np.ones_like(z)
|
||||||
|
it = 0
|
||||||
|
update = np.inf
|
||||||
|
|
||||||
for i in range(iterations):
|
while it == 0 or (np.abs(update).sum() > 1e-10 and it < max_iterations):
|
||||||
z -= (self.f(z, psi) - y)/self.fgrad_y(z,psi)
|
update = (self.f(y, psi) - z)/self.fgrad_y(y, psi)
|
||||||
|
y -= update
|
||||||
|
it += 1
|
||||||
|
if it == max_iterations:
|
||||||
|
print "WARNING!!! Maximum number of iterations reached in f_inv "
|
||||||
|
|
||||||
return z
|
return y
|
||||||
|
|
||||||
|
|
||||||
def fgrad_y(self, y, psi, return_precalc = False):
|
def fgrad_y(self, y, psi, return_precalc = False):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue