Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
Ricardo 2013-05-13 19:41:10 +01:00
commit 225c35d55d
7 changed files with 118 additions and 68 deletions

View file

@@ -97,51 +97,66 @@ class opt_SGD(Optimizer):
         return subset

     def shift_constraints(self, j):
+        constrained_indices = copy.deepcopy(self.model.constrained_indices)
+        for c, constraint in enumerate(constrained_indices):
+            mask = (np.ones_like(constrained_indices[c]) == 1)
+            for i in range(len(constrained_indices[c])):
+                pos = np.where(j == constrained_indices[c][i])[0]
+                if len(pos) == 1:
+                    self.model.constrained_indices[c][i] = pos
+                else:
+                    mask[i] = False
+            self.model.constrained_indices[c] = self.model.constrained_indices[c][mask]
+        return constrained_indices
+
         # back them up
-        bounded_i = copy.deepcopy(self.model.constrained_bounded_indices)
-        bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers)
-        bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers)
-        for b in range(len(bounded_i)): # for each group of constraints
-            for bc in range(len(bounded_i[b])):
-                pos = np.where(j == bounded_i[b][bc])[0]
-                if len(pos) == 1:
-                    pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0]
-                    self.model.constrained_bounded_indices[b][pos2] = pos[0]
-                else:
-                    if len(self.model.constrained_bounded_indices[b]) == 1:
-                        # if it's the last index to be removed
-                        # the logic here is just a mess. If we remove the last one, then all the
-                        # b-indices change and we have to iterate through everything to find our
-                        # current index. Can't deal with this right now.
-                        raise NotImplementedError
-                    else: # just remove it from the indices
-                        mask = self.model.constrained_bounded_indices[b] != bc
-                        self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask]
-        # here we shift the positive constraints. We cycle through each positive
-        # constraint
-        positive = self.model.constrained_positive_indices.copy()
-        mask = (np.ones_like(positive) == 1)
-        for p in range(len(positive)):
-            # we now check whether the constrained index appears in the j vector
-            # (the vector of the "active" indices)
-            pos = np.where(j == self.model.constrained_positive_indices[p])[0]
-            if len(pos) == 1:
-                self.model.constrained_positive_indices[p] = pos
-            else:
-                mask[p] = False
-        self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask]
-        return (bounded_i, bounded_l, bounded_u), positive
+        # bounded_i = copy.deepcopy(self.model.constrained_bounded_indices)
+        # bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers)
+        # bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers)
+        # for b in range(len(bounded_i)): # for each group of constraints
+        #     for bc in range(len(bounded_i[b])):
+        #         pos = np.where(j == bounded_i[b][bc])[0]
+        #         if len(pos) == 1:
+        #             pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0]
+        #             self.model.constrained_bounded_indices[b][pos2] = pos[0]
+        #         else:
+        #             if len(self.model.constrained_bounded_indices[b]) == 1:
+        #                 # if it's the last index to be removed
+        #                 # the logic here is just a mess. If we remove the last one, then all the
+        #                 # b-indices change and we have to iterate through everything to find our
+        #                 # current index. Can't deal with this right now.
+        #                 raise NotImplementedError
+        #             else: # just remove it from the indices
+        #                 mask = self.model.constrained_bounded_indices[b] != bc
+        #                 self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask]
+        # # here we shift the positive constraints. We cycle through each positive
+        # # constraint
+        # positive = self.model.constrained_positive_indices.copy()
+        # mask = (np.ones_like(positive) == 1)
+        # for p in range(len(positive)):
+        #     # we now check whether the constrained index appears in the j vector
+        #     # (the vector of the "active" indices)
+        #     pos = np.where(j == self.model.constrained_positive_indices[p])[0]
+        #     if len(pos) == 1:
+        #         self.model.constrained_positive_indices[p] = pos
+        #     else:
+        #         mask[p] = False
+        # self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask]
+        # return (bounded_i, bounded_l, bounded_u), positive

-    def restore_constraints(self, b, p):
-        self.model.constrained_bounded_indices = b[0]
-        self.model.constrained_bounded_lowers = b[1]
-        self.model.constrained_bounded_uppers = b[2]
-        self.model.constrained_positive_indices = p
+    def restore_constraints(self, c): #b, p):
+        # self.model.constrained_bounded_indices = b[0]
+        # self.model.constrained_bounded_lowers = b[1]
+        # self.model.constrained_bounded_uppers = b[2]
+        # self.model.constrained_positive_indices = p
+        self.model.constrained_indices = c

     def get_param_shapes(self, N = None, Q = None):
         model_name = self.model.__class__.__name__
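Note: the rewritten shift_constraints maps each globally constrained parameter index to its position inside the active subset j, dropping constraints whose parameters are not updated this step. A minimal standalone sketch of that remapping (hypothetical arrays, not the GPy API):

    import numpy as np

    j = np.array([2, 5, 7])            # indices of the active parameter subset
    constrained = np.array([5, 6, 7])  # globally constrained parameter indices

    mask = np.ones(len(constrained), dtype=bool)
    for i in range(len(constrained)):
        pos = np.where(j == constrained[i])[0]
        if len(pos) == 1:
            constrained[i] = pos[0]    # remap to the index's position within j
        else:
            mask[i] = False            # not active this step: drop it
    constrained = constrained[mask]    # array([1, 2]): j[1] == 5, j[2] == 7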
@@ -168,9 +183,15 @@ class opt_SGD(Optimizer):
         if self.model.N == 0 or Y.std() == 0.0:
             return 0, step, self.model.N

-        self.model.likelihood._mean = Y.mean()
-        self.model.likelihood._std = Y.std()
+        self.model.likelihood._bias = Y.mean()
+        self.model.likelihood._scale = Y.std()
         self.model.likelihood.set_data(Y)
+        # self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision
+        sigma = self.model.likelihood._variance
+        self.model.likelihood._variance = None # invalidate cache
+        self.model.likelihood._set_params(sigma)

         j = self.subset_parameter_vector(self.x_opt, samples, shapes)
         self.model.X = X[samples]
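The three added sigma lines read as a cache-invalidation idiom: read the cached noise variance, clear it, and push the same value back through _set_params so that dependent quantities are recomputed for the new data. A hedged sketch of the general pattern (illustrative class, not the GPy likelihood):

    class CachedLikelihood(object):
        def __init__(self, variance):
            self._variance = None
            self._set_params(variance)

        def _set_params(self, variance):
            if variance == self._variance:
                return                       # unchanged value: skip the recompute
            self._variance = variance
            self.precision = 1.0 / variance  # derived state, kept in sync

    lik = CachedLikelihood(0.1)
    sigma = lik._variance
    lik._variance = None    # invalidate the cache ...
    lik._set_params(sigma)  # ... so this call really recomputes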
@@ -181,27 +202,30 @@ class opt_SGD(Optimizer):
         self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T)
         self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT)

-        b, p = self.shift_constraints(j)
+        ci = self.shift_constraints(j)

         f, fp = f_fp(self.x_opt[j])
         step[j] = self.momentum * step[j] + self.learning_rate[j] * fp
         self.x_opt[j] -= step[j]
-        self.restore_constraints(b, p)
+        self.restore_constraints(ci)
+        self.model.grads[j] = fp

-        # restore likelihood _mean and _std, otherwise when we call set_data(y) on
+        # restore likelihood _bias and _scale, otherwise when we call set_data(y) on
         # the next feature, it will get normalized with the mean and std of this one.
-        self.model.likelihood._mean = 0
-        self.model.likelihood._std = 1
+        self.model.likelihood._bias = 0
+        self.model.likelihood._scale = 1

         return f, step, self.model.N

     def opt(self, f_fp=None, f=None, fp=None):
         self.x_opt = self.model._get_params_transformed()
+        self.model.grads = np.zeros_like(self.x_opt)
         X, Y = self.model.X.copy(), self.model.likelihood.Y.copy()
         self.model.likelihood.YYT = None
         self.model.likelihood.trYYT = None
-        self.model.likelihood._mean = 0.0
-        self.model.likelihood._std = 1.0
+        self.model.likelihood._bias = 0.0
+        self.model.likelihood._scale = 1.0

         N, Q = self.model.X.shape
         D = self.model.likelihood.Y.shape[1]
@@ -225,6 +249,11 @@ class opt_SGD(Optimizer):
             self.model.D = len(j)
             self.model.likelihood.D = len(j)
             self.model.likelihood.set_data(Y[:, j])
+            # self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision
+            sigma = self.model.likelihood._variance
+            self.model.likelihood._variance = None # invalidate cache
+            self.model.likelihood._set_params(sigma)

         if missing_data:
             shapes = self.get_param_shapes(N, Q)
@@ -250,7 +279,6 @@ class opt_SGD(Optimizer):
             # plt.clf()
             # plt.plot(self.param_traces['noise'])
-            # import pdb; pdb.set_trace()

             # for k in self.param_traces.keys():
             #     self.param_traces[k].append(self.model.get(k)[0])
@@ -262,6 +290,9 @@ class opt_SGD(Optimizer):
         self.model.likelihood.N = N
         self.model.likelihood.D = D
         self.model.likelihood.Y = Y
+        sigma = self.model.likelihood._variance
+        self.model.likelihood._variance = None # invalidate cache
+        self.model.likelihood._set_params(sigma)

         self.trace.append(self.f_opt)
         if self.iteration_file is not None:

View file

@@ -45,7 +45,7 @@ class GPLVM(GP):
         return np.hstack((self.X.flatten(), GP._get_params(self)))

     def _set_params(self,x):
-        self.X = x[:self.X.size].reshape(self.N,self.Q).copy()
+        self.X = x[:self.N*self.Q].reshape(self.N,self.Q).copy()
         GP._set_params(self, x[self.X.size:])

     def _log_likelihood_gradients(self):
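The fix makes the slice length explicit: _get_params packs the flattened latent X (N*Q entries) ahead of the GP's own parameters, so slicing by self.N*self.Q is correct regardless of the shape self.X happens to have when _set_params is called. A small sketch of that packing convention (hypothetical sizes):

    import numpy as np

    N, Q, n_gp_params = 4, 2, 3
    x = np.arange(N * Q + n_gp_params, dtype=float)  # stand-in parameter vector

    X = x[:N * Q].reshape(N, Q)  # latent positions come first ...
    theta = x[N * Q:]            # ... followed by the GP's own parameters
    assert X.size + theta.size == x.size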

View file

@@ -3,17 +3,12 @@
 import numpy as np
 import pylab as pb
-from ..util.linalg import mdot, jitchol, tdot, symmetrify
+from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides
 from ..util.plot import gpplot
 from .. import kern
 from GP import GP
 from scipy import linalg

-def backsub_both_sides(L, X):
-    """ Return L^-T * X * L^-1, assuming X is symmetrical and L is lower cholesky"""
-    tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
-    return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T

 class sparse_GP(GP):
     """
     Variational sparse GP model

View file

@@ -23,6 +23,7 @@ class warpedGP(GP):
         self.warping_function = TanhWarpingFunction_d(warping_terms)
         self.warping_params = (np.random.randn(self.warping_function.n_terms*3+1,) * 1)

+        Y = self._scale_data(Y)
         self.has_uncertain_inputs = False
         self.Y_untransformed = Y.copy()
         self.predict_in_warped_space = False
@@ -30,6 +31,14 @@ class warpedGP(GP):
         GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)

+    def _scale_data(self, Y):
+        self._Ymax = Y.max()
+        self._Ymin = Y.min()
+        return (Y-self._Ymin)/(self._Ymax-self._Ymin) - 0.5
+
+    def _unscale_data(self, Y):
+        return (Y + 0.5)*(self._Ymax - self._Ymin) + self._Ymin
+
     def _set_params(self, x):
         self.warping_params = x[:self.warping_function.num_parameters]
         Y = self.transform_data()
@@ -79,5 +88,5 @@ class warpedGP(GP):
         if self.predict_in_warped_space:
             mu = self.warping_function.f_inv(mu, self.warping_params)
             var = self.warping_function.f_inv(var, self.warping_params)
+            mu = self._unscale_data(mu)

         return mu, var
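The new _scale_data/_unscale_data pair is an affine map of Y onto [-0.5, 0.5] and its exact inverse, which the prediction path now applies to the mean. A standalone round-trip check (free functions mirroring the two methods):

    import numpy as np

    def scale(Y, ymin, ymax):
        return (Y - ymin) / (ymax - ymin) - 0.5

    def unscale(Ys, ymin, ymax):
        return (Ys + 0.5) * (ymax - ymin) + ymin

    Y = np.array([3.0, 7.0, 11.0])
    Ys = scale(Y, Y.min(), Y.max())  # [-0.5, 0.0, 0.5]
    assert np.allclose(unscale(Ys, Y.min(), Y.max()), Y)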

View file

@@ -257,16 +257,20 @@ def symmetrify(A,upper=False):
     N,M = A.shape
     assert N==M

     c_contig_code = """
+    int iN;
     for (int i=1; i<N; i++){
+      iN = i*N;
       for (int j=0; j<i; j++){
-        A[i+j*N] = A[i*N+j];
+        A[i+j*N] = A[iN+j];
       }
     }
     """
     f_contig_code = """
+    int iN;
     for (int i=1; i<N; i++){
+      iN = i*N;
       for (int j=0; j<i; j++){
-        A[i*N+j] = A[i+j*N];
+        A[iN+j] = A[i+j*N];
       }
     }
     """
@@ -317,3 +321,8 @@ def cholupdate(L,x):
     x = x.copy()
     N = x.size
     weave.inline(code, support_code=support_code, arg_names=['N','L','x'], type_converters=weave.converters.blitz)
+
+def backsub_both_sides(L, X):
+    """ Return L^-T * X * L^-1, assuming X is symmetrical and L is lower cholesky"""
+    tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
+    return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
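Relocated from the sparse GP module (see the removal above). backsub_both_sides(L, X) returns L^-T * X * L^-1 via two triangular solves; a small numerical sketch checking this against a dense reference, using scipy's solve_triangular in place of the raw LAPACK call:

    import numpy as np
    from scipy.linalg import solve_triangular

    rng = np.random.RandomState(0)
    B = rng.randn(5, 5)
    S = B.dot(B.T) + 5 * np.eye(5)   # SPD, so it has a Cholesky factor
    L = np.linalg.cholesky(S)
    X = B + B.T                      # symmetric test matrix

    Li = np.linalg.inv(L)
    reference = Li.T.dot(X).dot(Li)  # dense L^-T * X * L^-1

    tmp = solve_triangular(L, X, lower=True, trans='T')           # L^-T X
    result = solve_triangular(L, tmp.T, lower=True, trans='T').T  # L^-T X L^-1
    assert np.allclose(result, reference)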

View file

@@ -185,7 +185,7 @@ class TanhWarpingFunction_d(WarpingFunction):
         return z

-    def f_inv(self, y, psi, iterations = 30):
+    def f_inv(self, z, psi, max_iterations = 1000):
         """
         calculate the numerical inverse of f
@@ -194,13 +194,19 @@ class TanhWarpingFunction_d(WarpingFunction):
         """
-        y = y.copy()
-        z = np.ones_like(y)
+        z = z.copy()
+        y = np.ones_like(z)
+        it = 0
+        update = np.inf

-        for i in range(iterations):
-            z -= (self.f(z, psi) - y)/self.fgrad_y(z,psi)
+        while it == 0 or (np.abs(update).sum() > 1e-10 and it < max_iterations):
+            update = (self.f(y, psi) - z)/self.fgrad_y(y, psi)
+            y -= update
+            it += 1

-        return z
+        if it == max_iterations:
+            print "WARNING!!! Maximum number of iterations reached in f_inv "
+        return y

     def fgrad_y(self, y, psi, return_precalc = False):
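The rewritten f_inv replaces a fixed 30-step loop with Newton's method run to a tolerance: solve f(y) = z by iterating y <- y - (f(y) - z)/f'(y) until the summed update drops below 1e-10 or max_iterations is hit. A standalone sketch with a toy monotonic warp (not the tanh warping function itself):

    import numpy as np

    def f(y):
        return y + 0.5 * np.tanh(y)       # toy increasing warping function

    def fgrad_y(y):
        return 1.0 + 0.5 / np.cosh(y)**2  # its derivative

    def f_inv(z, max_iterations=1000, tol=1e-10):
        y = np.ones_like(z)
        it, update = 0, np.inf
        while it == 0 or (np.abs(update).sum() > tol and it < max_iterations):
            update = (f(y) - z) / fgrad_y(y)
            y -= update
            it += 1
        return y

    z = f(np.array([-1.0, 0.3, 2.0]))
    assert np.allclose(f(f_inv(z)), z)   # round-trip recovers the inputs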