Merge branch 'master' into devel

Author: Nicolo Fusi
Date:   2013-04-10 16:55:27 +01:00
Commit: f6879ef768

12 changed files with 307 additions and 90 deletions

View file

@@ -188,19 +188,23 @@ class model(parameterised):
         """
         initial_parameters = self._get_params_transformed()
         if parallel:
-            jobs = []
-            pool = mp.Pool(processes=num_processes)
-            for i in range(Nrestarts):
-                self.randomize()
-                job = pool.apply_async(opt_wrapper, args = (self,), kwds = kwargs)
-                jobs.append(job)
-
-            pool.close() # signal that no more data coming in
-            pool.join() # wait for all the tasks to complete
+            try:
+                jobs = []
+                pool = mp.Pool(processes=num_processes)
+                for i in range(Nrestarts):
+                    self.randomize()
+                    job = pool.apply_async(opt_wrapper, args = (self,), kwds = kwargs)
+                    jobs.append(job)
+
+                pool.close() # signal that no more data coming in
+                pool.join() # wait for all the tasks to complete
+            except KeyboardInterrupt:
+                print "Ctrl+c received, terminating and joining pool."
+                pool.terminate()
+                pool.join()

         for i in range(Nrestarts):
             try:

@@ -257,7 +261,7 @@ class model(parameterised):
         self._set_params_transformed(x)
         LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
         prior_gradients = self._transform_gradients(self._log_prior_gradients())
-        return -LL_gradients - prior_gradients
+        return - LL_gradients - prior_gradients

     def objective_and_gradients(self, x):
         obj_f = self.objective_function(x)
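Note: the first hunk above wraps the multiprocessing pool in a try/except so that Ctrl+C terminates the workers cleanly instead of leaving them orphaned. A minimal standalone sketch of the same pattern (the names below are illustrative, not GPy's API):

    import multiprocessing as mp

    def square(x):
        # stand-in for opt_wrapper: any picklable, top-level function works
        return x * x

    def run_jobs(num_processes=4, n_jobs=8):
        pool = mp.Pool(processes=num_processes)
        try:
            jobs = [pool.apply_async(square, args=(i,)) for i in range(n_jobs)]
            pool.close()   # signal that no more tasks will be submitted
            pool.join()    # wait for the workers to finish
            return [j.get() for j in jobs]
        except KeyboardInterrupt:
            # Ctrl+C: kill the workers instead of leaving them running
            pool.terminate()
            pool.join()
            raise

    if __name__ == '__main__':
        print(run_jobs())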

View file

@@ -133,3 +133,32 @@ def stick():
         plt.close('all')
     return m

+def BGPLVM_oil():
+    data = GPy.util.datasets.oil()
+    Y, X = data['Y'], data['X']
+    X -= X.mean(axis=0)
+    X /= X.std(axis=0)
+
+    Q = 10
+    M = 30
+
+    kernel = GPy.kern.rbf(Q, ARD = True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
+    m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M)
+    # m.scale_factor = 100.0
+    m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)')
+
+    from sklearn import cluster
+    km = cluster.KMeans(M, verbose=10)
+    Z = km.fit(m.X).cluster_centers_
+    # Z = GPy.util.misc.kmm_init(m.X, M)
+    m.set('iip', Z)
+    m.set('bias', 1e-4)
+
+    # optimize
+    # m.ensure_default_constraints()
+    import pdb; pdb.set_trace()
+    m.optimize('tnc', messages=1)
+    print m
+    m.plot_latent(labels=data['Y'].argmax(axis=1))
+    return m
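Note: the new example seeds the inducing inputs with k-means centres rather than a random subset of the latent points. A hedged sketch of just that initialisation step, outside GPy (array shapes here are made up):

    import numpy as np
    from sklearn import cluster

    X = np.random.randn(1000, 10)   # latent points, e.g. m.X in the example above
    M = 30                          # number of inducing inputs

    km = cluster.KMeans(M)
    Z = km.fit(X).cluster_centers_  # (M, 10) array used to initialise 'iip'
    print(Z.shape)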

View file

@@ -3,8 +3,8 @@ import scipy as sp
 import scipy.sparse
 from optimization import Optimizer
 from scipy import linalg, optimize
-import copy
-import sys
+import pylab as plt
+import copy, sys, pickle

 class opt_SGD(Optimizer):
     """
@@ -18,7 +18,7 @@ class opt_SGD(Optimizer):
     """

-    def __init__(self, start, iterations = 10, learning_rate = 1e-4, momentum = 0.9, model = None, messages = False, batch_size = 1, self_paced = False, center = True, **kwargs):
+    def __init__(self, start, iterations = 10, learning_rate = 1e-4, momentum = 0.9, model = None, messages = False, batch_size = 1, self_paced = False, center = True, iteration_file = None, **kwargs):
         self.opt_name = "Stochastic Gradient Descent"
         self.model = model
@@ -31,6 +31,17 @@ class opt_SGD(Optimizer):
         self.batch_size = batch_size
         self.self_paced = self_paced
         self.center = center
+        self.param_traces = [('noise',[])]
+        self.iteration_file = iteration_file
+
+        # if len([p for p in self.model.kern.parts if p.name == 'bias']) == 1:
+        #     self.param_traces.append(('bias',[]))
+        # if len([p for p in self.model.kern.parts if p.name == 'linear']) == 1:
+        #     self.param_traces.append(('linear',[]))
+        # if len([p for p in self.model.kern.parts if p.name == 'rbf']) == 1:
+        #     self.param_traces.append(('rbf_var',[]))
+
+        self.param_traces = dict(self.param_traces)
+        self.fopt_trace = []

         num_params = len(self.model._get_params())
         if isinstance(self.learning_rate, float):
@@ -48,6 +59,18 @@ class opt_SGD(Optimizer):
         status += "Time elapsed: \t\t\t %s\n" % self.time
         return status

+    def plot_traces(self):
+        plt.figure()
+        plt.subplot(211)
+        plt.title('Parameters')
+        for k in self.param_traces.keys():
+            plt.plot(self.param_traces[k], label=k)
+        plt.legend(loc=0)
+
+        plt.subplot(212)
+        plt.title('Objective function')
+        plt.plot(self.fopt_trace)
+
     def non_null_samples(self, data):
         return (np.isnan(data).sum(axis=1) == 0)
@@ -128,38 +151,46 @@ class opt_SGD(Optimizer):
     def step_with_missing_data(self, f_fp, X, step, shapes, sparse_matrix):
         N, Q = X.shape

         if not sparse_matrix:
+            Y = self.model.likelihood.Y
             samples = self.non_null_samples(self.model.likelihood.Y)
             self.model.N = samples.sum()
-            self.model.likelihood.Y = self.model.likelihood.Y[samples]
+            Y = Y[samples]
         else:
             samples = self.model.likelihood.Y.nonzero()[0]
             self.model.N = len(samples)
-            self.model.likelihood.Y = np.asarray(self.model.likelihood.Y[samples].todense(), dtype = np.float64)
+            Y = np.asarray(self.model.likelihood.Y[samples].todense(), dtype = np.float64)
+
+        if self.model.N == 0 or Y.std() == 0.0:
+            return 0, step, self.model.N
+
+        # FIXME: get rid of self.center, everything should be centered by default
+        self.model.likelihood._mean = Y.mean()
+        self.model.likelihood._std = Y.std()
+        self.model.likelihood.set_data(Y)
+        self.model.likelihood.N = self.model.N

         j = self.subset_parameter_vector(self.x_opt, samples, shapes)
         self.model.X = X[samples]

-        if self.model.N == 0 or self.model.likelihood.Y.std() == 0.0:
-            return 0, step, self.model.N
-
-        if self.center:
-            self.model.likelihood.Y -= self.model.likelihood.Y.mean()
-            self.model.likelihood.Y /= self.model.likelihood.Y.std()
+        # if self.center:
+        #     self.model.likelihood.Y -= self.model.likelihood.Y.mean()
+        #     self.model.likelihood.Y /= self.model.likelihood.Y.std()

         model_name = self.model.__class__.__name__

         if model_name == 'Bayesian_GPLVM':
-            self.model.likelihood.trYYT = np.sum(np.square(self.model.likelihood.Y))
+            self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T)
+            self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT)

         b, p = self.shift_constraints(j)

-        momentum_term = self.momentum * step[j]
         f, fp = f_fp(self.x_opt[j])
-        step[j] = self.learning_rate[j] * fp
-        self.x_opt[j] -= step[j] + momentum_term
+        # momentum_term = self.momentum * step[j]
+        # step[j] = self.learning_rate[j] * fp
+        # self.x_opt[j] -= step[j] + momentum_term
+        step[j] = self.momentum * step[j] + self.learning_rate[j] * fp
+        self.x_opt[j] -= step[j]

         self.restore_constraints(b, p)
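Note: for the Bayesian_GPLVM branch above, trYYT is now taken as the trace of the freshly computed YYT. Numerically this agrees with the old sum-of-squares form, since trace(Y Yᵀ) equals the sum of the squared entries of Y. A quick standalone check (illustrative only):

    import numpy as np

    Y = np.random.randn(50, 3)
    YYT = np.dot(Y, Y.T)
    # the two expressions the diff moves between are the same quantity
    assert np.allclose(np.trace(YYT), np.sum(np.square(Y)))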
@@ -177,10 +208,14 @@ class opt_SGD(Optimizer):
         missing_data = self.check_for_missing(self.model.likelihood.Y)

         self.model.likelihood.YYT = None
+        self.model.likelihood.trYYT = None
+        self.model.likelihood._mean = 0.0
+        self.model.likelihood._std = 1.0
+
         num_params = self.model._get_params()
         step = np.zeros_like(num_params)

         for it in range(self.iterations):
             if it == 0 or self.self_paced is False:
                 features = np.random.permutation(Y.shape[1])
             else:
@@ -195,39 +230,59 @@ class opt_SGD(Optimizer):
             for j in features:
                 count += 1
                 self.model.D = len(j)
-                self.model.likelihood.Y = Y[:, j]
+                self.model.likelihood.D = len(j)
+                self.model.likelihood.set_data(Y[:, j])

                 if missing_data or sparse_matrix:
                     shapes = self.get_param_shapes(N, Q)
                     f, step, Nj = self.step_with_missing_data(f_fp, X, step, shapes, sparse_matrix)
                 else:
                     Nj = N
-                    momentum_term = self.momentum * step # compute momentum using update(t-1)
                     f, fp = f_fp(self.x_opt)
-                    step = self.learning_rate * fp # compute update(t)
-                    self.x_opt -= step + momentum_term
+                    # momentum_term = self.momentum * step # compute momentum using update(t-1)
+                    # step = self.learning_rate * fp # compute update(t)
+                    # self.x_opt -= step + momentum_term
+                    step = self.momentum * step + self.learning_rate * fp
+                    self.x_opt -= step

                 if self.messages == 2:
-                    noise = np.exp(self.x_opt)[-1]
+                    noise = self.model.likelihood._variance
                     status = "evaluating {feature: 5d}/{tot: 5d} \t f: {f: 2.3f} \t non-missing: {nm: 4d}\t noise: {noise: 2.4f}\r".format(feature = count, tot = len(features), f = f, nm = Nj, noise = noise)
                     sys.stdout.write(status)
                     sys.stdout.flush()
                     last_printed_count = count
+                    self.param_traces['noise'].append(noise)

                 NLL.append(f)
+                self.fopt_trace.append(f)
+
+                # for k in self.param_traces.keys():
+                #     self.param_traces[k].append(self.model.get(k)[0])

             # should really be a sum(), but earlier samples in the iteration will have a very crappy ll
             self.f_opt = np.mean(NLL)
             self.model.N = N
             self.model.X = X
             self.model.D = D
             self.model.likelihood.N = N
+            self.model.likelihood.D = D
             self.model.likelihood.Y = Y
             # self.model.Youter = np.dot(Y, Y.T)
             self.trace.append(self.f_opt)

+            if self.iteration_file is not None:
+                f = open(self.iteration_file + "iteration%d.pickle" % it, 'w')
+                data = [self.x_opt, self.fopt_trace, self.param_traces]
+                pickle.dump(data, f)
+                f.close()
+
             if self.messages != 0:
                 sys.stdout.write('\r' + ' '*len(status)*2 + ' \r')
                 status = "SGD Iteration: {0: 3d}/{1: 3d} f: {2: 2.3f}\n".format(it+1, self.iterations, self.f_opt)
                 sys.stdout.write(status)
                 sys.stdout.flush()
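Note: both step routines now use the classical momentum form, keeping a single velocity term: step(t) = momentum * step(t-1) + learning_rate * grad(t), then x -= step(t). A self-contained sketch on a toy quadratic (the names here are illustrative, not the optimizer's API):

    import numpy as np

    def grad(x):
        return 2.0 * x          # gradient of f(x) = sum(x**2)

    x = np.array([5.0, -3.0])
    step = np.zeros_like(x)
    learning_rate, momentum = 0.1, 0.9

    for it in range(300):
        step = momentum * step + learning_rate * grad(x)
        x -= step

    print(x)   # close to the minimiser [0., 0.]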

View file

@@ -51,6 +51,27 @@ class kern(parameterised):
         parameterised.__init__(self)

+    def plot_ARD(self):
+        """
+        If an ARD kernel is present, it bar-plots the ARD parameters
+        """
+        for p in self.parts:
+            if hasattr(p, 'ARD') and p.ARD:
+                pb.figure()
+                pb.title('ARD parameters, %s kernel' % p.name)
+
+                if p.name == 'linear':
+                    ard_params = p.variances
+                else:
+                    ard_params = 1./p.lengthscale
+
+                pb.bar(np.arange(len(ard_params))-0.4, ard_params)
+
     def _transform_gradients(self,g):
         x = self._get_params()
         g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
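Note: plot_ARD bar-plots a per-dimension relevance, the linear kernel's variances directly and 1/lengthscale for lengthscale-based kernels (a short lengthscale means high relevance). A minimal matplotlib sketch of the same idea, independent of GPy (example values are made up):

    import numpy as np
    import matplotlib.pyplot as plt

    lengthscales = np.array([0.5, 2.0, 10.0, 1.0])   # e.g. rbf ARD lengthscales
    relevance = 1. / lengthscales

    plt.figure()
    plt.title('ARD parameters, rbf kernel')
    plt.bar(np.arange(len(relevance)) - 0.4, relevance, width=0.8)
    plt.xlabel('input dimension')
    plt.show()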

View file

@@ -1,6 +1,7 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 from kernpart import kernpart
 import numpy as np

View file

@@ -53,6 +53,7 @@ class periodic_Matern52(kernpart):
         psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
         return r,omega[:,0:1], psi

+    @silence_errors
     def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
         Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
         Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
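Note: the implementation of the silence_errors decorator is not part of this diff; presumably it suppresses numpy floating-point warnings from terms such as 1./(omega1-omega2.T) when frequencies coincide. A purely hypothetical sketch of such a decorator, not GPy's actual code:

    import numpy as np
    from functools import wraps

    def silence_errors(fn):
        # hypothetical: run the wrapped function with numpy error reporting off
        @wraps(fn)
        def wrapped(*args, **kwargs):
            with np.errstate(divide='ignore', invalid='ignore'):
                return fn(*args, **kwargs)
        return wrapped

    @silence_errors
    def ratio(a, b):
        return a / b

    print(ratio(np.ones(3), np.array([1., 0., 2.])))   # no RuntimeWarning for the zero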

View file

@@ -53,6 +53,7 @@ class periodic_exponential(kernpart):
         psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
         return r,omega[:,0:1], psi

+    @silence_errors
     def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
         Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
         Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)

View file

@@ -93,5 +93,5 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
             raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
         else:
             input_1, input_2 = which_indices
-        GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs)
-        pb.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
+        ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs)
+        ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')

View file

@@ -89,9 +89,9 @@ class GPLVM(GP):
         Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1]))
         Xtest_full[:, :2] = Xtest
         mu, var, low, up = self.predict(Xtest_full)
-        var = var.mean(axis=1) # this was var[:, :2] edit by Neil
-        pb.imshow(var.reshape(resolution,resolution).T[::-1,:],extent=[xmin[0],xmax[0],xmin[1],xmax[1]],cmap=pb.cm.binary,interpolation='bilinear')
+        var = var[:, :1] # FIXME: this was a :2
+        pb.imshow(var.reshape(resolution,resolution).T[::-1,:],
+                  extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary, interpolation='bilinear')

         for i,ul in enumerate(np.unique(labels)):
             if type(ul) is np.string_:

@@ -118,5 +118,6 @@ class GPLVM(GP):
         pb.xlim(xmin[0],xmax[0])
         pb.ylim(xmin[1],xmax[1])
+        pb.grid(b=False) # remove the grid if present, it doesn't look good
+        ax.set_aspect('auto') # set a nice aspect ratio

         return pb.gca() #input_1, input_2 temporary removal, to return axes.

View file

@@ -9,85 +9,74 @@ from ..util.linalg import pdinv
 from ..util.plot import gpplot
 from ..util.warping_functions import *
 from GP_regression import GP_regression
+from GP import GP
+from .. import likelihoods
+from .. import kern

-class warpedGP(GP_regression):
-    """
-    TODO: fecking docstrings!
-    @nfusi: I've hacked a little on this, but no guarantees. J.
-    """
-    def __init__(self, X, Y, warping_function = None, warping_terms = 3, **kwargs):
+class warpedGP(GP):
+    def __init__(self, X, Y, kernel=None, warping_function = None, warping_terms = 3, normalize_X=False, normalize_Y=False, Xslices=None):
+        if kernel is None:
+            kernel = kern.rbf(X.shape[1])

         if warping_function == None:
-            self.warping_function = TanhWarpingFunction(warping_terms)
-            # self.warping_params = np.random.randn(self.warping_function.n_terms, 3)
-            self.warping_params = np.ones((self.warping_function.n_terms, 3))*0.0 # TODO better init
-            self.warp_params_shape = (self.warping_function.n_terms, 3) # todo get this from the subclass
+            self.warping_function = TanhWarpingFunction_d(warping_terms)
+            self.warping_params = (np.random.randn(self.warping_function.n_terms*3+1,) * 1)

-        self.Z = Y.copy()
-        self.N, self.D = Y.shape
-        self.transform_data()
-        GP_regression.__init__(self, X, self.Y, **kwargs)
+        self.has_uncertain_inputs = False
+        self.Y_untransformed = Y.copy()
+        self.predict_in_warped_space = False
+        likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y)
+        GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X, Xslices=Xslices)

     def _set_params(self, x):
-        self.warping_params = x[:self.warping_function.num_parameters].reshape(self.warp_params_shape).copy()
-        self.transform_data()
-        GP_regression._set_params(self, x[self.warping_function.num_parameters:].copy())
+        self.warping_params = x[:self.warping_function.num_parameters]
+        Y = self.transform_data()
+        self.likelihood.set_data(Y)
+        GP._set_params(self, x[self.warping_function.num_parameters:].copy())

     def _get_params(self):
-        return np.hstack((self.warping_params.flatten().copy(), GP_regression._get_params(self).copy()))
+        return np.hstack((self.warping_params.flatten().copy(), GP._get_params(self).copy()))

     def _get_param_names(self):
         warping_names = self.warping_function._get_param_names()
-        param_names = GP_regression._get_param_names(self)
+        param_names = GP._get_param_names(self)
         return warping_names + param_names

     def transform_data(self):
-        self.Y = self.warping_function.f(self.Z.copy(), self.warping_params).copy()
-
-        # this supports the 'smart' behaviour in GP_regression
-        if self.D > self.N:
-            self.YYT = np.dot(self.Y, self.Y.T)
-        else:
-            self.YYT = None
-
-        return self.Y
+        Y = self.warping_function.f(self.Y_untransformed.copy(), self.warping_params).copy()
+        return Y

     def log_likelihood(self):
-        ll = GP_regression.log_likelihood(self)
-        jacobian = self.warping_function.fgrad_y(self.Z, self.warping_params)
+        ll = GP.log_likelihood(self)
+        jacobian = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
         return ll + np.log(jacobian).sum()

     def _log_likelihood_gradients(self):
-        ll_grads = GP_regression._log_likelihood_gradients(self)
-        alpha = np.dot(self.Ki, self.Y.flatten())
+        ll_grads = GP._log_likelihood_gradients(self)
+        alpha = np.dot(self.Ki, self.likelihood.Y.flatten())
         warping_grads = self.warping_function_gradients(alpha)
+        warping_grads = np.append(warping_grads[:,:-1].flatten(), warping_grads[0,-1])
         return np.hstack((warping_grads.flatten(), ll_grads.flatten()))

     def warping_function_gradients(self, Kiy):
-        grad_y = self.warping_function.fgrad_y(self.Z, self.warping_params)
-        grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Z, self.warping_params,
+        grad_y = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
+        grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed, self.warping_params,
                                                                  return_covar_chain = True)
         djac_dpsi = ((1.0/grad_y[:,:, None, None])*grad_y_psi).sum(axis=0).sum(axis=0)
         dquad_dpsi = (Kiy[:,None,None,None] * grad_psi).sum(axis=0).sum(axis=0)
         return -dquad_dpsi + djac_dpsi

     def plot_warping(self):
-        self.warping_function.plot(self.warping_params, self.Z.min(), self.Z.max())
+        self.warping_function.plot(self.warping_params, self.Y_untransformed.min(), self.Y_untransformed.max())

-    def predict(self, X, in_unwarped_space = False, **kwargs):
-        mu, var = GP_regression.predict(self, X, **kwargs)
-
-        # The plot() function calls _set_params() before calling predict()
-        # this is causing the observations to be plotted in the transformed
-        # space (where Y lives), making the plot looks very wrong
-        # if the predictions are made in the untransformed space
-        # (where Z lives). To fix this I included the option below. It's
-        # just a quick fix until I figure out something smarter.
-        if in_unwarped_space:
+    def _raw_predict(self, *args, **kwargs):
+        mu, var = GP._raw_predict(self, *args, **kwargs)
+
+        if self.predict_in_warped_space:
             mu = self.warping_function.f_inv(mu, self.warping_params)
             var = self.warping_function.f_inv(var, self.warping_params)
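Note: the log_likelihood above is the usual change-of-variables correction for a warped GP: the GP marginal likelihood is evaluated on the warped targets f(y), and the log-Jacobian of the warping is added, which is exactly what ll + np.log(jacobian).sum() computes. With K the covariance of the training targets (including noise), the identity being used is:

    \log p(\mathbf{y} \mid X) \;=\; \log \mathcal{N}\!\left(f(\mathbf{y}) \mid \mathbf{0},\, K\right) \;+\; \sum_{n} \log f'(y_n)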

View file

@@ -155,3 +155,118 @@ class TanhWarpingFunction(WarpingFunction):
         variables = ['a', 'b', 'c']
         names = sum([['warp_tanh_%s_t%i' % (variables[n],q) for n in range(3)] for q in range(self.n_terms)],[])
         return names

+class TanhWarpingFunction_d(WarpingFunction):
+
+    def __init__(self, n_terms=3):
+        """n_terms specifies the number of tanh terms to be used"""
+        self.n_terms = n_terms
+        self.num_parameters = 3 * self.n_terms + 1
+
+    def f(self, y, psi):
+        """
+        transform y with f using parameter vector psi
+        psi = [[a,b,c]]
+
+        f = \sum_{terms} a * tanh(b*(y+c))
+        """
+        # 1. check that number of params is consistent
+        # assert psi.shape[0] == self.n_terms, 'inconsistent parameter dimensions'
+        # assert psi.shape[1] == 4, 'inconsistent parameter dimensions'
+
+        mpsi = psi.copy()
+        d = psi[-1]
+        mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
+
+        # 3. transform data
+        z = d*y.copy()
+        for i in range(len(mpsi)):
+            a, b, c = mpsi[i]
+            z += a*np.tanh(b*(y+c))
+        return z
+
+    def f_inv(self, y, psi, iterations = 30):
+        """
+        calculate the numerical inverse of f
+
+        == input ==
+        iterations: number of N.R. iterations
+        """
+        y = y.copy()
+        z = np.ones_like(y)
+        for i in range(iterations):
+            z -= (self.f(z, psi) - y)/self.fgrad_y(z, psi)
+        return z
+
+    def fgrad_y(self, y, psi, return_precalc = False):
+        """
+        gradient of f w.r.t to y ([N x 1])
+
+        returns: Nx1 vector of derivatives, unless return_precalc is true,
+        then it also returns the precomputed stuff
+        """
+        mpsi = psi.copy()
+        d = psi[-1]
+        mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
+
+        # vectorized version
+        S = (mpsi[:,1]*(y[:,:,None] + mpsi[:,2])).T
+        R = np.tanh(S)
+        D = 1-R**2
+        GRAD = (d + (mpsi[:,0:1][:,:,None]*mpsi[:,1:2][:,:,None]*D).sum(axis=0)).T
+
+        if return_precalc:
+            return GRAD, S, R, D
+
+        return GRAD
+
+    def fgrad_y_psi(self, y, psi, return_covar_chain = False):
+        """
+        gradient of f w.r.t to y and psi
+
+        returns: NxIx4 tensor of partial derivatives
+        """
+        mpsi = psi.copy()
+        mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
+
+        w, s, r, d = self.fgrad_y(y, psi, return_precalc = True)
+
+        gradients = np.zeros((y.shape[0], y.shape[1], len(mpsi), 4))
+        for i in range(len(mpsi)):
+            a, b, c = mpsi[i]
+            gradients[:,:,i,0] = (b*(1.0/np.cosh(s[i]))**2).T
+            gradients[:,:,i,1] = a*(d[i] - 2.0*s[i]*r[i]*(1.0/np.cosh(s[i]))**2).T
+            gradients[:,:,i,2] = (-2.0*a*(b**2)*r[i]*((1.0/np.cosh(s[i]))**2)).T
+        gradients[:,:,0,3] = 1.0
+
+        if return_covar_chain:
+            covar_grad_chain = np.zeros((y.shape[0], y.shape[1], len(mpsi), 4))
+            for i in range(len(mpsi)):
+                a, b, c = mpsi[i]
+                covar_grad_chain[:, :, i, 0] = (r[i]).T
+                covar_grad_chain[:, :, i, 1] = (a*(y + c) * ((1.0/np.cosh(s[i]))**2).T)
+                covar_grad_chain[:, :, i, 2] = a*b*((1.0/np.cosh(s[i]))**2).T
+            covar_grad_chain[:, :, 0, 3] = y
+            return gradients, covar_grad_chain
+
+        return gradients
+
+    def _get_param_names(self):
+        variables = ['a', 'b', 'c', 'd']
+        names = sum([['warp_tanh_%s_t%i' % (variables[n],q) for n in range(3)] for q in range(self.n_terms)],[])
+        names.append('warp_tanh_d')
+        return names
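Note: the new _d warping adds a linear term, f(y) = d*y + sum_i a_i * tanh(b_i*(y + c_i)), and f_inv recovers y from z = f(y) by Newton-Raphson. A standalone numeric check of that round trip, re-implemented here in plain numpy (the parameter values are made up):

    import numpy as np

    a = np.array([1.0, 0.5]); b = np.array([0.7, 1.3]); c = np.array([0.0, -0.2]); d = 1.5

    def f(y):
        return d*y + sum(a[i]*np.tanh(b[i]*(y + c[i])) for i in range(len(a)))

    def fgrad_y(y):
        return d + sum(a[i]*b[i]/np.cosh(b[i]*(y + c[i]))**2 for i in range(len(a)))

    def f_inv(z, iterations=30):
        y = np.ones_like(z)
        for _ in range(iterations):
            y -= (f(y) - z) / fgrad_y(y)   # Newton-Raphson, as in f_inv above
        return y

    y = np.linspace(-2, 2, 11)
    assert np.allclose(f_inv(f(y)), y)   # warping then inverting recovers y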

View file

@@ -5,7 +5,7 @@ import os
 from setuptools import setup

 # Version number
-version = '0.2'
+version = '0.3.2'

 def read(fname):
     return open(os.path.join(os.path.dirname(__file__), fname)).read()

@@ -18,7 +18,7 @@ setup(name = 'GPy',
       license = "BSD 3-clause",
       keywords = "machine-learning gaussian-processes kernels",
       url = "http://sheffieldml.github.com/GPy/",
-      packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples', 'GPy.likelihoods'],
+      packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples', 'GPy.likelihoods', 'GPy.testing'],
       package_dir={'GPy': 'GPy'},
       package_data = {'GPy': ['GPy/examples']},
       py_modules = ['GPy.__init__'],