Polynomial kernel gradients wrt X working.

This commit is contained in:
Neil Lawrence 2013-08-28 00:20:12 +02:00
parent a61feb17a5
commit b127c96bf2
8 changed files with 79 additions and 26 deletions

View file

@ -114,7 +114,7 @@ def coregionalisation_sparse(max_iters=100):
return m
def epomeo_gpx(max_iters=100):
"""Perform Gaussian process regression on the GPX data from the Mount Epomeo runs. Requires gpxpy to be installed on your system."""
"""Perform Gaussian process regression on the latitude and longitude data from the Mount Epomeo runs. Requires gpxpy to be installed on your system to load in the data."""
data = GPy.util.datasets.epomeo_gpx()
num_data_list = []
for Xpart in data['X']:
@ -122,16 +122,16 @@ def epomeo_gpx(max_iters=100):
num_data_array = np.array(num_data_list)
num_data = num_data_array.sum()
Y = np.zeros((num_data, 3))
Y = np.zeros((num_data, 2))
t = np.zeros((num_data, 2))
start = 0
for Xpart, index in zip(data['X'], range(len(data['X']))):
end = start+Xpart.shape[0]
t[start:end, :] = np.hstack((Xpart[:, 0:1],
index*np.ones((Xpart.shape[0], 1))))
Y[start:end, :] = Xpart[:, 1:4]
Y[start:end, :] = Xpart[:, 1:3]
num_inducing = 40
num_inducing = 200
Z = np.hstack((np.linspace(t[:,0].min(), t[:, 0].max(), num_inducing)[:, None],
np.random.randint(0, 4, num_inducing)[:, None]))
@ -139,12 +139,12 @@ def epomeo_gpx(max_iters=100):
k2 = GPy.kern.coregionalise(output_dim=5, rank=5)
k = k1**k2
m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z)
m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z, normalize_Y=True)
m.constrain_fixed('.*rbf_var', 1.)
m.constrain_fixed('iip')
m.constrain_bounded('noise_variance', 1e-3, 1e-1)
# m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs')
# m.optimize(max_iters=max_iters)
m.optimize(max_iters=max_iters,messages=True)
return m

View file

@ -61,6 +61,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
function_eval = 1
fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient.
if any(np.isnan(gradnew)):
raise UnexpectedInfOrNan
current_grad = np.dot(gradnew, gradnew)
gradold = gradnew.copy()
d = -gradnew # Initial search direction.

View file

@ -619,3 +619,25 @@ class Kern_check_dK_dX(Kern_check_model):
def _set_params(self, x):
self.X=x.reshape(self.X.shape)
class Kern_check_dKdiag_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0]))
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
def _get_param_names(self):
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
def _get_params(self):
return self.X.flatten()
def _set_params(self, x):
self.X=x.reshape(self.X.shape)

View file

@ -107,12 +107,22 @@ class MLP(Kernpart):
def dK_dX(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2)
arg = self._K_asin_arg
post_div = np.sqrt(1-arg*arg)
numer = self._K_numer
denom = self._K_denom
vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
denom3 = denom*denom*denom
target += (((X2[None,:, :]/denom[None, :, None]+vec2[None, None, :]*X[:, :, None]*numer/denom)/post_div[:, :, None]) * dL_dK[:, :, None]).sum(1)
target *= four_over_tau*self.weight_variance*self.variance
raise NotImplementedError
# self._K_computations(X, X2)
# gX = np.zeros((X2.shape[0], X.shape[1], X.shape[0]))
gX = np.zeros((X2.shape[0], X.shape[1], X.shape[0]))
# for i in range(X.shape[0]):
# gX[:, :, i] = self._dK_dX_point(dL_dK, X, X2, target, i)
for i in range(X.shape[0]):
gX[:, :, i] = self._dK_dX_point(dL_dK, X, X2, target, i)
def _dK_dX_point(self, dL_dK, X, X2, target, i):
@ -130,9 +140,8 @@ class MLP(Kernpart):
denom3 = denom*denom*denom
gX = np.zeros((X2.shape[0], X2.shape[1]))
for j in range(X2.shape[1]):
gX[:, j] =X2[:, j]/denom - vec2*X[i, j]*numer/denom3
gX[:, j] = X2[:, j]/denom - vec2*X[i, j]*numer/denom3
gX[:, j] = four_over_tau*self.weight_variance*self.variance*gX[:, j]/np.sqrt(1-arg*arg)
target[i, :]
def _K_computations(self, X, X2):

View file

@ -101,11 +101,15 @@ class POLY(Kernpart):
def dK_dX(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X"""
pass
self._K_computations(X, X2)
arg = self._K_poly_arg
target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]* dL_dK[:, :, None]).sum(1)
def _dK_dX_point(self, dL_dK, X, X2, target, i):
"""Gradient with respect to one point of X"""
pass
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X"""
self._K_diag_computations(X)
arg = self._K_diag_poly_arg
target += 2.*self.weight_variance*self.degree*self.variance*X*dL_dKdiag[:, None]*(arg**(self.degree-1))[:, None]
def _K_computations(self, X, X2):

View file

@ -62,7 +62,7 @@ class Prod(Kernpart):
return self._K2
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
"""Derivative of the covariance matrix with respect to the parameters."""
self._K_computations(X,X2)
if X2 is None:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])

View file

@ -1,15 +1,15 @@
Prod.py kernel should take a list of kernels rather than two arguments for kernels.
transformations.py should have limits on what is fed into exp() particularly for the negative log logistic.
Prod.py kernel could also take a list of kernels rather than two arguments for kernels.
transformations.py should have limits on what is fed into exp() particularly for the negative log logistic (done -neil).
Load in a model with mlp kernel, plot it, change a parameter, plot it again. It doesn't update the plot.
Tests for kernels which work directly on the kernel implementation (not through GP).
Should stationary covariances have their own kernpart type?
Should stationary covariances have their own kernpart type, I think so, also inner product kernels. That way the caching so carefully constructed for RBF or linear could be shared.
Where do we declare default kernel parameters. In constructors.py or in the definition file for the kernel?
When printing to stdout, can we check that our approach is also working nicely for the ipython notebook? I like the way our optimization ticks over, but at the moment this doesn't seem to work in the ipython notebook, it would be nice if it did.
When printing to stdout, can we check that our approach is also working nicely for the ipython notebook? I like the way our optimization ticks over, but at the moment this doesn't seem to work in the ipython notebook, it would be nice if it did. My problems may be due to using ipython 0.12, I've had a poke around at fixing this and I can't do it for 0.12.
When we print a model should we also include information such as number of inputs and number of outputs?
@ -37,3 +37,20 @@ When using bfgs in ipython notebook, text appears in the original console, not i
In sparse GPs wouldn't it be clearer to call Z inducing?
In coregionalisation matrix, setting the W to all ones will (surely?) ensure that symmetry isn't broken. Also, but allowing it to scale like that, the output variance increases as rank is increased (and if user sets rank to more than output dim they could get very different results).
We are inconsistent about our use of ise and ize e.g. optimize and normalize_X, but coregionalise, we should choose one and stick to it. Suggest -ize.
Exceptions: we need to provide a list of exceptions we throw and specify what is thrown where.
Why is it get_params() but it's getstate()? Should be get_state(). Why is it get_gradient instead of get_gradients? Need to be consistent!! Doesn't matter which way we choose as long as it's consistent.
In likelihood Nparams should be num_params
In likelihood N should be num_data
The Gaussian target in likelihood should be F What is V doing here?
Need to check for nan values in likelihoods. These should be treated as missing values. If the likelihood can't handle the missing value an error should be throw.
Sometimes you want to print kernpart objects, for diagnosis etc. This isn't possible currently.

View file

@ -16,10 +16,9 @@ import datetime
import sys, urllib
def reporthook(a,b,c):
# ',' at the end of the line is important!
print "% 3.1f%% of %d bytes\r" % (min(100, float(a * b) / c * 100), c),
#print "% 3.1f%% of %d bytes\r" % (min(100, float(a * b) / c * 100), c),
#you can also use sys.stdout.write
#sys.stdout.write("\r% 3.1f%% of %d bytes"
# % (min(100, float(a * b) / c * 100), c)
sys.stdout.write("\r% 3.1f%% of %d bytes" % (min(100, float(a * b) / c * 100), c))
sys.stdout.flush()
# Global variables
@ -288,7 +287,7 @@ except ImportError:
gpxpy_available = False
if gpxpy_available:
def epomeo_gpx(data_set='epomeo_gpx'):
def epomeo_gpx(data_set='epomeo_gpx', sample_every=4):
if not data_available(data_set):
download_data(data_set)
files = ['endomondo_1', 'endomondo_2', 'garmin_watch_via_endomondo','viewranger_phone', 'viewranger_tablet']
@ -301,7 +300,7 @@ if gpxpy_available:
segment = gpx.tracks[0].segments[0]
points = [point for track in gpx.tracks for segment in track.segments for point in segment.points]
data = [[(point.time-datetime.datetime(2013,8,21)).total_seconds(), point.latitude, point.longitude, point.elevation] for point in points]
X.append(np.asarray(data))
X.append(np.asarray(data)[::sample_every, :])
gpx_file.close()
return data_details_return({'X' : X, 'info' : 'Data is an array containing time in seconds, latitude, longitude and elevation in that order.'}, data_set)