Polynomial kernel gradients wrt X working.

2026-07-26 17:11:06 +02:00 · 2013-08-28 00:20:12 +02:00 · 2013-08-28 00:20:12 +02:00 · b127c96bf2
commit b127c96bf2
parent a61feb17a5
8 changed files with 79 additions and 26 deletions
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@ -114,7 +114,7 @@ def coregionalisation_sparse(max_iters=100):
    return m

 def epomeo_gpx(max_iters=100):
-    """Perform Gaussian process regression on the GPX data from the Mount Epomeo runs. Requires gpxpy to be installed on your system."""
+    """Perform Gaussian process regression on the latitude and longitude data from the Mount Epomeo runs. Requires gpxpy to be installed on your system to load in the data."""
    data = GPy.util.datasets.epomeo_gpx()
    num_data_list = []
    for Xpart in data['X']:
@ -122,16 +122,16 @@ def epomeo_gpx(max_iters=100):

    num_data_array = np.array(num_data_list)
    num_data = num_data_array.sum()
-    Y = np.zeros((num_data, 3))
+    Y = np.zeros((num_data, 2))
    t = np.zeros((num_data, 2))
    start = 0
    for Xpart, index in zip(data['X'], range(len(data['X']))):
        end = start+Xpart.shape[0]
        t[start:end, :] = np.hstack((Xpart[:, 0:1],
                                    index*np.ones((Xpart.shape[0], 1))))
-        Y[start:end, :] = Xpart[:, 1:4]
+        Y[start:end, :] = Xpart[:, 1:3]

-    num_inducing = 40
+    num_inducing = 200
    Z = np.hstack((np.linspace(t[:,0].min(), t[:, 0].max(), num_inducing)[:, None],
                   np.random.randint(0, 4, num_inducing)[:, None]))

@ -139,12 +139,12 @@ def epomeo_gpx(max_iters=100):
    k2 = GPy.kern.coregionalise(output_dim=5, rank=5)
    k = k1**k2 

-    m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z)
+    m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z, normalize_Y=True)
    m.constrain_fixed('.*rbf_var', 1.)
    m.constrain_fixed('iip')
    m.constrain_bounded('noise_variance', 1e-3, 1e-1)
 #     m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs')
-#    m.optimize(max_iters=max_iters)
+    m.optimize(max_iters=max_iters,messages=True)

    return m

--- a/GPy/inference/scg.py
+++ b/GPy/inference/scg.py
@ -61,6 +61,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
    function_eval = 1
    fnow = fold
    gradnew = gradf(x, *optargs) # Initial gradient.
+    if any(np.isnan(gradnew)):
+        raise UnexpectedInfOrNan
    current_grad = np.dot(gradnew, gradnew)
    gradold = gradnew.copy()
    d = -gradnew # Initial search direction.
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@ -619,3 +619,25 @@ class Kern_check_dK_dX(Kern_check_model):

    def _set_params(self, x):
        self.X=x.reshape(self.X.shape)
+
+class Kern_check_dKdiag_dX(Kern_check_model):
+    """This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
+        if dL_dK==None:
+            self.dL_dK = np.ones((self.X.shape[0]))
+
+    def log_likelihood(self):
+        return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
+
+    def _log_likelihood_gradients(self):
+        return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
+
+    def _get_param_names(self):
+        return ['X_'  +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
+                
+    def _get_params(self):
+        return self.X.flatten()
+
+    def _set_params(self, x):
+        self.X=x.reshape(self.X.shape)
--- a/GPy/kern/parts/mlp.py
+++ b/GPy/kern/parts/mlp.py
@ -107,12 +107,22 @@ class MLP(Kernpart):

    def dK_dX(self, dL_dK, X, X2, target):
        """Derivative of the covariance matrix with respect to X"""
+        self._K_computations(X, X2)
+        arg = self._K_asin_arg
+        post_div = np.sqrt(1-arg*arg)
+        numer = self._K_numer
+        denom = self._K_denom
+        vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
+        denom3 = denom*denom*denom
+        target += (((X2[None,:, :]/denom[None, :, None]+vec2[None, None, :]*X[:, :, None]*numer/denom)/post_div[:, :, None]) * dL_dK[:, :, None]).sum(1)
+        target *= four_over_tau*self.weight_variance*self.variance
        raise NotImplementedError
-        # self._K_computations(X, X2)
-        # gX = np.zeros((X2.shape[0], X.shape[1], X.shape[0]))
+
+
+        gX = np.zeros((X2.shape[0], X.shape[1], X.shape[0]))
        
-        # for i in range(X.shape[0]):
-        #     gX[:, :, i] = self._dK_dX_point(dL_dK, X, X2, target, i)
+        for i in range(X.shape[0]):
+            gX[:, :, i] = self._dK_dX_point(dL_dK, X, X2, target, i)
            
            
    def _dK_dX_point(self, dL_dK, X, X2, target, i):
@ -130,9 +140,8 @@ class MLP(Kernpart):
        denom3 = denom*denom*denom
        gX = np.zeros((X2.shape[0], X2.shape[1]))
        for j in range(X2.shape[1]):
-            gX[:, j] =X2[:, j]/denom - vec2*X[i, j]*numer/denom3
+            gX[:, j] = X2[:, j]/denom - vec2*X[i, j]*numer/denom3
            gX[:, j] = four_over_tau*self.weight_variance*self.variance*gX[:, j]/np.sqrt(1-arg*arg)
-            target[i, :]
    
    
    def _K_computations(self, X, X2):
--- a/GPy/kern/parts/poly.py
+++ b/GPy/kern/parts/poly.py
@ -101,11 +101,15 @@ class POLY(Kernpart):

    def dK_dX(self, dL_dK, X, X2, target):
        """Derivative of the covariance matrix with respect to X"""
-        pass
+        self._K_computations(X, X2)
+        arg = self._K_poly_arg
+        target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]* dL_dK[:, :, None]).sum(1)
            
-    def _dK_dX_point(self, dL_dK, X, X2, target, i):
-        """Gradient with respect to one point of X"""
-        pass
+    def dKdiag_dX(self, dL_dKdiag, X, target):
+        """Gradient of diagonal of covariance with respect to X"""
+        self._K_diag_computations(X)
+        arg = self._K_diag_poly_arg
+        target += 2.*self.weight_variance*self.degree*self.variance*X*dL_dKdiag[:, None]*(arg**(self.degree-1))[:, None]
    
    
    def _K_computations(self, X, X2):
--- a/GPy/kern/parts/prod.py
+++ b/GPy/kern/parts/prod.py
@ -62,7 +62,7 @@ class Prod(Kernpart):
        return self._K2
    
    def dK_dtheta(self,dL_dK,X,X2,target):
-        """derivative of the covariance matrix with respect to the parameters."""
+        """Derivative of the covariance matrix with respect to the parameters."""
        self._K_computations(X,X2)
        if X2 is None:
            self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
--- a/GPy/notes.txt
+++ b/GPy/notes.txt
@ -1,15 +1,15 @@
-Prod.py kernel should take a list of kernels rather than two arguments for kernels.
-transformations.py should have limits on what is fed into exp() particularly for the negative log logistic. 
+Prod.py kernel could also take a list of kernels rather than two arguments for kernels.
+transformations.py should have limits on what is fed into exp() particularly for the negative log logistic (done -neil). 

 Load in a model with mlp kernel, plot it, change a parameter, plot it again. It doesn't update the plot.

 Tests for kernels which work directly on the kernel implementation (not through GP).

-Should stationary covariances have their own kernpart type?
+Should stationary covariances have their own kernpart type, I think so, also inner product kernels. That way the caching so carefully constructed for RBF or linear could be shared.

 Where do we declare default kernel parameters. In constructors.py or in the definition file for the kernel?

-When printing to stdout, can we check that our approach is also working nicely for the ipython notebook? I like the way our optimization ticks over, but at the moment this doesn't seem to work in the ipython notebook, it would be nice if it did.
+When printing to stdout, can we check that our approach is also working nicely for the ipython notebook? I like the way our optimization ticks over, but at the moment this doesn't seem to work in the ipython notebook, it would be nice if it did. My problems may be due to using ipython 0.12, I've had a poke around at fixing this and I can't do it for 0.12.

 When we print a model should we also include information such as number of inputs and number of outputs?

@ -37,3 +37,20 @@ When using bfgs in ipython notebook, text appears in the original console, not i
 In sparse GPs wouldn't it be clearer to call Z inducing?

 In coregionalisation matrix, setting the W to all ones will (surely?) ensure that symmetry isn't broken. Also, but allowing it to scale like that, the output variance increases as rank is increased (and if user sets rank to more than output dim they could get very different results).
+
+We are inconsistent about our use of ise and ize e.g. optimize and normalize_X, but coregionalise, we should choose one and stick to it. Suggest -ize.
+
+Exceptions: we need to provide a list of exceptions we throw and specify what is thrown where. 
+
+Why is it get_params() but it's getstate()? Should be get_state(). Why is it get_gradient instead of get_gradients? Need to be consistent!! Doesn't matter which way we choose as long as it's consistent.
+
+In likelihood Nparams should be num_params
+
+In likelihood N should be num_data
+
+The Gaussian target in likelihood should be F What is V doing here?
+
+Need to check for nan values in likelihoods. These should be treated as missing values. If the likelihood can't handle the missing value an error should be throw.
+
+
+Sometimes you want to print kernpart objects, for diagnosis etc. This isn't possible currently. 
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@ -16,10 +16,9 @@ import datetime
 import sys, urllib
 def reporthook(a,b,c): 
    # ',' at the end of the line is important!
-    print "% 3.1f%% of %d bytes\r" % (min(100, float(a * b) / c * 100), c),
+    #print "% 3.1f%% of %d bytes\r" % (min(100, float(a * b) / c * 100), c),
    #you can also use sys.stdout.write
-    #sys.stdout.write("\r% 3.1f%% of %d bytes" 
-    #                 % (min(100, float(a * b) / c * 100), c)
+    sys.stdout.write("\r% 3.1f%% of %d bytes" % (min(100, float(a * b) / c * 100), c))
    sys.stdout.flush()
     
 # Global variables
@ -288,7 +287,7 @@ except ImportError:
    gpxpy_available = False

 if gpxpy_available:
-    def epomeo_gpx(data_set='epomeo_gpx'):
+    def epomeo_gpx(data_set='epomeo_gpx', sample_every=4):
        if not data_available(data_set):
            download_data(data_set)
        files = ['endomondo_1', 'endomondo_2', 'garmin_watch_via_endomondo','viewranger_phone', 'viewranger_tablet']
@ -301,7 +300,7 @@ if gpxpy_available:
            segment = gpx.tracks[0].segments[0]
            points = [point for track in gpx.tracks for segment in track.segments for point in segment.points]
            data = [[(point.time-datetime.datetime(2013,8,21)).total_seconds(), point.latitude, point.longitude, point.elevation] for point in points]
-            X.append(np.asarray(data))
+            X.append(np.asarray(data)[::sample_every, :])
            gpx_file.close()        
        return data_details_return({'X' : X, 'info' : 'Data is an array containing time in seconds, latitude, longitude and elevation in that order.'}, data_set)