From df97f7814efb0589868cfe9e6ef4026414fb5a83 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Wed, 13 Nov 2013 13:40:44 +0000
Subject: [PATCH 01/28] better handling of missing config files

---
 GPy/util/config.py | 5 ++++-
 MANIFEST.in        | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/GPy/util/config.py b/GPy/util/config.py
index cd29a8af..02796e0b 100644
--- a/GPy/util/config.py
+++ b/GPy/util/config.py
@@ -14,6 +14,9 @@ print default_file, os.path.isfile(default_file)
 # 1. check if the user has a ~/.gpy_config.cfg
 if os.path.isfile(user_file):
     config.read(user_file)
-else:
+elif os.path.isfile(default_file):
     # 2. if not, use the default one
     config.read(default_file)
+else:
+    #3. panic
+    raise ValueError, "no configuration file found"
diff --git a/MANIFEST.in b/MANIFEST.in
index c89284cd..8d5b2304 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,3 +2,5 @@ include *.txt
 recursive-include doc *.txt
 include *.md
 recursive-include doc *.md
+include *.cfg
+recursive-include doc *.cfg

From e79794f6ef7f337dc3a500f13fe864b79293e8c5 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Thu, 14 Nov 2013 08:47:16 +0000
Subject: [PATCH 02/28] Part implementation of ode_eq functionality. Not yet
 numerically stable or efficient (some horrible use of cut and paste to get
 things working ...)

---
 GPy/kern/parts/sympy_helpers.cpp | 106 +++++++++++++++-
 GPy/kern/parts/sympy_helpers.h   |   7 ++
 GPy/util/symbolic.py             | 203 ++++++++++++++++++++++++++++---
 3 files changed, 299 insertions(+), 17 deletions(-)

diff --git a/GPy/kern/parts/sympy_helpers.cpp b/GPy/kern/parts/sympy_helpers.cpp
index e4df4d80..d21d2683 100644
--- a/GPy/kern/parts/sympy_helpers.cpp
+++ b/GPy/kern/parts/sympy_helpers.cpp
@@ -1,7 +1,8 @@
 #include <math.h>
 #include <float.h>
 #include <stdlib.h>
-
+#include <iostream>
+#include <stdexcept>
 double DiracDelta(double x){
   // TODO: this doesn't seem to be a dirac delta ... should return infinity. Neil
     if((x<0.000001) & (x>-0.000001))//go on, laugh at my c++ skills
@@ -14,6 +15,7 @@ double DiracDelta(double x,int foo){
 };
 
 double sinc(double x){
+  // compute the sinc function
   if (x==0)
     return 1.0;
   else 
@@ -21,6 +23,7 @@ double sinc(double x){
 }
 
 double sinc_grad(double x){
+  // compute the gradient of the sinc function.
   if (x==0)
     return 0.0;
   else 
@@ -28,6 +31,7 @@ double sinc_grad(double x){
 }
 
 double erfcx(double x){
+  // compute the scaled complex error function.
   double xneg=-sqrt(log(DBL_MAX/2));
   double xmax = 1/(sqrt(M_PI)*DBL_MIN);
   xmax = DBL_MAX<xmax ? DBL_MAX : xmax;
@@ -50,12 +54,108 @@ double erfcx(double x){
 }
 
 double ln_diff_erf(double x0, double x1){
+  // stably compute the log of difference between two erfs.
+  if (x1>x0)
+    throw std::runtime_error("Error: second argument must be smaller than first in ln_diff_err");
+  return log(erf(x0) - erf(x1));
   if (x0==x1)
-    return INFINITY;
+    return -INFINITY;
   else if(x0<0 && x1>0 || x0>0 && x1<0)
     return log(erf(x0)-erf(x1));
   else if(x1>0)
-    return log(erfcx(x1)-erfcx(x0)*exp(x1*x1)- x0*x0)-x1*x1;
+    return log(erfcx(x1)-erfcx(x0)*exp(x1*x1- x0*x0))-x1*x1;
   else 
     return log(erfcx(-x0)-erfcx(-x1)*exp(x0*x0 - x1*x1))-x0*x0;
 }
+
+double h(double t, double tprime, double d_i, double d_j, double l){
+  // Compute the h function for the sim covariance.
+  double half_l_di = 0.5*l*d_i;
+  double arg_1 = half_l_di + tprime/l;
+  double arg_2 = half_l_di - (t-tprime)/l;
+  double ln_part_1 = ln_diff_erf(arg_1, arg_2);
+  arg_2 = half_l_di - t/l;
+  double sign_val = 1.0;
+  if(t/l==0)
+    sign_val = 0.0;
+  else if (t/l < 0)
+    sign_val = -1.0;
+  double ln_part_2 = ln_diff_erf(half_l_di, arg_2);
+  
+  return sign_val*exp(half_l_di*half_l_di - d_i*(t-tprime) + ln_part_1 - log(d_i + d_j)) - sign_val*exp(half_l_di*half_l_di - d_i*t - d_j*tprime + ln_part_2 - log(d_i + d_j));
+}
+
+double dh_dl(double t, double tprime, double d_i, double d_j, double l){
+  // compute gradient of h function with respect to lengthscale for sim covariance
+  // TODO a lot of energy wasted recomputing things here, need to do this in a shared way somehow ... perhaps needs rewrite of sympykern.
+  double half_l_di = 0.5*l*d_i;
+  double arg_1 = half_l_di + tprime/l;
+  double arg_2 = half_l_di - (t-tprime)/l;
+  double ln_part_1 = ln_diff_erf(arg_1, arg_2);
+  arg_2 = half_l_di - t/l;
+  double ln_part_2 = ln_diff_erf(half_l_di, arg_2);
+  double diff_t = t - tprime;
+  double l2 = l*l;
+  double hv = h(t, tprime, d_i, d_j, l);
+  return 0.5*d_i*d_i*l*hv + 2/(sqrt(M_PI)*(d_i+d_j))*((-diff_t/l2-d_i/2)*exp(-diff_t*diff_t/l2)+(-tprime/l2+d_i/2)*exp(-tprime*tprime/l2-d_i*t)-(-t/l2-d_i/2)*exp(-t*t/l2-d_j*tprime)-d_i/2*exp(-(d_i*t+d_j*tprime)));
+}
+
+double dh_dd_i(double t, double tprime, double d_i, double d_j, double l){
+  double diff_t = (t-tprime);
+  double l2 = l*l;
+  double hv = h(t, tprime, d_i, d_j, l);
+  double half_l_di = 0.5*l*d_i;
+  double arg_1 = half_l_di + tprime/l;
+  double arg_2 = half_l_di - (t-tprime)/l;
+  double ln_part_1 = ln_diff_erf(arg_1, arg_2);
+  arg_1 = half_l_di;
+  arg_2 = half_l_di - t/l;
+  double sign_val = 1.0;
+  if(t/l==0)
+    sign_val = 0.0;
+  else if (t/l < 0)
+    sign_val = -1.0;
+  double ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l);
+
+  double base = ((0.5*d_i*l2*(d_i+d_j)-1)*hv 
+		 + (-diff_t*sign_val*exp(half_l_di*half_l_di
+					 -d_i*diff_t
+					 +ln_part_1)
+		    +t*sign_val*exp(half_l_di*half_l_di
+				    -d_i*t-d_j*tprime
+				    +ln_part_2))
+		 + l/sqrt(M_PI)*(-exp(-diff_t*diff_t/l2)
+			       +exp(-tprime*tprime/l2-d_i*t)
+			       +exp(-t*t/l2-d_j*tprime)
+			       -exp(-(d_i*t + d_j*tprime))));
+  return base/(d_i+d_j);
+}
+
+double dh_dd_j(double t, double tprime, double d_i, double d_j, double l){
+  double diff_t = (t-tprime);
+  double l2 = l*l;
+  double half_l_di = 0.5*l*d_i;
+  double hv = h(t, tprime, d_i, d_j, l);
+  double arg_1 = half_l_di + tprime/l;
+  double arg_2 = half_l_di - (t-tprime)/l;
+  double ln_part_1 = ln_diff_erf(arg_1, arg_2);
+  arg_1 = half_l_di;
+  arg_2 = half_l_di - t/l;
+  double sign_val = 1.0;
+  if(t/l==0)
+    sign_val = 0.0;
+  else if (t/l < 0)
+    sign_val = -1.0;
+  double ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l);
+  double base = tprime*sign_val*exp(half_l_di*half_l_di-(d_i*t+d_j*tprime)+ln_part_2)-hv;
+  return base/(d_i+d_j);
+}
+
+
+double dh_dt(double t, double tprime, double d_i, double d_j, double l){
+  return 0.0;
+}
+
+double dh_dtprime(double t, double tprime, double d_i, double d_j, double l){
+  return 0.0;
+}
diff --git a/GPy/kern/parts/sympy_helpers.h b/GPy/kern/parts/sympy_helpers.h
index 56220167..5e58d5d2 100644
--- a/GPy/kern/parts/sympy_helpers.h
+++ b/GPy/kern/parts/sympy_helpers.h
@@ -7,3 +7,10 @@ double sinc_grad(double x);
 
 double erfcx(double x);
 double ln_diff_erf(double x0, double x1);
+
+double h(double t, double tprime, double d_i, double d_j, double l);
+double dh_dl(double t, double tprime, double d_i, double d_j, double l);
+double dh_dd_i(double t, double tprime, double d_i, double d_j, double l);
+double dh_dd_j(double t, double tprime, double d_i, double d_j, double l);
+double dh_dt(double t, double tprime, double d_i, double d_j, double l);
+double dh_dtprime(double t, double tprime, double d_i, double d_j, double l);
diff --git a/GPy/util/symbolic.py b/GPy/util/symbolic.py
index 0b5ca381..d546f940 100644
--- a/GPy/util/symbolic.py
+++ b/GPy/util/symbolic.py
@@ -1,4 +1,4 @@
-from sympy import Function, S, oo, I, cos, sin, asin, log, erf,pi,exp
+from sympy import Function, S, oo, I, cos, sin, asin, log, erf,pi,exp,sqrt,sign
 
 
 class ln_diff_erf(Function):
@@ -19,15 +19,84 @@ class ln_diff_erf(Function):
         if x0.is_Number and x1.is_Number:            
             return log(erf(x0)-erf(x1))
 
-class sim_h(Function):
+class dh_dd_i(Function):
+    nargs = 5
+    @classmethod
+    def eval(cls, t, tprime, d_i, d_j, l):
+        if (t.is_Number
+            and tprime.is_Number
+            and d_i.is_Number
+            and d_j.is_Number
+            and l.is_Number):
+
+            diff_t = (t-tprime)
+            l2 = l*l
+            h = h(t, tprime, d_i, d_j, l)
+            half_l_di = 0.5*l*d_i
+            arg_1 = half_l_di + tprime/l
+            arg_2 = half_l_di - (t-tprime)/l
+            ln_part_1 = ln_diff_erf(arg_1, arg_2)
+            arg_1 = half_l_di 
+            arg_2 = half_l_di - t/l
+            sign_val = sign(t/l)
+            ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
+
+            base = ((0.5*d_i*l2*(d_i+d_j)-1)*h 
+                    + (-diff_t*sign_val*exp(half_l_di*half_l_di
+                                          -d_i*diff_t
+                                          +ln_part_1)
+                       +t*sign_val*exp(half_l_di*half_l_di
+                                          -d_i*t-d_j*tprime
+                                          +ln_part_2))
+                    + l/sqrt(pi)*(-exp(-diff_t*diff_t/l2)
+                                     +exp(-tprime*tprime/l2-d_i*t)
+                                     +exp(-t*t/l2-d_j*tprime)
+                                     -exp(-(d_i*t + d_j*tprime))))
+            return base/(d_i+d_j)
+
+class dh_dd_j(Function):
+    nargs = 5
+    @classmethod
+    def eval(cls, t, tprime, d_i, d_j, l):
+        if (t.is_Number
+            and tprime.is_Number
+            and d_i.is_Number
+            and d_j.is_Number
+            and l.is_Number):
+            diff_t = (t-tprime)
+            l2 = l*l
+            half_l_di = 0.5*l*d_i
+            h = h(t, tprime, d_i, d_j, l)
+            arg_1 = half_l_di + tprime/l
+            arg_2 = half_l_di - (t-tprime)/l
+            ln_part_1 = ln_diff_erf(arg_1, arg_2)
+            arg_1 = half_l_di 
+            arg_2 = half_l_di - t/l
+            sign_val = sign(t/l)
+            ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
+            sign_val = sign(t/l)
+            base = tprime*sign_val*exp(half_l_di*half_l_di-(d_i*t+d_j*tprime)+ln_part_2)-h
+            return base/(d_i+d_j)
+    
+class dh_dl(Function):
+    nargs = 5
+    @classmethod
+    def eval(cls, t, tprime, d_i, d_j, l):
+        if (t.is_Number
+            and tprime.is_Number
+            and d_i.is_Number
+            and d_j.is_Number
+            and l.is_Number):
+
+            diff_t = (t-tprime)
+            l2 = l*l
+            h = h(t, tprime, d_i, d_j, l)
+            return 0.5*d_i*d_i*l*h + 2./(sqrt(pi)*(d_i+d_j))*((-diff_t/l2-d_i/2.)*exp(-diff_t*diff_t/l2)+(-tprime/l2+d_i/2.)*exp(-tprime*tprime/l2-d_i*t)-(-t/l2-d_i/2.)*exp(-t*t/l2-d_j*tprime)-d_i/2.*exp(-(d_i*t+d_j*tprime)))
+
+class dh_dt(Function):
     nargs = 5
-
-    def fdiff(self, argindex=1):
-        pass
-    
     @classmethod
     def eval(cls, t, tprime, d_i, d_j, l):
-        # putting in the is_Number stuff forces it to look for a fdiff method for derivative.
         if (t.is_Number
             and tprime.is_Number
             and d_i.is_Number
@@ -40,13 +109,119 @@ class sim_h(Function):
                 or l is S.NaN):
                 return S.NaN
             else:
-                return (exp((d_j/2*l)**2)/(d_i+d_j)
-                        *(exp(-d_j*(tprime - t))
-                          *(erf((tprime-t)/l - d_j/2*l)
-                            + erf(t/l + d_j/2*l))
-                          - exp(-(d_j*tprime + d_i))
-                          *(erf(tprime/l - d_j/2*l)
-                            + erf(d_j/2*l))))
+                half_l_di = 0.5*l*d_i
+                arg_1 = half_l_di + tprime/l
+                arg_2 = half_l_di - (t-tprime)/l
+                ln_part_1 = ln_diff_erf(arg_1, arg_2)
+                arg_1 = half_l_di 
+                arg_2 = half_l_di - t/l
+                sign_val = sign(t/l)
+                ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
+
+                
+                return (sign_val*exp(half_l_di*half_l_di
+                                        - d_i*(t-tprime)
+                                        + ln_part_1
+                                        - log(d_i + d_j))
+                        - sign_val*exp(half_l_di*half_l_di
+                                          - d_i*t - d_j*tprime
+                                          + ln_part_2
+                                          - log(d_i + d_j))).diff(t)
+
+class dh_dtprime(Function):
+    nargs = 5
+    @classmethod
+    def eval(cls, t, tprime, d_i, d_j, l):
+        if (t.is_Number
+            and tprime.is_Number
+            and d_i.is_Number
+            and d_j.is_Number
+            and l.is_Number):
+            if (t is S.NaN
+                or tprime is S.NaN
+                or d_i is S.NaN
+                or d_j is S.NaN
+                or l is S.NaN):
+                return S.NaN
+            else:
+                half_l_di = 0.5*l*d_i
+                arg_1 = half_l_di + tprime/l
+                arg_2 = half_l_di - (t-tprime)/l
+                ln_part_1 = ln_diff_erf(arg_1, arg_2)
+                arg_1 = half_l_di 
+                arg_2 = half_l_di - t/l
+                sign_val = sign(t/l)
+                ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
+
+                
+                return (sign_val*exp(half_l_di*half_l_di
+                                        - d_i*(t-tprime)
+                                        + ln_part_1
+                                        - log(d_i + d_j))
+                        - sign_val*exp(half_l_di*half_l_di
+                                          - d_i*t - d_j*tprime
+                                          + ln_part_2
+                                          - log(d_i + d_j))).diff(tprime)
+
+
+class h(Function):
+    nargs = 5
+    def fdiff(self, argindex=5):
+        t, tprime, d_i, d_j, l = self.args
+        if argindex == 1:
+            return dh_dt(t, tprime, d_i, d_j, l)
+        elif argindex == 2:
+            return dh_dtprime(t, tprime, d_i, d_j, l)
+        elif argindex == 3:
+            return dh_dd_i(t, tprime, d_i, d_j, l)
+        elif argindex == 4:
+            return dh_dd_j(t, tprime, d_i, d_j, l)
+        elif argindex == 5:
+            return dh_dl(t, tprime, d_i, d_j, l)
+                                                                
+    
+    @classmethod
+    def eval(cls, t, tprime, d_i, d_j, l):
+        # putting in the is_Number stuff forces it to look for a fdiff method for derivative. If it's left out, then when asking for self.diff, it just does the diff on the eval symbolic terms directly. We want to avoid that because we are looking to ensure everything is numerically stable. Maybe it's because of the if statement that this happens? 
+        if (t.is_Number
+            and tprime.is_Number
+            and d_i.is_Number
+            and d_j.is_Number
+            and l.is_Number):
+            if (t is S.NaN
+                or tprime is S.NaN
+                or d_i is S.NaN
+                or d_j is S.NaN
+                or l is S.NaN):
+                return S.NaN
+            else:
+                half_l_di = 0.5*l*d_i
+                arg_1 = half_l_di + tprime/l
+                arg_2 = half_l_di - (t-tprime)/l
+                ln_part_1 = ln_diff_erf(arg_1, arg_2)
+                arg_1 = half_l_di 
+                arg_2 = half_l_di - t/l
+                sign_val = sign(t/l)
+                ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l)
+
+                
+                return (sign_val*exp(half_l_di*half_l_di
+                                        - d_i*(t-tprime)
+                                        + ln_part_1
+                                        - log(d_i + d_j))
+                        - sign_val*exp(half_l_di*half_l_di
+                                          - d_i*t - d_j*tprime
+                                          + ln_part_2
+                                          - log(d_i + d_j)))
+            
+                                  
+                # return (exp((d_j/2.*l)**2)/(d_i+d_j)
+                #         *(exp(-d_j*(tprime - t))
+                #           *(erf((tprime-t)/l - d_j/2.*l)
+                #             + erf(t/l + d_j/2.*l))
+                #           - exp(-(d_j*tprime + d_i))
+                #           *(erf(tprime/l - d_j/2.*l)
+                #             + erf(d_j/2.*l))))
 
 class erfc(Function):
     nargs = 1

From c12ca4c53d625d9ccf5da0ab749e53f145e60ab6 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Thu, 14 Nov 2013 08:54:05 +0000
Subject: [PATCH 03/28] a trial namespace renaming

---
 GPy/models/__init__.py | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py
index 10ce577b..a8be5890 100644
--- a/GPy/models/__init__.py
+++ b/GPy/models/__init__.py
@@ -1,18 +1,19 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-from gp_regression import GPRegression
-from gp_classification import GPClassification
-from sparse_gp_regression import SparseGPRegression
-from svigp_regression import SVIGPRegression
-from sparse_gp_classification import SparseGPClassification
-from fitc_classification import FITCClassification
-from gplvm import GPLVM
-from bcgplvm import BCGPLVM
-from sparse_gplvm import SparseGPLVM
-from warped_gp import WarpedGP
-from bayesian_gplvm import BayesianGPLVM
-from mrd import MRD
-from gradient_checker import GradientChecker
-from gp_multioutput_regression import GPMultioutputRegression
-from sparse_gp_multioutput_regression import SparseGPMultioutputRegression
+from gp_regression import GPRegression; _gp_regression = gp_regression ; del gp_regression 
+from gp_classification import GPClassification; _gp_classification = gp_classification ; del gp_classification 
+from sparse_gp_regression import SparseGPRegression; _sparse_gp_regression = sparse_gp_regression ; del sparse_gp_regression 
+from svigp_regression import SVIGPRegression; _svigp_regression = svigp_regression ; del svigp_regression 
+from sparse_gp_classification import SparseGPClassification; _sparse_gp_classification = sparse_gp_classification ; del sparse_gp_classification 
+from fitc_classification import FITCClassification; _fitc_classification = fitc_classification ; del fitc_classification 
+from gplvm import GPLVM; _gplvm = gplvm ; del gplvm 
+from bcgplvm import BCGPLVM; _bcgplvm = bcgplvm; del bcgplvm
+from sparse_gplvm import SparseGPLVM; _sparse_gplvm = sparse_gplvm ; del sparse_gplvm 
+from warped_gp import WarpedGP; _warped_gp = warped_gp ; del warped_gp 
+from bayesian_gplvm import BayesianGPLVM; _bayesian_gplvm = bayesian_gplvm ; del bayesian_gplvm 
+from mrd import MRD; _mrd = mrd ; del mrd 
+from gradient_checker import GradientChecker; _gradient_checker = gradient_checker ; del gradient_checker 
+from gp_multioutput_regression import GPMultioutputRegression; _gp_multioutput_regression = gp_multioutput_regression ; del gp_multioutput_regression 
+from sparse_gp_multioutput_regression import SparseGPMultioutputRegression; _sparse_gp_multioutput_regression = sparse_gp_multioutput_regression ; del sparse_gp_multioutput_regression 
+

From d95137e0497cae4b5ba7deed862cbf686bb0f837 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Thu, 14 Nov 2013 09:01:05 +0000
Subject: [PATCH 04/28] half way through crossterm objective

---
 GPy/kern/kern.py                          | 17 ++++++++++++-----
 GPy/kern/parts/rbf.py                     | 10 ++++++++++
 GPy/testing/psi_stat_expectation_tests.py | 15 +++++++++------
 3 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 619d1687..d686064a 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -462,10 +462,8 @@ class kern(Parameterized):
                 pass
             # rbf X bias
             elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
-                target += 2 * p1.variance * (p2._psi1[:, :, None] + p2._psi1[:, None, :])
+                target += p1.variance * (p2._psi1[:, :, None] + p2._psi1[:, None, :])
             elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
-                tmp1 = p2.variance * (p1._psi1[:, :, None] + p1._psi1[:, None, :])
-                renorm = p1.variance*np.exp()
                 target += p2.variance * (p1._psi1[:, :, None] + p1._psi1[:, None, :])
             # linear X bias
             elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, Linear):
@@ -478,12 +476,21 @@ class kern(Parameterized):
                 target += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
             # rbf X any
             elif isinstance(p1, (RBF, RBFInv)):
-                pass
+                psi11 = np.zeros((mu.shape[0], Z.shape[0]))
+                psi12 = np.zeros((mu.shape[0], Z.shape[0]))
+                p1.psi1(Z, mu, S, psi11)
+                p2.psi1(Z, mu, S, psi12)
+                
+                crossterms  = psi11[:, :, None] + psi12[:, None, :]
+                crossterms += psi12[:, :, None] + psi11[:, None, :]
+                
+                target += p1._crossterm_product_expectation(p2, Z, mu, S)
+                #import ipdb;ipdb.set_trace()
             elif isinstance(p2, (RBF, RBFInv)):
                 raise NotImplementedError # TODO
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
-        return target
+        return target        
 
     def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
         target = np.zeros(self.num_params)
diff --git a/GPy/kern/parts/rbf.py b/GPy/kern/parts/rbf.py
index 585d687f..56a6b0eb 100644
--- a/GPy/kern/parts/rbf.py
+++ b/GPy/kern/parts/rbf.py
@@ -208,6 +208,16 @@ class RBF(Kernpart):
         self._psi_computations(Z, mu, S)
         target += self._psi2
 
+    def _crossterm_product_expectation(self, K, Z, mu, S):
+        # compute the crossterm expectation for K as the other kernel:
+        import ipdb;ipdb.set_trace()
+        Sigma = 1./self.lengthscale[None,:] + 1./S # is independent across M, 
+        M = (Z[None,:,:]/self.lengthscale[None,None,:] + (mu/S)[:,None,:]) / Sigma[:,None,:]
+        psi1_other = K.psi1()
+        self.variance
+        # return is [N x M x M]
+        return 
+
     def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
         """Shape N,num_inducing,num_inducing,Ntheta"""
         self._psi_computations(Z, mu, S)
diff --git a/GPy/testing/psi_stat_expectation_tests.py b/GPy/testing/psi_stat_expectation_tests.py
index 16904927..ae3d1022 100644
--- a/GPy/testing/psi_stat_expectation_tests.py
+++ b/GPy/testing/psi_stat_expectation_tests.py
@@ -27,7 +27,7 @@ def ard(p):
 @testing.deepTest(__test__())
 class Test(unittest.TestCase):
     input_dim = 9
-    num_inducing = 4
+    num_inducing = 13
     N = 30
     Nsamples = 9e6
 
@@ -51,13 +51,16 @@ class Test(unittest.TestCase):
 #                        GPy.kern.bias(self.input_dim) +
 #                        GPy.kern.white(self.input_dim)),
 #                     (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
-#                     GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
-#                     GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) +
+                    (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
+                     +GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True)
 #                     GPy.kern.bias(self.input_dim) +
 #                     GPy.kern.white(self.input_dim)),
-        (GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) +
-                    GPy.kern.bias(self.input_dim, np.random.rand()) +
-                    GPy.kern.white(self.input_dim, np.random.rand())),
+                    ),
+        (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
+         +GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
+         #+GPy.kern.bias(self.input_dim, np.random.rand())
+         #+GPy.kern.white(self.input_dim, np.random.rand())),
+         ),
                 (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
                     GPy.kern.bias(self.input_dim, np.random.rand()) +
                     GPy.kern.white(self.input_dim, np.random.rand())),

From a074763eb69597ae22b0b5f7b284a96685d12ea2 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Thu, 14 Nov 2013 12:28:26 -0800
Subject: [PATCH 05/28] fixed problem in warping

---
 GPy/util/warping_functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py
index e05f39af..35ad3b80 100644
--- a/GPy/util/warping_functions.py
+++ b/GPy/util/warping_functions.py
@@ -222,7 +222,7 @@ class TanhWarpingFunction_d(WarpingFunction):
         """
 
 
-        mpsi = psi.coSpy()
+        mpsi = psi.copy()
         d = psi[-1]
         mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
 

From b845c0d634a48f9e11e13cb6a3329629e84e28fd Mon Sep 17 00:00:00 2001
From: mu <mu@mu-DQ67SW.(none)>
Date: Mon, 18 Nov 2013 10:43:58 +0000
Subject: [PATCH 06/28] constructor and init for ODE_UY

---
 GPy/kern/constructors.py   | 17 +++++++++++++++++
 GPy/kern/parts/__init__.py |  1 +
 2 files changed, 18 insertions(+)

diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 392f43ba..1feec4df 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -588,3 +588,20 @@ def ODE_1(input_dim=1, varianceU=1.,  varianceY=1., lengthscaleU=None,  lengthsc
     """
     part = parts.ODE_1.ODE_1(input_dim, varianceU, varianceY, lengthscaleU, lengthscaleY)
     return kern(input_dim, [part])
+
+def ODE_UY(input_dim=2, varianceU=1.,  varianceY=1., lengthscaleU=None,  lengthscaleY=None):
+    """
+    kernel resultiong from a first order ODE with OU driving GP
+    :param input_dim: the number of input dimension, has to be equal to one
+    :type input_dim: int
+    :param input_lengthU: the number of input U length
+    :param varianceU: variance of the driving GP
+    :type varianceU: float
+    :param varianceY: 'variance' of the transfer function
+    :type varianceY: float
+    :param lengthscaleY: 'lengthscale' of the transfer function
+    :type lengthscaleY: float
+    :rtype: kernel object
+    """
+    part = parts.ODE_UY.ODE_UY(input_dim, varianceU, varianceY, lengthscaleU, lengthscaleY)
+    return kern(input_dim, [part])
\ No newline at end of file
diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py
index 0a758f1e..3b020828 100644
--- a/GPy/kern/parts/__init__.py
+++ b/GPy/kern/parts/__init__.py
@@ -14,6 +14,7 @@ import Matern32
 import Matern52
 import mlp
 import ODE_1
+import ODE_UY
 import periodic_exponential
 import periodic_Matern32
 import periodic_Matern52

From 241ca0b628b5eb2cf8e00cde11fa842721fcbf6c Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Mon, 18 Nov 2013 16:39:43 +0000
Subject: [PATCH 07/28] Working eq_ode1 in sympy now.

---
 GPy/kern/parts/__init__.py       |   1 +
 GPy/kern/parts/sympy_helpers.cpp | 119 ++++++++++++++++++-------------
 GPy/kern/parts/sympy_helpers.py  |  71 ++++++++++++++++++
 GPy/util/symbolic.py             |   2 +-
 4 files changed, 141 insertions(+), 52 deletions(-)
 create mode 100644 GPy/kern/parts/sympy_helpers.py

diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py
index 0a758f1e..54c5bba5 100644
--- a/GPy/kern/parts/__init__.py
+++ b/GPy/kern/parts/__init__.py
@@ -26,4 +26,5 @@ import rbf
 import rbf_inv
 import spline
 import symmetric
+import sympy_helpers
 import white
diff --git a/GPy/kern/parts/sympy_helpers.cpp b/GPy/kern/parts/sympy_helpers.cpp
index d21d2683..9f30eea9 100644
--- a/GPy/kern/parts/sympy_helpers.cpp
+++ b/GPy/kern/parts/sympy_helpers.cpp
@@ -1,3 +1,4 @@
+#include "Python.h"
 #include <math.h>
 #include <float.h>
 #include <stdlib.h>
@@ -29,24 +30,33 @@ double sinc_grad(double x){
   else 
     return (x*cos(x) - sin(x))/(x*x);
 }
-
 double erfcx(double x){
+  // Based on code by Soren Hauberg 2010 for Octave.
   // compute the scaled complex error function.
+  //return erfc(x)*exp(x*x);
   double xneg=-sqrt(log(DBL_MAX/2));
   double xmax = 1/(sqrt(M_PI)*DBL_MIN);
   xmax = DBL_MAX<xmax ? DBL_MAX : xmax;
   // Find values where erfcx can be evaluated
-  double t = 3.97886080735226 / (abs(x) + 3.97886080735226);
+  double t = 3.97886080735226 / (fabs(x) + 3.97886080735226);
   double u = t-0.5;
   double y = (((((((((u * 0.00127109764952614092 + 1.19314022838340944e-4) * u 
-	      - 0.003963850973605135)   * u - 8.70779635317295828e-4) * u 
-	    + 0.00773672528313526668) * u + 0.00383335126264887303) * u 
-	  - 0.0127223813782122755)  * u - 0.0133823644533460069)  * u 
-	+ 0.0161315329733252248)  * u + 0.0390976845588484035)  * u + 0.00249367200053503304;
+		     - 0.003963850973605135)   * u - 8.70779635317295828e-4) * u 
+		   + 0.00773672528313526668) * u + 0.00383335126264887303) * u 
+		 - 0.0127223813782122755)  * u - 0.0133823644533460069)  * u 
+	       + 0.0161315329733252248)  * u + 0.0390976845588484035)  * u + 0.00249367200053503304;
+  y = ((((((((((((y * u - 0.0838864557023001992) * u -		       
+		 0.119463959964325415) * u + 0.0166207924969367356) * u + 
+	       0.357524274449531043) * u + 0.805276408752910567)  * u + 
+	     1.18902982909273333)  * u + 1.37040217682338167)   * u +	
+	   1.31314653831023098)  * u + 1.07925515155856677)   * u +	
+	 0.774368199119538609) * u + 0.490165080585318424)  * u +	
+       0.275374741597376782) * t;
+
   if (x<xneg)
     return -INFINITY;
   else if (x<0)
-    return 2*exp(x*x)-y;
+    return 2.0*exp(x*x)-y;
   else if (x>xmax)
     return 0.0;
   else 
@@ -55,16 +65,19 @@ double erfcx(double x){
 
 double ln_diff_erf(double x0, double x1){
   // stably compute the log of difference between two erfs.
-  if (x1>x0)
-    throw std::runtime_error("Error: second argument must be smaller than first in ln_diff_err");
-  return log(erf(x0) - erf(x1));
-  if (x0==x1)
+  if (x1>x0){
+    PyErr_SetString(PyExc_RuntimeError,"second argument must be smaller than or equal to first in ln_diff_erf");
+    throw 1;
+  }
+  if (x0==x1){
+    PyErr_WarnEx(PyExc_RuntimeWarning,"divide by zero encountered in log", 1);
     return -INFINITY;
-  else if(x0<0 && x1>0 || x0>0 && x1<0)
+  }
+  else if(x0<0 && x1>0 || x0>0 && x1<0) //x0 and x1 have opposite signs
     return log(erf(x0)-erf(x1));
-  else if(x1>0)
-    return log(erfcx(x1)-erfcx(x0)*exp(x1*x1- x0*x0))-x1*x1;
-  else 
+  else if(x0>0) //x0 positive, x1 non-negative
+    return log(erfcx(x1)-erfcx(x0)*exp(x1*x1- x0*x0))-x1*x1; 
+  else //x0 and x1 non-positive
     return log(erfcx(-x0)-erfcx(-x1)*exp(x0*x0 - x1*x1))-x0*x0;
 }
 
@@ -80,26 +93,19 @@ double h(double t, double tprime, double d_i, double d_j, double l){
     sign_val = 0.0;
   else if (t/l < 0)
     sign_val = -1.0;
-  double ln_part_2 = ln_diff_erf(half_l_di, arg_2);
-  
-  return sign_val*exp(half_l_di*half_l_di - d_i*(t-tprime) + ln_part_1 - log(d_i + d_j)) - sign_val*exp(half_l_di*half_l_di - d_i*t - d_j*tprime + ln_part_2 - log(d_i + d_j));
-}
-
-double dh_dl(double t, double tprime, double d_i, double d_j, double l){
-  // compute gradient of h function with respect to lengthscale for sim covariance
-  // TODO a lot of energy wasted recomputing things here, need to do this in a shared way somehow ... perhaps needs rewrite of sympykern.
-  double half_l_di = 0.5*l*d_i;
-  double arg_1 = half_l_di + tprime/l;
-  double arg_2 = half_l_di - (t-tprime)/l;
-  double ln_part_1 = ln_diff_erf(arg_1, arg_2);
   arg_2 = half_l_di - t/l;
   double ln_part_2 = ln_diff_erf(half_l_di, arg_2);
-  double diff_t = t - tprime;
-  double l2 = l*l;
-  double hv = h(t, tprime, d_i, d_j, l);
-  return 0.5*d_i*d_i*l*hv + 2/(sqrt(M_PI)*(d_i+d_j))*((-diff_t/l2-d_i/2)*exp(-diff_t*diff_t/l2)+(-tprime/l2+d_i/2)*exp(-tprime*tprime/l2-d_i*t)-(-t/l2-d_i/2)*exp(-t*t/l2-d_j*tprime)-d_i/2*exp(-(d_i*t+d_j*tprime)));
+  // if either ln_part_1 or ln_part_2 are -inf, don't bother computing rest of that term.
+  double part_1 = 0.0;
+  if(isfinite(ln_part_1))
+    part_1 = sign_val*exp(half_l_di*half_l_di - d_i*(t-tprime) + ln_part_1 - log(d_i + d_j));
+  double part_2 = 0.0;
+  if(isfinite(ln_part_2))
+    part_2 = sign_val*exp(half_l_di*half_l_di - d_i*t - d_j*tprime + ln_part_2 - log(d_i + d_j));
+  return part_1 - part_2;
 }
 
+
 double dh_dd_i(double t, double tprime, double d_i, double d_j, double l){
   double diff_t = (t-tprime);
   double l2 = l*l;
@@ -116,41 +122,52 @@ double dh_dd_i(double t, double tprime, double d_i, double d_j, double l){
   else if (t/l < 0)
     sign_val = -1.0;
   double ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l);
-
-  double base = ((0.5*d_i*l2*(d_i+d_j)-1)*hv 
-		 + (-diff_t*sign_val*exp(half_l_di*half_l_di
-					 -d_i*diff_t
-					 +ln_part_1)
-		    +t*sign_val*exp(half_l_di*half_l_di
-				    -d_i*t-d_j*tprime
-				    +ln_part_2))
-		 + l/sqrt(M_PI)*(-exp(-diff_t*diff_t/l2)
-			       +exp(-tprime*tprime/l2-d_i*t)
-			       +exp(-t*t/l2-d_j*tprime)
-			       -exp(-(d_i*t + d_j*tprime))));
+  double base = (0.5*d_i*l2*(d_i+d_j)-1)*hv;
+  if(isfinite(ln_part_1))
+    base -= diff_t*sign_val*exp(half_l_di*half_l_di
+				-d_i*diff_t
+				+ln_part_1);
+  if(isfinite(ln_part_2))
+    base += t*sign_val*exp(half_l_di*half_l_di
+			   -d_i*t-d_j*tprime
+			   +ln_part_2);
+  base += l/sqrt(M_PI)*(-exp(-diff_t*diff_t/l2)
+			+exp(-tprime*tprime/l2-d_i*t)
+			+exp(-t*t/l2-d_j*tprime)
+			-exp(-(d_i*t + d_j*tprime)));
   return base/(d_i+d_j);
+
 }
 
 double dh_dd_j(double t, double tprime, double d_i, double d_j, double l){
-  double diff_t = (t-tprime);
-  double l2 = l*l;
   double half_l_di = 0.5*l*d_i;
   double hv = h(t, tprime, d_i, d_j, l);
-  double arg_1 = half_l_di + tprime/l;
-  double arg_2 = half_l_di - (t-tprime)/l;
-  double ln_part_1 = ln_diff_erf(arg_1, arg_2);
-  arg_1 = half_l_di;
-  arg_2 = half_l_di - t/l;
   double sign_val = 1.0;
   if(t/l==0)
     sign_val = 0.0;
   else if (t/l < 0)
     sign_val = -1.0;
   double ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l);
-  double base = tprime*sign_val*exp(half_l_di*half_l_di-(d_i*t+d_j*tprime)+ln_part_2)-hv;
+  double base = -hv;
+  if(isfinite(ln_part_2))
+    base += tprime*sign_val*exp(half_l_di*half_l_di-(d_i*t+d_j*tprime)+ln_part_2);
   return base/(d_i+d_j);
 }
 
+double dh_dl(double t, double tprime, double d_i, double d_j, double l){
+  // compute gradient of h function with respect to lengthscale for sim covariance
+  // TODO a lot of energy wasted recomputing things here, need to do this in a shared way somehow ... perhaps needs rewrite of sympykern.
+  double half_l_di = 0.5*l*d_i;
+  double arg_1 = half_l_di + tprime/l;
+  double arg_2 = half_l_di - (t-tprime)/l;
+  double ln_part_1 = ln_diff_erf(arg_1, arg_2);
+  arg_2 = half_l_di - t/l;
+  double ln_part_2 = ln_diff_erf(half_l_di, arg_2);
+  double diff_t = t - tprime;
+  double l2 = l*l;
+  double hv = h(t, tprime, d_i, d_j, l);
+  return 0.5*d_i*d_i*l*hv + 2/(sqrt(M_PI)*(d_i+d_j))*((-diff_t/l2-d_i/2)*exp(-diff_t*diff_t/l2)+(-tprime/l2+d_i/2)*exp(-tprime*tprime/l2-d_i*t)-(-t/l2-d_i/2)*exp(-t*t/l2-d_j*tprime)-d_i/2*exp(-(d_i*t+d_j*tprime)));
+}
 
 double dh_dt(double t, double tprime, double d_i, double d_j, double l){
   return 0.0;
diff --git a/GPy/kern/parts/sympy_helpers.py b/GPy/kern/parts/sympy_helpers.py
new file mode 100644
index 00000000..125dac58
--- /dev/null
+++ b/GPy/kern/parts/sympy_helpers.py
@@ -0,0 +1,71 @@
+# Code for testing functions written in sympy_helpers.cpp
+from scipy import weave
+import tempfile
+import os
+import numpy as np
+current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
+extra_compile_args = []
+
+weave_kwargs = {
+    'support_code': "",
+    'include_dirs':[tempfile.gettempdir(), current_dir],
+    'headers':['"parts/sympy_helpers.h"'],
+    'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],
+    'extra_compile_args':extra_compile_args,
+    'extra_link_args':['-lgomp'],
+    'verbose':True}
+
+def erfcx(x):
+    code = """
+        // Code for computing scaled complementary erf
+        int i;
+        int dim;
+        int elements = Ntarget[0];
+        for (dim=1; dim<Dtarget; dim++)
+          elements *= Ntarget[dim];
+        for (i=0;i<elements;i++) 
+            target[i] = erfcx(x[i]);
+        """
+    x = np.asarray(x)
+    arg_names = ['target','x']
+    target = np.zeros_like(x)
+    weave.inline(code=code, arg_names=arg_names,**weave_kwargs)
+    return target
+
+def ln_diff_erf(x, y):
+    code = """
+        // Code for computing scaled complementary erf
+        int i;
+        int dim;
+        int elements = Ntarget[0];
+        for (dim=1; dim<Dtarget; dim++)
+          elements *= Ntarget[dim];
+        for (i=0;i<elements;i++) 
+          target[i] = ln_diff_erf(x[i], y[i]);
+        """
+    x = np.asarray(x)
+    y = np.asarray(y)
+    assert(x.shape==y.shape)
+    target = np.zeros_like(x)
+    arg_names = ['target','x', 'y']
+    weave.inline(code=code, arg_names=arg_names,**weave_kwargs)
+    return target
+
+def h(t, tprime, d_i, d_j, l):
+    code = """
+        // Code for computing the 1st order ODE h helper function.
+        int i;
+        int dim;
+        int elements = Ntarget[0];
+        for (dim=1; dim<Dtarget; dim++)
+          elements *= Ntarget[dim];
+        for (i=0;i<elements;i++) 
+          target[i] = h(t[i], tprime[i], d_i, d_j, l);
+        """
+    t = np.asarray(t)
+    tprime = np.asarray(tprime)
+    assert(tprime.shape==t.shape)
+    target = np.zeros_like(t)
+    arg_names = ['target','t', 'tprime', 'd_i', 'd_j', 'l']
+    weave.inline(code=code, arg_names=arg_names,**weave_kwargs)
+    return target
diff --git a/GPy/util/symbolic.py b/GPy/util/symbolic.py
index d546f940..395f9e3e 100644
--- a/GPy/util/symbolic.py
+++ b/GPy/util/symbolic.py
@@ -10,7 +10,7 @@ class ln_diff_erf(Function):
             return -2*exp(-x1**2)/(sqrt(pi)*(erf(x0)-erf(x1)))
         elif argindex == 1:
             x0, x1 = self.args
-            return 2*exp(-x0**2)/(sqrt(pi)*(erf(x0)-erf(x1)))
+            return 2.*exp(-x0**2)/(sqrt(pi)*(erf(x0)-erf(x1)))
         else:
             raise ArgumentIndexError(self, argindex)
         

From f46c72b79b752def2883143bcb90e1cb0394f0ee Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Tue, 19 Nov 2013 06:50:25 +0000
Subject: [PATCH 08/28] Bug fix for single output sympy kernel.

---
 GPy/kern/parts/__init__.py  |  2 +-
 GPy/kern/parts/sympykern.py | 15 +++++++++++----
 GPy/util/datasets.py        | 15 +++++++++++++--
 3 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py
index d8e7f8e6..f278941a 100644
--- a/GPy/kern/parts/__init__.py
+++ b/GPy/kern/parts/__init__.py
@@ -14,7 +14,7 @@ import Matern32
 import Matern52
 import mlp
 import ODE_1
-import ODE_UY
+#import ODE_UY
 import periodic_exponential
 import periodic_Matern32
 import periodic_Matern52
diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/parts/sympykern.py
index 88c179aa..7f7fba11 100644
--- a/GPy/kern/parts/sympykern.py
+++ b/GPy/kern/parts/sympykern.py
@@ -177,8 +177,15 @@ class spkern(Kernpart):
         # Code to compute argument string when only diagonal is required.
         diag_arg_string = re.sub('int jj','//int jj',X_arg_string)
         diag_arg_string = re.sub('j','i',diag_arg_string)
-        diag_precompute_string = precompute_list[0]
-
+        if precompute_string == '':
+            # if it's not multioutput, the precompute strings are set to zero
+            diag_precompute_string = ''
+            diag_precompute_replace = ''
+        else:
+            # for multioutput we need to extract the index of the output form the input.
+            diag_precompute_string = precompute_list[0]
+            diag_precompute_replace = precompute_list[1]
+        
 
         # Here's the code to do the looping for K
         self._K_code =\
@@ -215,13 +222,13 @@ class spkern(Kernpart):
             TARGET2(i, i) += k(%s);
             for (j=0;j<i;j++){
               %s //int jj=(int)X2(j, 1);
-              double kval = k(%s); //double kval = k(X2(i, 0), X2(j, 0), shared_lengthscale, LENGTHSCALE1(ii), SCALE1(ii), LENGTHSCALE1(jj), SCALE1(jj));
+              double kval = k(%s); //double kval = k(X2(i, 0), shared_lengthscale, LENGTHSCALE1(ii), SCALE1(ii));
               TARGET2(i, j) += kval;
               TARGET2(j, i) += kval;
             }
         }
         /*%s*/
-        """%(diag_precompute_string, diag_arg_string, re.sub('Z2', 'X2', precompute_list[1]), X_arg_string,str(self._sp_k)) #adding a string representation forces recompile when needed
+        """%(diag_precompute_string, diag_arg_string, re.sub('Z2', 'X2', diag_precompute_replace), X_arg_string,str(self._sp_k)) #adding a string representation forces recompile when needed
 
         # Code to do the looping for Kdiag
         self._Kdiag_code =\
diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index 565f8e76..69f010f9 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -689,7 +689,7 @@ def olympic_marathon_men(data_set='olympic_marathon_men'):
     Y = olympics[:, 1:2]
     return data_details_return({'X': X, 'Y': Y}, data_set)
 
-def olympics():
+def olympic_sprints(data_set='rogers_girolami_data'):
     """All olympics sprint winning times for multiple output prediction."""
     X = np.zeros((0, 2))
     Y = np.zeros((0, 1))
@@ -707,7 +707,18 @@ def olympics():
     data['X'] = X
     data['Y'] = Y
     data['info'] = "Olympics sprint event winning for men and women to 2008. Data is from Rogers and Girolami's First Course in Machine Learning."
-    return data
+    return data_details_return({
+        'X': X,
+        'Y': Y,
+        'info': "Olympics sprint event winning for men and women to 2008. Data is from Rogers and Girolami's First Course in Machine Learning.",
+        'output_info': {
+          0:'100m Men', 
+          1:'100m Women', 
+          2:'200m Men', 
+          3:'200m Women', 
+          4:'400m Men', 
+          5:'400m Women'}
+        }, data_set)
 
 # def movielens_small(partNo=1,seed=default_seed):
 #     np.random.seed(seed=seed)

From 5f3b6bd204624941f130c5d85cf9ca3fc250afd2 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Tue, 19 Nov 2013 09:33:06 +0000
Subject: [PATCH 09/28] Moved data resource information to a json file.

---
 GPy/util/data_resources.json               | 319 +++++++++++++++++++++
 GPy/util/datasets.py                       | 131 +--------
 GPy/util/datasets/data_resources_create.py | 127 ++++++++
 3 files changed, 453 insertions(+), 124 deletions(-)
 create mode 100644 GPy/util/data_resources.json
 create mode 100644 GPy/util/datasets/data_resources_create.py

diff --git a/GPy/util/data_resources.json b/GPy/util/data_resources.json
new file mode 100644
index 00000000..2b36b0c1
--- /dev/null
+++ b/GPy/util/data_resources.json
@@ -0,0 +1,319 @@
+{
+   "rogers_girolami_data":{
+      "files":[
+         [
+            "firstcoursemldata.tar.gz"
+         ]
+      ],
+      "license":null,
+      "citation":"A First Course in Machine Learning. Simon Rogers and Mark Girolami: Chapman & Hall/CRC, ISBN-13: 978-1439824146",
+      "details":"Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.",
+      "urls":[
+         "https://www.dropbox.com/sh/7p6tu1t29idgliq/_XqlH_3nt9/"
+      ],
+      "suffices":[
+         [
+            "?dl=1"
+         ]
+      ],
+      "size":21949154
+   },
+   "ankur_pose_data":{
+      "files":[
+         [
+            "ankurDataPoseSilhouette.mat"
+         ]
+      ],
+      "citation":"3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.",
+      "license":null,
+      "urls":[
+         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/ankur_pose_data/"
+      ],
+      "details":"Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing."
+   },
+   "osu_accad":{
+      "files":[
+         [
+            "swagger1TXT.ZIP",
+            "handspring1TXT.ZIP",
+            "quickwalkTXT.ZIP",
+            "run1TXT.ZIP",
+            "sprintTXT.ZIP",
+            "dogwalkTXT.ZIP",
+            "camper_04TXT.ZIP",
+            "dance_KB3_TXT.ZIP",
+            "per20_TXT.ZIP",
+            "perTWO07_TXT.ZIP",
+            "perTWO13_TXT.ZIP",
+            "perTWO14_TXT.ZIP",
+            "perTWO15_TXT.ZIP",
+            "perTWO16_TXT.ZIP"
+         ],
+         [
+            "connections.txt"
+         ]
+      ],
+      "license":"Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).",
+      "citation":"The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.",
+      "details":"Motion capture data of different motions from the Open Motion Data Project at Ohio State University.",
+      "urls":[
+         "http://accad.osu.edu/research/mocap/data/",
+         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/stick/"
+      ],
+      "size":15922790
+   },
+   "isomap_face_data":{
+      "files":[
+         [
+            "face_data.mat"
+         ]
+      ],
+      "license":null,
+      "citation":"A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000",
+      "details":"Face data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.",
+      "urls":[
+         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/isomap_face_data/"
+      ],
+      "size":24229368
+   },
+   "boston_housing":{
+      "files":[
+         [
+            "Index",
+            "housing.data",
+            "housing.names"
+         ]
+      ],
+      "license":null,
+      "citation":"Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.",
+      "details":"The Boston Housing data relates house values in Boston to a range of input variables.",
+      "urls":[
+         "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/"
+      ],
+      "size":51276
+   },
+   "cmu_mocap_full":{
+      "files":[
+         [
+            "allasfamc.zip"
+         ]
+      ],
+      "license":"From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. The database was created with funding from NSF EIA-0196217.",
+      "citation":"Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu.\nThe database was created with funding from NSF EIA-0196217.",
+      "details":"CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.",
+      "urls":[
+         "http://mocap.cs.cmu.edu"
+      ],
+      "size":null
+   },
+   "brendan_faces":{
+      "files":[
+         [
+            "frey_rawface.mat"
+         ]
+      ],
+      "license":null,
+      "citation":"Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.",
+      "details":"A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.",
+      "urls":[
+         "http://www.cs.nyu.edu/~roweis/data/"
+      ],
+      "size":1100584
+   },
+   "olympic_marathon_men":{
+      "files":[
+         [
+            "olympicMarathonTimes.csv"
+         ]
+      ],
+      "license":null,
+      "citation":null,
+      "details":"Olympic mens' marathon gold medal winning times from 1896 to 2012. Time given in pace (minutes per kilometer). Data is originally downloaded and collated from Wikipedia, we are not responsible for errors in the data",
+      "urls":[
+         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olympic_marathon_men/"
+      ],
+      "size":584
+   },
+   "pumadyn-32nm":{
+      "files":[
+         [
+            "pumadyn-32nm.tar.gz"
+         ]
+      ],
+      "license":"Data is made available by the Delve system at the University of Toronto",
+      "citation":"Created by Zoubin Ghahramani using the Matlab Robotics Toolbox of Peter Corke. Corke, P. I. (1996). A Robotics Toolbox for MATLAB. IEEE Robotics and Automation Magazine, 3 (1): 24-32.",
+      "details":"Pumadyn non linear 32 input data set with moderate noise. See http://www.cs.utoronto.ca/~delve/data/pumadyn/desc.html for details.",
+      "urls":[
+         "ftp://ftp.cs.toronto.edu/pub/neuron/delve/data/tarfiles/pumadyn-family/"
+      ],
+      "size":5861646
+   },
+   "ripley_prnn_data":{
+      "files":[
+         [
+            "Cushings.dat",
+            "README",
+            "crabs.dat",
+            "fglass.dat",
+            "fglass.grp",
+            "pima.te",
+            "pima.tr",
+            "pima.tr2",
+            "synth.te",
+            "synth.tr",
+            "viruses.dat",
+            "virus3.dat"
+         ]
+      ],
+      "license":null,
+      "citation":"Pattern Recognition and Neural Networks by B.D. Ripley (1996) Cambridge University Press ISBN 0 521 46986 7",
+      "details":"Data sets from Brian Ripley's Pattern Recognition and Neural Networks",
+      "urls":[
+         "http://www.stats.ox.ac.uk/pub/PRNN/"
+      ],
+      "size":93565
+   },
+   "three_phase_oil_flow":{
+      "files":[
+         [
+            "DataTrnLbls.txt",
+            "DataTrn.txt",
+            "DataTst.txt",
+            "DataTstLbls.txt",
+            "DataVdn.txt",
+            "DataVdnLbls.txt"
+         ]
+      ],
+      "license":null,
+      "citation":"Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593",
+      "details":"The three phase oil data used initially for demonstrating the Generative Topographic mapping.",
+      "urls":[
+         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/three_phase_oil_flow/"
+      ],
+      "size":712796
+   },
+   "robot_wireless":{
+      "files":[
+         [
+            "uw-floor.txt"
+         ]
+      ],
+      "license":null,
+      "citation":"WiFi-SLAM using Gaussian Process Latent Variable Models by Brian Ferris, Dieter Fox and Neil Lawrence in IJCAI'07 Proceedings pages 2480-2485. Data used in A Unifying Probabilistic Perspective for Spectral Dimensionality Reduction: Insights and New Models by Neil D. Lawrence, JMLR 13 pg 1609--1638, 2012.",
+      "details":"Data created by Brian Ferris and Dieter Fox. Consists of WiFi access point strengths taken during a circuit of the Paul Allen building at the University of Washington.",
+      "urls":[
+         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/robot_wireless/"
+      ],
+      "size":284390
+   },
+   "xw_pen":{
+      "files":[
+         [
+            "xw_pen_15.csv"
+         ]
+      ],
+      "license":null,
+      "citation":"Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005",
+      "details":"Accelerometer pen data used for robust regression by Tipping and Lawrence.",
+      "urls":[
+         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/xw_pen/"
+      ],
+      "size":3410
+   },
+   "swiss_roll":{
+      "files":[
+         [
+            "swiss_roll_data.mat"
+         ]
+      ],
+      "license":null,
+      "citation":"A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000",
+      "details":"Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.",
+      "urls":[
+         "http://isomap.stanford.edu/"
+      ],
+      "size":800256
+   },
+   "osu_run1":{
+      "files":[
+         [
+            "run1TXT.ZIP"
+         ],
+         [
+            "connections.txt"
+         ]
+      ],
+      "license":"Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).",
+      "citation":"The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.",
+      "details":"Motion capture data of a stick man running from the Open Motion Data Project at Ohio State University.",
+      "urls":[
+         "http://accad.osu.edu/research/mocap/data/",
+         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/stick/"
+      ],
+      "size":338103
+   },
+   "creep_rupture":{
+      "files":[
+         [
+            "creeprupt.tar"
+         ]
+      ],
+      "license":null,
+      "citation":"Materials Algorithms Project Data Library: MAP_DATA_CREEP_RUPTURE. F. Brun and T. Yoshida.",
+      "details":"Provides 2066 creep rupture test results of steels (mainly of two kinds of steels: 2.25Cr and 9-12 wt% Cr ferritic steels). See http://www.msm.cam.ac.uk/map/data/materials/creeprupt-b.html.",
+      "urls":[
+         "http://www.msm.cam.ac.uk/map/data/tar/"
+      ],
+      "size":602797
+   },
+   "olivetti_faces":{
+      "files":[
+         [
+            "att_faces.zip"
+         ],
+         [
+            "olivettifaces.mat"
+         ]
+      ],
+      "license":null,
+      "citation":"Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994",
+      "details":"Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. ",
+      "urls":[
+         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olivetti_faces/",
+         "http://www.cs.nyu.edu/~roweis/data/"
+      ],
+      "size":8561331
+   },
+   "della_gatta":{
+      "files":[
+         [
+            "DellaGattadata.mat"
+         ]
+      ],
+      "license":null,
+      "citation":"Direct targets of the TRP63 transcription factor revealed by a combination of gene expression profiling and reverse engineering. Giusy Della Gatta, Mukesh Bansal, Alberto Ambesi-Impiombato, Dario Antonini, Caterina Missero, and Diego di Bernardo, Genome Research 2008",
+      "details":"The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.",
+      "urls":[
+         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/della_gatta/"
+      ],
+      "size":3729650
+   },
+   "epomeo_gpx":{
+      "files":[
+         [
+            "endomondo_1.gpx",
+            "endomondo_2.gpx",
+            "garmin_watch_via_endomondo.gpx",
+            "viewranger_phone.gpx",
+            "viewranger_tablet.gpx"
+         ]
+      ],
+      "license":null,
+      "citation":"",
+      "details":"Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).",
+      "urls":[
+         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/epomeo_gpx/"
+      ],
+      "size":2031872
+   }
+}
\ No newline at end of file
diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index 69f010f9..f33a2e92 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -7,7 +7,7 @@ import urllib as url
 import zipfile
 import tarfile
 import datetime
-
+import json
 ipython_available=True
 try:
     import IPython
@@ -29,129 +29,10 @@ data_path = os.path.join(os.path.dirname(__file__), 'datasets')
 default_seed = 10000
 overide_manual_authorize=False
 neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
-sam_url = 'http://www.cs.nyu.edu/~roweis/data/'
-cmu_url = 'http://mocap.cs.cmu.edu/subjects/'
 
-# Note: there may be a better way of storing data resources, for the
-# moment we are storing them in a dictionary.
-data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'],
-                                       'files' : [['ankurDataPoseSilhouette.mat']],
-                                       'license' : None,
-                                       'citation' : """3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.""",
-                                       'details' : """Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing."""},
-
-                  'boston_housing' : {'urls' : ['http://archive.ics.uci.edu/ml/machine-learning-databases/housing/'],
-                                      'files' : [['Index', 'housing.data', 'housing.names']],
-                                      'citation' : """Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.""",
-                                      'details' : """The Boston Housing data relates house values in Boston to a range of input variables.""",
-                                      'license' : None,
-                                      'size' : 51276
-                                      },
-                  'brendan_faces' : {'urls' : [sam_url],
-                                     'files': [['frey_rawface.mat']],
-                                     'citation' : 'Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.',
-                                     'details' : """A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.""",
-                                     'license': None,
-                                     'size' : 1100584},
-                  'cmu_mocap_full' : {'urls' : ['http://mocap.cs.cmu.edu'],
-                                 'files' : [['allasfamc.zip']],
-                                 'citation' : """Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu.
-The database was created with funding from NSF EIA-0196217.""",
-                                 'details' : """CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.""",
-                                 'license' : """From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. The database was created with funding from NSF EIA-0196217.""",
-                                 'size' : None},
-                  'creep_rupture' : {'urls' : ['http://www.msm.cam.ac.uk/map/data/tar/'],
-                                     'files' : [['creeprupt.tar']],
-                                     'citation' : 'Materials Algorithms Project Data Library: MAP_DATA_CREEP_RUPTURE. F. Brun and T. Yoshida.',
-                                     'details' : """Provides 2066 creep rupture test results of steels (mainly of two kinds of steels: 2.25Cr and 9-12 wt% Cr ferritic steels). See http://www.msm.cam.ac.uk/map/data/materials/creeprupt-b.html.""",
-                                     'license' : None,
-                                     'size' : 602797},
-                  'della_gatta' : {'urls' : [neil_url + 'della_gatta/'],
-                                   'files': [['DellaGattadata.mat']],
-                                   'citation' : 'Direct targets of the TRP63 transcription factor revealed by a combination of gene expression profiling and reverse engineering. Giusy Della Gatta, Mukesh Bansal, Alberto Ambesi-Impiombato, Dario Antonini, Caterina Missero, and Diego di Bernardo, Genome Research 2008',
-                                   'details': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.",
-                                   'license':None,
-                                   'size':3729650},
-                  'epomeo_gpx' : {'urls' : [neil_url + 'epomeo_gpx/'],
-                                   'files': [['endomondo_1.gpx', 'endomondo_2.gpx', 'garmin_watch_via_endomondo.gpx','viewranger_phone.gpx','viewranger_tablet.gpx']],
-                                   'citation' : '',
-                                   'details': "Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).",
-                                   'license':None,
-                                   'size': 2031872},
-                  'three_phase_oil_flow': {'urls' : [neil_url + 'three_phase_oil_flow/'],
-                                           'files' : [['DataTrnLbls.txt', 'DataTrn.txt', 'DataTst.txt', 'DataTstLbls.txt', 'DataVdn.txt', 'DataVdnLbls.txt']],
-                                           'citation' : 'Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593',
-                                           'details' : """The three phase oil data used initially for demonstrating the Generative Topographic mapping.""",
-                                           'license' : None,
-                                           'size' : 712796},
-                  'rogers_girolami_data' : {'urls' : ['https://www.dropbox.com/sh/7p6tu1t29idgliq/_XqlH_3nt9/'],
-                                            'files' : [['firstcoursemldata.tar.gz']],
-                                            'suffices' : [['?dl=1']],
-                                            'citation' : 'A First Course in Machine Learning. Simon Rogers and Mark Girolami: Chapman & Hall/CRC, ISBN-13: 978-1439824146',
-                                            'details' : """Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.""",
-                                            'license' : None,
-                                            'size' : 21949154},
-                  'olivetti_faces' : {'urls' : [neil_url + 'olivetti_faces/', sam_url],
-                                      'files' : [['att_faces.zip'], ['olivettifaces.mat']],
-                                            'citation' : 'Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994',
-                                            'details' : """Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. """,
-                                            'license': None,
-                                            'size' : 8561331},
-                  'olympic_marathon_men' : {'urls' : [neil_url + 'olympic_marathon_men/'],
-                                            'files' : [['olympicMarathonTimes.csv']],
-                                            'citation' : None,
-                                            'details' : """Olympic mens' marathon gold medal winning times from 1896 to 2012. Time given in pace (minutes per kilometer). Data is originally downloaded and collated from Wikipedia, we are not responsible for errors in the data""",
-                                            'license': None,
-                                            'size' : 584},
-                  'osu_run1' : {'urls': ['http://accad.osu.edu/research/mocap/data/', neil_url + 'stick/'],
-                                'files': [['run1TXT.ZIP'],['connections.txt']],
-                                'details' : "Motion capture data of a stick man running from the Open Motion Data Project at Ohio State University.",
-                                'citation' : 'The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.',
-                                'license' : 'Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).',
-                                'size': 338103},
-                  'osu_accad' : {'urls': ['http://accad.osu.edu/research/mocap/data/', neil_url + 'stick/'],
-                                'files': [['swagger1TXT.ZIP','handspring1TXT.ZIP','quickwalkTXT.ZIP','run1TXT.ZIP','sprintTXT.ZIP','dogwalkTXT.ZIP','camper_04TXT.ZIP','dance_KB3_TXT.ZIP','per20_TXT.ZIP','perTWO07_TXT.ZIP','perTWO13_TXT.ZIP','perTWO14_TXT.ZIP','perTWO15_TXT.ZIP','perTWO16_TXT.ZIP'],['connections.txt']],
-                                'details' : "Motion capture data of different motions from the Open Motion Data Project at Ohio State University.",
-                                'citation' : 'The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.',
-                                'license' : 'Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).',
-                                'size': 15922790},
-                  'pumadyn-32nm' : {'urls' : ['ftp://ftp.cs.toronto.edu/pub/neuron/delve/data/tarfiles/pumadyn-family/'],
-                                    'files' : [['pumadyn-32nm.tar.gz']],
-                                    'details' : """Pumadyn non linear 32 input data set with moderate noise. See http://www.cs.utoronto.ca/~delve/data/pumadyn/desc.html for details.""",
-                                    'citation' : """Created by Zoubin Ghahramani using the Matlab Robotics Toolbox of Peter Corke. Corke, P. I. (1996). A Robotics Toolbox for MATLAB. IEEE Robotics and Automation Magazine, 3 (1): 24-32.""",
-                                    'license' : """Data is made available by the Delve system at the University of Toronto""",
-                                    'size' : 5861646},
-                  'robot_wireless' : {'urls' : [neil_url + 'robot_wireless/'],
-                                      'files' : [['uw-floor.txt']],
-                                      'citation' : """WiFi-SLAM using Gaussian Process Latent Variable Models by Brian Ferris, Dieter Fox and Neil Lawrence in IJCAI'07 Proceedings pages 2480-2485. Data used in A Unifying Probabilistic Perspective for Spectral Dimensionality Reduction: Insights and New Models by Neil D. Lawrence, JMLR 13 pg 1609--1638, 2012.""",
-                                      'details' : """Data created by Brian Ferris and Dieter Fox. Consists of WiFi access point strengths taken during a circuit of the Paul Allen building at the University of Washington.""",
-                                      'license' : None,
-                                      'size' : 284390},
-                  'swiss_roll' : {'urls' : ['http://isomap.stanford.edu/'],
-                                  'files' : [['swiss_roll_data.mat']],
-                                  'details' : """Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.""",
-                                  'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000',
-                                  'license' : None,
-                                  'size' : 800256},
-                  'ripley_prnn_data' : {'urls' : ['http://www.stats.ox.ac.uk/pub/PRNN/'],
-                                        'files' : [['Cushings.dat', 'README', 'crabs.dat', 'fglass.dat', 'fglass.grp', 'pima.te', 'pima.tr', 'pima.tr2', 'synth.te', 'synth.tr', 'viruses.dat', 'virus3.dat']],
-                                        'details' : """Data sets from Brian Ripley's Pattern Recognition and Neural Networks""",
-                                        'citation': """Pattern Recognition and Neural Networks by B.D. Ripley (1996) Cambridge University Press ISBN 0 521 46986 7""",
-                                        'license' : None,
-                                        'size' : 93565},
-                  'isomap_face_data' : {'urls' : [neil_url + 'isomap_face_data/'],
-                                        'files' : [['face_data.mat']],
-                                        'details' : """Face data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.""",
-                                        'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000',
-                                        'license' : None,
-                                        'size' : 24229368},
-                  'xw_pen' : {'urls' : [neil_url + 'xw_pen/'],
-                                        'files' : [['xw_pen_15.csv']],
-                                        'details' : """Accelerometer pen data used for robust regression by Tipping and Lawrence.""",
-                                        'citation' : 'Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005',
-                                        'license' : None,
-                                        'size' : 3410}
-                  }
+# Read data resources from json file.
+json_data=open('data_resources.json').read()
+data_resources = json.loads(json_data)
 
 
 def prompt_user(prompt):
@@ -623,7 +504,7 @@ def xw_pen(data_set='xw_pen'):
     return data_details_return({'Y': Y, 'X': X, 'info': "Tilt data from a personalized digital assistant pen. Plot in original paper showed regression between time steps 175 and 275."}, data_set)
 
 
-def download_rogers_girolami_data():
+def download_rogers_girolami_data(data_set='rogers_girolami_data'):
     if not data_available('rogers_girolami_data'):
         download_data(data_set)
         path = os.path.join(data_path, data_set)
@@ -909,3 +790,5 @@ def cmu_mocap(subject, train_motions, test_motions=[], sample_every=4, data_set=
     if sample_every != 1:
         info += ' Data is sub-sampled to every ' + str(sample_every) + ' frames.'
     return data_details_return({'Y': Y, 'lbls' : lbls, 'Ytest': Ytest, 'lblstest' : lblstest, 'info': info, 'skel': skel}, data_set)
+
+
diff --git a/GPy/util/datasets/data_resources_create.py b/GPy/util/datasets/data_resources_create.py
new file mode 100644
index 00000000..8ae62a85
--- /dev/null
+++ b/GPy/util/datasets/data_resources_create.py
@@ -0,0 +1,127 @@
+import json
+
+neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
+sam_url = 'http://www.cs.nyu.edu/~roweis/data/'
+cmu_url = 'http://mocap.cs.cmu.edu/subjects/'
+
+data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'],
+                                       'files' : [['ankurDataPoseSilhouette.mat']],
+                                       'license' : None,
+                                       'citation' : """3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.""",
+                                       'details' : """Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing."""},
+
+                  'boston_housing' : {'urls' : ['http://archive.ics.uci.edu/ml/machine-learning-databases/housing/'],
+                                      'files' : [['Index', 'housing.data', 'housing.names']],
+                                      'citation' : """Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.""",
+                                      'details' : """The Boston Housing data relates house values in Boston to a range of input variables.""",
+                                      'license' : None,
+                                      'size' : 51276
+                                      },
+                  'brendan_faces' : {'urls' : [sam_url],
+                                     'files': [['frey_rawface.mat']],
+                                     'citation' : 'Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.',
+                                     'details' : """A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.""",
+                                     'license': None,
+                                     'size' : 1100584},
+                  'cmu_mocap_full' : {'urls' : ['http://mocap.cs.cmu.edu'],
+                                 'files' : [['allasfamc.zip']],
+                                 'citation' : """Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu.
+The database was created with funding from NSF EIA-0196217.""",
+                                 'details' : """CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.""",
+                                 'license' : """From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. The database was created with funding from NSF EIA-0196217.""",
+                                 'size' : None},
+                  'creep_rupture' : {'urls' : ['http://www.msm.cam.ac.uk/map/data/tar/'],
+                                     'files' : [['creeprupt.tar']],
+                                     'citation' : 'Materials Algorithms Project Data Library: MAP_DATA_CREEP_RUPTURE. F. Brun and T. Yoshida.',
+                                     'details' : """Provides 2066 creep rupture test results of steels (mainly of two kinds of steels: 2.25Cr and 9-12 wt% Cr ferritic steels). See http://www.msm.cam.ac.uk/map/data/materials/creeprupt-b.html.""",
+                                     'license' : None,
+                                     'size' : 602797},
+                  'della_gatta' : {'urls' : [neil_url + 'della_gatta/'],
+                                   'files': [['DellaGattadata.mat']],
+                                   'citation' : 'Direct targets of the TRP63 transcription factor revealed by a combination of gene expression profiling and reverse engineering. Giusy Della Gatta, Mukesh Bansal, Alberto Ambesi-Impiombato, Dario Antonini, Caterina Missero, and Diego di Bernardo, Genome Research 2008',
+                                   'details': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.",
+                                   'license':None,
+                                   'size':3729650},
+                  'epomeo_gpx' : {'urls' : [neil_url + 'epomeo_gpx/'],
+                                   'files': [['endomondo_1.gpx', 'endomondo_2.gpx', 'garmin_watch_via_endomondo.gpx','viewranger_phone.gpx','viewranger_tablet.gpx']],
+                                   'citation' : '',
+                                   'details': "Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).",
+                                   'license':None,
+                                   'size': 2031872},
+                  'three_phase_oil_flow': {'urls' : [neil_url + 'three_phase_oil_flow/'],
+                                           'files' : [['DataTrnLbls.txt', 'DataTrn.txt', 'DataTst.txt', 'DataTstLbls.txt', 'DataVdn.txt', 'DataVdnLbls.txt']],
+                                           'citation' : 'Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593',
+                                           'details' : """The three phase oil data used initially for demonstrating the Generative Topographic mapping.""",
+                                           'license' : None,
+                                           'size' : 712796},
+                  'rogers_girolami_data' : {'urls' : ['https://www.dropbox.com/sh/7p6tu1t29idgliq/_XqlH_3nt9/'],
+                                            'files' : [['firstcoursemldata.tar.gz']],
+                                            'suffices' : [['?dl=1']],
+                                            'citation' : 'A First Course in Machine Learning. Simon Rogers and Mark Girolami: Chapman & Hall/CRC, ISBN-13: 978-1439824146',
+                                            'details' : """Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.""",
+                                            'license' : None,
+                                            'size' : 21949154},
+                  'olivetti_faces' : {'urls' : [neil_url + 'olivetti_faces/', sam_url],
+                                      'files' : [['att_faces.zip'], ['olivettifaces.mat']],
+                                            'citation' : 'Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994',
+                                            'details' : """Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. """,
+                                            'license': None,
+                                            'size' : 8561331},
+                  'olympic_marathon_men' : {'urls' : [neil_url + 'olympic_marathon_men/'],
+                                            'files' : [['olympicMarathonTimes.csv']],
+                                            'citation' : None,
+                                            'details' : """Olympic mens' marathon gold medal winning times from 1896 to 2012. Time given in pace (minutes per kilometer). Data is originally downloaded and collated from Wikipedia, we are not responsible for errors in the data""",
+                                            'license': None,
+                                            'size' : 584},
+                  'osu_run1' : {'urls': ['http://accad.osu.edu/research/mocap/data/', neil_url + 'stick/'],
+                                'files': [['run1TXT.ZIP'],['connections.txt']],
+                                'details' : "Motion capture data of a stick man running from the Open Motion Data Project at Ohio State University.",
+                                'citation' : 'The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.',
+                                'license' : 'Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).',
+                                'size': 338103},
+                  'osu_accad' : {'urls': ['http://accad.osu.edu/research/mocap/data/', neil_url + 'stick/'],
+                                'files': [['swagger1TXT.ZIP','handspring1TXT.ZIP','quickwalkTXT.ZIP','run1TXT.ZIP','sprintTXT.ZIP','dogwalkTXT.ZIP','camper_04TXT.ZIP','dance_KB3_TXT.ZIP','per20_TXT.ZIP','perTWO07_TXT.ZIP','perTWO13_TXT.ZIP','perTWO14_TXT.ZIP','perTWO15_TXT.ZIP','perTWO16_TXT.ZIP'],['connections.txt']],
+                                'details' : "Motion capture data of different motions from the Open Motion Data Project at Ohio State University.",
+                                'citation' : 'The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.',
+                                'license' : 'Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).',
+                                'size': 15922790},
+                  'pumadyn-32nm' : {'urls' : ['ftp://ftp.cs.toronto.edu/pub/neuron/delve/data/tarfiles/pumadyn-family/'],
+                                    'files' : [['pumadyn-32nm.tar.gz']],
+                                    'details' : """Pumadyn non linear 32 input data set with moderate noise. See http://www.cs.utoronto.ca/~delve/data/pumadyn/desc.html for details.""",
+                                    'citation' : """Created by Zoubin Ghahramani using the Matlab Robotics Toolbox of Peter Corke. Corke, P. I. (1996). A Robotics Toolbox for MATLAB. IEEE Robotics and Automation Magazine, 3 (1): 24-32.""",
+                                    'license' : """Data is made available by the Delve system at the University of Toronto""",
+                                    'size' : 5861646},
+                  'robot_wireless' : {'urls' : [neil_url + 'robot_wireless/'],
+                                      'files' : [['uw-floor.txt']],
+                                      'citation' : """WiFi-SLAM using Gaussian Process Latent Variable Models by Brian Ferris, Dieter Fox and Neil Lawrence in IJCAI'07 Proceedings pages 2480-2485. Data used in A Unifying Probabilistic Perspective for Spectral Dimensionality Reduction: Insights and New Models by Neil D. Lawrence, JMLR 13 pg 1609--1638, 2012.""",
+                                      'details' : """Data created by Brian Ferris and Dieter Fox. Consists of WiFi access point strengths taken during a circuit of the Paul Allen building at the University of Washington.""",
+                                      'license' : None,
+                                      'size' : 284390},
+                  'swiss_roll' : {'urls' : ['http://isomap.stanford.edu/'],
+                                  'files' : [['swiss_roll_data.mat']],
+                                  'details' : """Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.""",
+                                  'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000',
+                                  'license' : None,
+                                  'size' : 800256},
+                  'ripley_prnn_data' : {'urls' : ['http://www.stats.ox.ac.uk/pub/PRNN/'],
+                                        'files' : [['Cushings.dat', 'README', 'crabs.dat', 'fglass.dat', 'fglass.grp', 'pima.te', 'pima.tr', 'pima.tr2', 'synth.te', 'synth.tr', 'viruses.dat', 'virus3.dat']],
+                                        'details' : """Data sets from Brian Ripley's Pattern Recognition and Neural Networks""",
+                                        'citation': """Pattern Recognition and Neural Networks by B.D. Ripley (1996) Cambridge University Press ISBN 0 521 46986 7""",
+                                        'license' : None,
+                                        'size' : 93565},
+                  'isomap_face_data' : {'urls' : [neil_url + 'isomap_face_data/'],
+                                        'files' : [['face_data.mat']],
+                                        'details' : """Face data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.""",
+                                        'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000',
+                                        'license' : None,
+                                        'size' : 24229368},
+                  'xw_pen' : {'urls' : [neil_url + 'xw_pen/'],
+                                        'files' : [['xw_pen_15.csv']],
+                                        'details' : """Accelerometer pen data used for robust regression by Tipping and Lawrence.""",
+                                        'citation' : 'Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005',
+                                        'license' : None,
+                                        'size' : 3410}
+                  }
+
+with open('data_resources.json', 'w') as file:
+    json.dump(data_resources, file)

From fca3287e9c5c042c044361bd35ceb87287aa843a Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 19 Nov 2013 16:54:07 +0000
Subject: [PATCH 10/28] added a path for the data resources. not all users will
 be working in the GPy directory.

---
 GPy/util/datasets.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index f33a2e92..732e2a1b 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -31,7 +31,8 @@ overide_manual_authorize=False
 neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
 
 # Read data resources from json file.
-json_data=open('data_resources.json').read()
+path = os.path.join(os.path.dirname(__file__), 'data_resources.json')
+json_data=open(path).read()
 data_resources = json.loads(json_data)
 
 

From 4948fb1345ac034af8e337ff5c90dfa406a5f478 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 20 Nov 2013 11:45:33 +0000
Subject: [PATCH 11/28] updated crossterms, rbf x any not working yet
 (derivatives)

---
 GPy/kern/kern.py      | 208 +++++++++++++++++++++++++++++-------------
 GPy/kern/parts/rbf.py |  21 ++---
 2 files changed, 155 insertions(+), 74 deletions(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index d686064a..5cd5b6aa 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -456,7 +456,7 @@ class kern(Parameterized):
         from parts.linear import Linear
         from parts.fixed import Fixed
 
-        for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self.parts, self.param_slices), 2):
+        for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self.parts, self.input_slices), 2):
             # white doesn;t combine with anything
             if isinstance(p1, White) or isinstance(p2, White):
                 pass
@@ -466,28 +466,30 @@ class kern(Parameterized):
             elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
                 target += p2.variance * (p1._psi1[:, :, None] + p1._psi1[:, None, :])
             # linear X bias
-            elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, Linear):
+            elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (Linear, RBF, RBFInv)):
                 tmp = np.zeros((mu.shape[0], Z.shape[0]))
                 p2.psi1(Z, mu, S, tmp)
                 target += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
-            elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, Linear):
+            elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (Linear, RBF, RBFInv)):
                 tmp = np.zeros((mu.shape[0], Z.shape[0]))
                 p1.psi1(Z, mu, S, tmp)
                 target += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
             # rbf X any
-            elif isinstance(p1, (RBF, RBFInv)):
-                psi11 = np.zeros((mu.shape[0], Z.shape[0]))
-                psi12 = np.zeros((mu.shape[0], Z.shape[0]))
+            elif False:#isinstance(p1, (RBF, RBFInv)) or isinstance(p2, (RBF, RBFInv)):
+                if isinstance(p2, (RBF, RBFInv)) and not isinstance(p1, (RBF, RBFInv)):
+                    p1t = p1; p1 = p2; p2 = p1t; del p1t  
+                N, M = mu.shape[0], Z.shape[0]; NM=N*M
+                psi11 = np.zeros((N, M))
+                psi12 = np.zeros((NM, M))
                 p1.psi1(Z, mu, S, psi11)
-                p2.psi1(Z, mu, S, psi12)
+                Mu, Sigma = p1._crossterm_mu_S(Z, mu, S)
+                Mu, Sigma = Mu.reshape(NM,self.input_dim), Sigma.reshape(NM,self.input_dim)
                 
-                crossterms  = psi11[:, :, None] + psi12[:, None, :]
-                crossterms += psi12[:, :, None] + psi11[:, None, :]
-                
-                target += p1._crossterm_product_expectation(p2, Z, mu, S)
+                p2.psi1(Z, Mu, Sigma, psi12)
+                eK2 = psi12.reshape(N, M, M)
+                crossterms = eK2 * (psi11[:, :, None] + psi11[:, None, :])
+                target += crossterms
                 #import ipdb;ipdb.set_trace()
-            elif isinstance(p2, (RBF, RBFInv)):
-                raise NotImplementedError # TODO
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
         return target        
@@ -496,40 +498,81 @@ class kern(Parameterized):
         target = np.zeros(self.num_params)
         [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
 
+        from parts.white import White
+        from parts.rbf import RBF
+        from parts.rbf_inv import RBFInv
+        from parts.bias import Bias
+        from parts.linear import Linear
+        from parts.fixed import Fixed
+
         # compute the "cross" terms
         # TODO: better looping, input_slices
         for i1, i2 in itertools.combinations(range(len(self.parts)), 2):
             p1, p2 = self.parts[i1], self.parts[i2]
-#             ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
-            ps1, ps2 = self.param_slices[i1], self.param_slices[i2]
-
-            # white doesn;t combine with anything
-            if p1.name == 'white' or p2.name == 'white':
+            #ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
+            ps1, ps2 = self.param_slices[i1], self.param_slices[i2]            
+            if isinstance(p1, White) or isinstance(p2, White):
                 pass
             # rbf X bias
-            elif p1.name == 'bias' and p2.name == 'rbf':
+            elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
                 p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2])
                 p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2._psi1 * 2., Z, mu, S, target[ps1])
-            elif p2.name == 'bias' and p1.name == 'rbf':
+            elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
                 p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1])
                 p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1._psi1 * 2., Z, mu, S, target[ps2])
             # linear X bias
-            elif p1.name == 'bias' and p2.name == 'linear':
+            elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, Linear):
                 p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2]) # [ps1])
                 psi1 = np.zeros((mu.shape[0], Z.shape[0]))
                 p2.psi1(Z, mu, S, psi1)
                 p1.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps1])
-            elif p2.name == 'bias' and p1.name == 'linear':
+            elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, Linear):
                 p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1])
                 psi1 = np.zeros((mu.shape[0], Z.shape[0]))
                 p1.psi1(Z, mu, S, psi1)
                 p2.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps2])
             # rbf X any
-            
-            elif p1.name == 'linear' and p2.name == 'rbf':
-                raise NotImplementedError # TODO
-            elif p2.name == 'linear' and p1.name == 'rbf':
-                raise NotImplementedError # TODO
+            elif False:#isinstance(p1, (RBF, RBFInv)) or isinstance(p2, (RBF, RBFInv)):
+                if isinstance(p2, (RBF, RBFInv)) and not isinstance(p1, (RBF, RBFInv)):
+                    # turn around to have rbf in front
+                    p1, p2 = self.parts[i2], self.parts[i1]
+                    ps1, ps2 = self.param_slices[i2], self.param_slices[i1]  
+                
+                N, M = mu.shape[0], Z.shape[0]; NM=N*M
+
+                psi11 = np.zeros((N, M))
+                p1.psi1(Z, mu, S, psi11)
+                
+                Mu, Sigma = p1._crossterm_mu_S(Z, mu, S)
+                Mu, Sigma = Mu.reshape(NM,self.input_dim), Sigma.reshape(NM,self.input_dim)
+                
+                tmp1 = np.zeros_like(target[ps1])
+                tmp2 = np.zeros_like(target[ps2])
+#                 for n in range(N):
+#                     for m in range(M):
+#                         for m_prime in range(M):
+#                             p1.dpsi1_dtheta((dL_dpsi2[n:n+1,m:m+1,m_prime:m_prime+1]*psi12_t.reshape(N,M,M)[n:n+1,m:m+1,m_prime:m_prime+1])[0], Z[m:m+1], mu[n:n+1], S[n:n+1], tmp2)#Z[m_prime:m_prime+1], mu[n:n+1], S[n:n+1], tmp2)
+#                             p1.dpsi1_dtheta((dL_dpsi2[n:n+1,m:m+1,m_prime:m_prime+1]*psi12_t.reshape(N,M,M)[n:n+1,m_prime:m_prime+1,m:m+1])[0], Z[m_prime:m_prime+1], mu[n:n+1], S[n:n+1], tmp2)
+#                             Mu, Sigma= Mu.reshape(N,M,self.input_dim), Sigma.reshape(N,M,self.input_dim)
+#                             p2.dpsi1_dtheta((dL_dpsi2[n:n+1,m:m+1,m_prime:m_prime+1]*(psi11[n:n+1,m_prime:m_prime+1]))[0], Z[m:m+1], Mu[n:n+1,m], Sigma[n:n+1,m], target[ps2])
+#                             p2.dpsi1_dtheta((dL_dpsi2[n:n+1,m:m+1,m_prime:m_prime+1]*(psi11[n:n+1,m:m+1]))[0], Z[m_prime:m_prime+1], Mu[n:n+1, m_prime], Sigma[n:n+1, m_prime], target[ps2])#Z[m_prime:m_prime+1], Mu[n+m:(n+m)+1], Sigma[n+m:(n+m)+1], target[ps2])
+                
+                if isinstance(p1, RBF) and isinstance(p2, RBF):
+                    psi12 = np.zeros((N, M))
+                    p2.psi1(Z, mu, S, psi12)
+                    Mu2, Sigma2 = p2._crossterm_mu_S(Z, mu, S)
+                    Mu2, Sigma2 = Mu2.reshape(NM,self.input_dim), Sigma2.reshape(NM,self.input_dim)
+                    p1.dpsi1_dtheta((dL_dpsi2*(psi12[:,:,None] + psi12[:,None,:])).reshape(NM,M), Z, Mu2, Sigma2, tmp1)
+                    pass
+
+                if isinstance(p1, RBF) and isinstance(p2, Linear):
+                    #import ipdb;ipdb.set_trace()
+                    pass
+                
+                p2.dpsi1_dtheta((dL_dpsi2*(psi11[:,:,None] + psi11[:,None,:])).reshape(NM,M), Z, Mu, Sigma, tmp2)
+                
+                target[ps1] += tmp1
+                target[ps2] += tmp2                
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
 
@@ -539,61 +582,102 @@ class kern(Parameterized):
         target = np.zeros_like(Z)
         [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
 
+        from parts.white import White
+        from parts.rbf import RBF
+        from parts.rbf_inv import RBFInv
+        from parts.bias import Bias
+        from parts.linear import Linear
+        from parts.fixed import Fixed
+
         # compute the "cross" terms
-        # TODO: we need input_slices here.
+        # TODO: better looping, input_slices
         for p1, p2 in itertools.combinations(self.parts, 2):
-            # white doesn;t combine with anything
-            if p1.name == 'white' or p2.name == 'white':
+            if isinstance(p1, White) or isinstance(p2, White):
                 pass
             # rbf X bias
-            elif p1.name == 'bias' and p2.name == 'rbf':
-                p2.dpsi1_dX(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target)
-            elif p2.name == 'bias' and p1.name == 'rbf':
-                p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target)
+            elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
+                p2.dpsi1_dZ(dL_dpsi2.sum(1) * p1.variance, Z, mu, S, target)
+            elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
+                p1.dpsi1_dZ(dL_dpsi2.sum(1) * p2.variance, Z, mu, S, target)
             # linear X bias
-            elif p1.name == 'bias' and p2.name == 'linear':
-                p2.dpsi1_dZ(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target)
-            elif p2.name == 'bias' and p1.name == 'linear':
-                p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target)
-            # rbf X linear
-            elif p1.name == 'linear' and p2.name == 'rbf':
-                raise NotImplementedError # TODO
-            elif p2.name == 'linear' and p1.name == 'rbf':
-                raise NotImplementedError # TODO
+            elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, Linear):
+                p2.dpsi1_dZ(dL_dpsi2.sum(1) * p1.variance, Z, mu, S, target)
+            elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, Linear):
+                p1.dpsi1_dZ(dL_dpsi2.sum(1) * p2.variance, Z, mu, S, target)
+            # rbf X any
+            elif False:#isinstance(p1, (RBF, RBFInv)) or isinstance(p2, (RBF, RBFInv)):
+                if isinstance(p2, (RBF, RBFInv)) and not isinstance(p1, (RBF, RBFInv)):
+                    p1t = p1; p1 = p2; p2 = p1t; del p1t  
+                N, M = mu.shape[0], Z.shape[0]; NM=N*M
+                psi11 = np.zeros((N, M))
+                psi12 = np.zeros((NM, M))
+                #psi12_t = np.zeros((N,M))
+                
+                p1.psi1(Z, mu, S, psi11)
+                Mu, Sigma = p1._crossterm_mu_S(Z, mu, S)
+                Mu, Sigma = Mu.reshape(NM,self.input_dim), Sigma.reshape(NM,self.input_dim)
+                
+                p2.psi1(Z, Mu, Sigma, psi12)
+                tmp1 = np.zeros_like(target)
+                p1.dpsi1_dZ((dL_dpsi2*psi12.reshape(N,M,M)).sum(1), Z, mu, S, tmp1)
+                p1.dpsi1_dZ((dL_dpsi2*psi12.reshape(N,M,M)).sum(2), Z, mu, S, tmp1)
+                target += tmp1
+                
+                #p2.dpsi1_dtheta((dL_dpsi2*(psi11[:,:,None] + psi11[:,None,:])).reshape(NM,M), Z, Mu, Sigma, target)
+                p2.dpsi1_dZ((dL_dpsi2*(psi11[:,:,None] + psi11[:,None,:])).reshape(NM,M), Z, Mu, Sigma, target)
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
-
-        return target * 2.
+        return target * 2
 
     def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
         target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
         [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
 
+        from parts.white import White
+        from parts.rbf import RBF
+        from parts.rbf_inv import RBFInv
+        from parts.bias import Bias
+        from parts.linear import Linear
+        from parts.fixed import Fixed
+
         # compute the "cross" terms
-        # TODO: we need input_slices here.
+        # TODO: better looping, input_slices
         for p1, p2 in itertools.combinations(self.parts, 2):
-            # white doesn;t combine with anything
-            if p1.name == 'white' or p2.name == 'white':
+            if isinstance(p1, White) or isinstance(p2, White):
                 pass
             # rbf X bias
-            elif p1.name == 'bias' and p2.name == 'rbf':
-                p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S)
-            elif p2.name == 'bias' and p1.name == 'rbf':
-                p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S)
+            elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
+                p2.dpsi1_dmuS(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target_mu, target_S)
+            elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
+                p1.dpsi1_dmuS(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target_mu, target_S)
             # linear X bias
-            elif p1.name == 'bias' and p2.name == 'linear':
-                p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S)
-            elif p2.name == 'bias' and p1.name == 'linear':
-                p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S)
-            # rbf X linear
-            elif p1.name == 'linear' and p2.name == 'rbf':
-                raise NotImplementedError # TODO
-            elif p2.name == 'linear' and p1.name == 'rbf':
-                raise NotImplementedError # TODO
+            elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, Linear):
+                p2.dpsi1_dmuS(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target_mu, target_S)
+            elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, Linear):
+                p1.dpsi1_dmuS(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target_mu, target_S)
+            # rbf X any
+            elif False:#isinstance(p1, (RBF, RBFInv)) or isinstance(p2, (RBF, RBFInv)):
+                if isinstance(p2, (RBF, RBFInv)) and not isinstance(p1, (RBF, RBFInv)):
+                    p1t = p1; p1 = p2; p2 = p1t; del p1t  
+                N, M = mu.shape[0], Z.shape[0]; NM=N*M
+                psi11 = np.zeros((N, M))
+                psi12 = np.zeros((NM, M))
+                #psi12_t = np.zeros((N,M))
+                
+                p1.psi1(Z, mu, S, psi11)
+                Mu, Sigma = p1._crossterm_mu_S(Z, mu, S)
+                Mu, Sigma = Mu.reshape(NM,self.input_dim), Sigma.reshape(NM,self.input_dim)
+                
+                p2.psi1(Z, Mu, Sigma, psi12)
+                p1.dpsi1_dmuS((dL_dpsi2*psi12.reshape(N,M,M)).sum(1), Z, mu, S, target_mu, target_S)
+                p1.dpsi1_dmuS((dL_dpsi2*psi12.reshape(N,M,M)).sum(2), Z, mu, S, target_mu, target_S)
+                
+                #p2.dpsi1_dtheta((dL_dpsi2*(psi11[:,:,None] + psi11[:,None,:])).reshape(NM,M), Z, Mu, Sigma, target)
+                p2.dpsi1_dmuS((dL_dpsi2*(psi11[:,:,None])).sum(1)*2, Z, Mu.reshape(N,M,self.input_dim).sum(1), Sigma.reshape(N,M,self.input_dim).sum(1), target_mu, target_S)
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
-
         return target_mu, target_S
+    
     def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs):
         if which_parts == 'all':
             which_parts = [True] * self.num_parts
diff --git a/GPy/kern/parts/rbf.py b/GPy/kern/parts/rbf.py
index 56a6b0eb..dbc689d5 100644
--- a/GPy/kern/parts/rbf.py
+++ b/GPy/kern/parts/rbf.py
@@ -186,7 +186,7 @@ class RBF(Kernpart):
         self._psi_computations(Z, mu, S)
         target[0] += np.sum(dL_dpsi1 * self._psi1 / self.variance)
         d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
-        dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
+        dpsi1_dlength = d_length * np.atleast_3d(dL_dpsi1)
         if not self.ARD:
             target[1] += dpsi1_dlength.sum()
         else:
@@ -208,22 +208,19 @@ class RBF(Kernpart):
         self._psi_computations(Z, mu, S)
         target += self._psi2
 
-    def _crossterm_product_expectation(self, K, Z, mu, S):
+    def _crossterm_mu_S(self, Z, mu, S):
         # compute the crossterm expectation for K as the other kernel:
-        import ipdb;ipdb.set_trace()
-        Sigma = 1./self.lengthscale[None,:] + 1./S # is independent across M, 
-        M = (Z[None,:,:]/self.lengthscale[None,None,:] + (mu/S)[:,None,:]) / Sigma[:,None,:]
-        psi1_other = K.psi1()
-        self.variance
-        # return is [N x M x M]
-        return 
+        Sigma = 1./self.lengthscale2[None,None,:] + 1./S[:,None,:] # is independent across M, 
+        Sigma_tilde = (self.lengthscale2[None, :] + S)
+        M = (S*mu/Sigma_tilde)[:, None, :] + (self.lengthscale2[None,:]*Z)[None, :, :]/Sigma_tilde[:, None, :]
+        # make sure return is [N x M x Q]
+        return M, Sigma.repeat(Z.shape[0],1) 
 
     def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
         """Shape N,num_inducing,num_inducing,Ntheta"""
         self._psi_computations(Z, mu, S)
         d_var = 2.*self._psi2 / self.variance
         d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom)
-
         target[0] += np.sum(dL_dpsi2 * d_var)
         dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
         if not self.ARD:
@@ -306,8 +303,8 @@ class RBF(Kernpart):
         psi2 = np.empty((N, num_inducing, num_inducing))
 
         psi2_Zdist_sq = self._psi2_Zdist_sq
-        _psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
-        half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
+        _psi2_denom = self._psi2_denom.squeeze().reshape(-1, input_dim)
+        half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(-1, input_dim)
         variance_sq = float(np.square(self.variance))
         if self.ARD:
             lengthscale2 = self.lengthscale2

From 76bfbee5455a331db25cf4d7443ba760bf10d7d4 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 20 Nov 2013 11:58:30 +0000
Subject: [PATCH 12/28] psistattests update

---
 GPy/kern/kern.py                          |  3 ++
 GPy/testing/psi_stat_expectation_tests.py | 42 +++++++++++------------
 GPy/testing/psi_stat_gradient_tests.py    | 38 ++++++++++++++------
 3 files changed, 51 insertions(+), 32 deletions(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 5cd5b6aa..f021dc3a 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -412,6 +412,9 @@ class kern(Parameterized):
         [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
         return self._transform_gradients(target)
 
+    def dpsi0_dZ(self, dL_dpsi0, Z, mu, S):
+        return np.zeros_like(Z)
+
     def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
         target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
         [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
diff --git a/GPy/testing/psi_stat_expectation_tests.py b/GPy/testing/psi_stat_expectation_tests.py
index ae3d1022..90252197 100644
--- a/GPy/testing/psi_stat_expectation_tests.py
+++ b/GPy/testing/psi_stat_expectation_tests.py
@@ -28,8 +28,8 @@ def ard(p):
 class Test(unittest.TestCase):
     input_dim = 9
     num_inducing = 13
-    N = 30
-    Nsamples = 9e6
+    N = 300
+    Nsamples = 1e6
 
     def setUp(self):
         i_s_dim_list = [2,4,3]
@@ -50,20 +50,20 @@ class Test(unittest.TestCase):
 #                        GPy.kern.linear(self.input_dim, ARD=True) +
 #                        GPy.kern.bias(self.input_dim) +
 #                        GPy.kern.white(self.input_dim)),
-#                     (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
-                    (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
-                     +GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True)
-#                     GPy.kern.bias(self.input_dim) +
-#                     GPy.kern.white(self.input_dim)),
+                    (#GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
+                     GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True)
+                     +GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
+#                      +GPy.kern.bias(self.input_dim)
+#                      +GPy.kern.white(self.input_dim)),
                     ),
-        (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
-         +GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
-         #+GPy.kern.bias(self.input_dim, np.random.rand())
-         #+GPy.kern.white(self.input_dim, np.random.rand())),
-         ),
-                (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
-                    GPy.kern.bias(self.input_dim, np.random.rand()) +
-                    GPy.kern.white(self.input_dim, np.random.rand())),
+#                     (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
+#                      GPy.kern.bias(self.input_dim, np.random.rand())),
+#         (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
+#          +GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
+#          #+GPy.kern.bias(self.input_dim, np.random.rand())
+#          #+GPy.kern.white(self.input_dim, np.random.rand())),
+#         ),
+#                     GPy.kern.white(self.input_dim, np.random.rand())),
 #                     GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True),
 #                       GPy.kern.linear(self.input_dim, ARD=False), GPy.kern.linear(self.input_dim, ARD=True),
 #                       GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim),
@@ -120,25 +120,25 @@ class Test(unittest.TestCase):
             diffs = []
             for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
                 K = kern.K(q_x_sample_stripe, self.Z)
-                K = (K[:, :, None] * K[:, None, :]).mean(0)
-                K_ += K
-                diffs.append(((psi2 - (K_ / (i + 1)))**2).mean())
-            K_ /= self.Nsamples / Nsamples
+                K = (K[:, :, None] * K[:, None, :])
+                K_ += K.sum(0) / self.Nsamples
+                diffs.append(((psi2 - (K_*self.Nsamples/((i+1)*Nsamples)))**2).mean())
+            #K_ /= self.Nsamples / Nsamples
             msg = "psi2: {}".format("+".join([p.name + ard(p) for p in kern.parts]))
             try:
                 import pylab
                 pylab.figure(msg)
-                pylab.plot(diffs, marker='x', mew=1.3)
+                pylab.plot(diffs, marker='x', mew=.2)
 #                 print msg, np.allclose(psi2.squeeze(), K_, rtol=1e-1, atol=.1)
                 self.assertTrue(np.allclose(psi2.squeeze(), K_),
                                             #rtol=1e-1, atol=.1),
                                 msg=msg + ": not matching")
 #                 sys.stdout.write(".")
             except:
-#                 import ipdb;ipdb.set_trace()
 #                 kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
 #                 sys.stdout.write("E")
                 print msg + ": not matching"
+                import ipdb;ipdb.set_trace()
                 pass
 
 if __name__ == "__main__":
diff --git a/GPy/testing/psi_stat_gradient_tests.py b/GPy/testing/psi_stat_gradient_tests.py
index de670f41..edb0f02e 100644
--- a/GPy/testing/psi_stat_gradient_tests.py
+++ b/GPy/testing/psi_stat_gradient_tests.py
@@ -40,10 +40,9 @@ class PsiStatModel(Model):
         return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()
     def _log_likelihood_gradients(self):
         psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
-        try:
-            psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
-        except AttributeError:
-            psiZ = numpy.zeros(self.num_inducing * self.input_dim)
+        #psimu, psiS = numpy.ones(self.N * self.input_dim), numpy.ones(self.N * self.input_dim)
+        psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
+        #psiZ = numpy.ones(self.num_inducing * self.input_dim)
         thetagrad = self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance).flatten()
         return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad))
 
@@ -116,9 +115,9 @@ if __name__ == "__main__":
 #         m.randomize()
 # #         self.assertTrue(m.checkgrad())
         numpy.random.seed(0)
-        input_dim = 5
-        N = 50
-        num_inducing = 10
+        input_dim = 3
+        N = 3
+        num_inducing = 2
         D = 15
         X = numpy.random.randn(N, input_dim)
         X_var = .5 * numpy.ones_like(X) + .1 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
@@ -143,10 +142,27 @@ if __name__ == "__main__":
 #                          num_inducing=num_inducing, kernel=kernel)
 #         m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
 #                          num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim))
-        m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
-                         num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)))
+#         m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
+#                          num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)))
         # + GPy.kern.bias(input_dim))
-#         m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
-#                          num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim))
+#         m = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
+#                          num_inducing=num_inducing, 
+#                          kernel=(
+#             GPy.kern.rbf(input_dim, ARD=1) 
+#             +GPy.kern.linear(input_dim, ARD=1) 
+#             +GPy.kern.bias(input_dim))
+#                          )
+#         m.ensure_default_constraints()
+        m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
+                         num_inducing=num_inducing, kernel=(
+            GPy.kern.rbf(input_dim, numpy.random.rand(), numpy.random.rand(input_dim), ARD=1) 
+            #+GPy.kern.linear(input_dim, numpy.random.rand(input_dim), ARD=1) 
+            #+GPy.kern.rbf(input_dim, numpy.random.rand(), numpy.random.rand(input_dim), ARD=1) 
+            #+GPy.kern.rbf(input_dim, numpy.random.rand(), numpy.random.rand(), ARD=0) 
+            +GPy.kern.bias(input_dim)
+            +GPy.kern.white(input_dim)
+            )
+            )
+        m2.ensure_default_constraints()
     else:
         unittest.main()

From f114b9fff588fb84c8908af82ea7ee8490a4e755 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 20 Nov 2013 12:47:06 +0000
Subject: [PATCH 13/28] rename models to _models and import models in models.py

---
 GPy/_models/__init__.py                       | 19 ++++++++++++++++
 GPy/{models => _models}/bayesian_gplvm.py     |  4 ++--
 GPy/{models => _models}/bcgplvm.py            |  0
 .../fitc_classification.py                    |  0
 GPy/{models => _models}/gp_classification.py  |  0
 .../gp_multioutput_regression.py              |  0
 GPy/{models => _models}/gp_regression.py      |  1 -
 GPy/{models => _models}/gplvm.py              | 15 +++++--------
 GPy/{models => _models}/gradient_checker.py   |  0
 GPy/{models => _models}/mrd.py                |  4 ++--
 .../sparse_gp_classification.py               |  0
 .../sparse_gp_multioutput_regression.py       |  0
 .../sparse_gp_regression.py                   |  0
 GPy/{models => _models}/sparse_gplvm.py       |  4 ++--
 GPy/{models => _models}/svigp_regression.py   |  0
 GPy/{models => _models}/warped_gp.py          |  0
 GPy/models.py                                 | 22 +++++++++++++++++++
 GPy/models/__init__.py                        | 19 ----------------
 18 files changed, 53 insertions(+), 35 deletions(-)
 create mode 100644 GPy/_models/__init__.py
 rename GPy/{models => _models}/bayesian_gplvm.py (99%)
 rename GPy/{models => _models}/bcgplvm.py (100%)
 rename GPy/{models => _models}/fitc_classification.py (100%)
 rename GPy/{models => _models}/gp_classification.py (100%)
 rename GPy/{models => _models}/gp_multioutput_regression.py (100%)
 rename GPy/{models => _models}/gp_regression.py (98%)
 rename GPy/{models => _models}/gplvm.py (87%)
 rename GPy/{models => _models}/gradient_checker.py (100%)
 rename GPy/{models => _models}/mrd.py (99%)
 rename GPy/{models => _models}/sparse_gp_classification.py (100%)
 rename GPy/{models => _models}/sparse_gp_multioutput_regression.py (100%)
 rename GPy/{models => _models}/sparse_gp_regression.py (100%)
 rename GPy/{models => _models}/sparse_gplvm.py (96%)
 rename GPy/{models => _models}/svigp_regression.py (100%)
 rename GPy/{models => _models}/warped_gp.py (100%)
 create mode 100644 GPy/models.py
 delete mode 100644 GPy/models/__init__.py

diff --git a/GPy/_models/__init__.py b/GPy/_models/__init__.py
new file mode 100644
index 00000000..6fc93631
--- /dev/null
+++ b/GPy/_models/__init__.py
@@ -0,0 +1,19 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+# from gp_regression import GPRegression; _gp_regression = gp_regression ; del gp_regression 
+# from gp_classification import GPClassification; _gp_classification = gp_classification ; del gp_classification 
+# from sparse_gp_regression import SparseGPRegression; _sparse_gp_regression = sparse_gp_regression ; del sparse_gp_regression 
+# from svigp_regression import SVIGPRegression; _svigp_regression = svigp_regression ; del svigp_regression 
+# from sparse_gp_classification import SparseGPClassification; _sparse_gp_classification = sparse_gp_classification ; del sparse_gp_classification 
+# from fitc_classification import FITCClassification; _fitc_classification = fitc_classification ; del fitc_classification 
+# from gplvm import GPLVM; _gplvm = gplvm ; del gplvm 
+# from bcgplvm import BCGPLVM; _bcgplvm = bcgplvm; del bcgplvm
+# from sparse_gplvm import SparseGPLVM; _sparse_gplvm = sparse_gplvm ; del sparse_gplvm 
+# from warped_gp import WarpedGP; _warped_gp = warped_gp ; del warped_gp 
+# from bayesian_gplvm import BayesianGPLVM; _bayesian_gplvm = bayesian_gplvm ; del bayesian_gplvm 
+# from mrd import MRD; _mrd = mrd ; del mrd 
+# from gradient_checker import GradientChecker; _gradient_checker = gradient_checker ; del gradient_checker 
+# from gp_multioutput_regression import GPMultioutputRegression; _gp_multioutput_regression = gp_multioutput_regression ; del gp_multioutput_regression 
+# from sparse_gp_multioutput_regression import SparseGPMultioutputRegression; _sparse_gp_multioutput_regression = sparse_gp_multioutput_regression ; del sparse_gp_multioutput_regression 
+
diff --git a/GPy/models/bayesian_gplvm.py b/GPy/_models/bayesian_gplvm.py
similarity index 99%
rename from GPy/models/bayesian_gplvm.py
rename to GPy/_models/bayesian_gplvm.py
index 21b46a8a..2b299ad8 100644
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/_models/bayesian_gplvm.py
@@ -2,14 +2,14 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 import numpy as np
-from ..core import SparseGP
+from ..core.sparse_gp import SparseGP
 from ..likelihoods import Gaussian
 from .. import kern
 import itertools
 from matplotlib.colors import colorConverter
 from GPy.inference.optimization import SCG
 from GPy.util import plot_latent, linalg
-from GPy.models.gplvm import GPLVM
+from .gplvm import GPLVM
 from GPy.util.plot_latent import most_significant_input_dimensions
 from matplotlib import pyplot
 
diff --git a/GPy/models/bcgplvm.py b/GPy/_models/bcgplvm.py
similarity index 100%
rename from GPy/models/bcgplvm.py
rename to GPy/_models/bcgplvm.py
diff --git a/GPy/models/fitc_classification.py b/GPy/_models/fitc_classification.py
similarity index 100%
rename from GPy/models/fitc_classification.py
rename to GPy/_models/fitc_classification.py
diff --git a/GPy/models/gp_classification.py b/GPy/_models/gp_classification.py
similarity index 100%
rename from GPy/models/gp_classification.py
rename to GPy/_models/gp_classification.py
diff --git a/GPy/models/gp_multioutput_regression.py b/GPy/_models/gp_multioutput_regression.py
similarity index 100%
rename from GPy/models/gp_multioutput_regression.py
rename to GPy/_models/gp_multioutput_regression.py
diff --git a/GPy/models/gp_regression.py b/GPy/_models/gp_regression.py
similarity index 98%
rename from GPy/models/gp_regression.py
rename to GPy/_models/gp_regression.py
index 633fc1c8..8b44c1ba 100644
--- a/GPy/models/gp_regression.py
+++ b/GPy/_models/gp_regression.py
@@ -2,7 +2,6 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
-import numpy as np
 from ..core import GP
 from .. import likelihoods
 from .. import kern
diff --git a/GPy/models/gplvm.py b/GPy/_models/gplvm.py
similarity index 87%
rename from GPy/models/gplvm.py
rename to GPy/_models/gplvm.py
index 795389a7..f27f861c 100644
--- a/GPy/models/gplvm.py
+++ b/GPy/_models/gplvm.py
@@ -4,15 +4,11 @@
 
 import numpy as np
 import pylab as pb
-import sys, pdb
 from .. import kern
-from ..core import Model
-from ..util.linalg import pdinv, PCA
-from ..core.priors import Gaussian as Gaussian_prior
+from ..core import priors
 from ..core import GP
 from ..likelihoods import Gaussian
 from .. import util
-from GPy.util import plot_latent
 
 
 class GPLVM(GP):
@@ -34,12 +30,13 @@ class GPLVM(GP):
             kernel = kern.rbf(input_dim, ARD=input_dim > 1) + kern.bias(input_dim, np.exp(-2))
         likelihood = Gaussian(Y, normalize=normalize_Y, variance=np.exp(-2.))
         GP.__init__(self, X, likelihood, kernel, normalize_X=False)
-        self.set_prior('.*X', Gaussian_prior(0, 1))
+        self.set_prior('.*X', priors.Gaussian(0, 1))
         self.ensure_default_constraints()
 
     def initialise_latent(self, init, input_dim, Y):
         Xr = np.random.randn(Y.shape[0], input_dim)
         if init == 'PCA':
+            from ..util.linalg import PCA
             PC = PCA(Y, input_dim)[0]
             Xr[:PC.shape[0], :PC.shape[1]] = PC
         return Xr
@@ -62,15 +59,15 @@ class GPLVM(GP):
     def jacobian(self,X):
         target = np.zeros((X.shape[0],X.shape[1],self.output_dim))
         for i in range(self.output_dim):
-        	target[:,:,i] = self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
+            target[:,:,i] = self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
         return target
    
     def magnification(self,X):
         target=np.zeros(X.shape[0])
         J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
-    	J=self.jacobian(X)
+        J=self.jacobian(X)
         for i in range(X.shape[0]):
-		    target[i]=np.sqrt(pb.det(np.dot(J[i,:,:],np.transpose(J[i,:,:]))))
+            target[i]=np.sqrt(pb.det(np.dot(J[i,:,:],np.transpose(J[i,:,:]))))
         return target
 
     def plot(self):
diff --git a/GPy/models/gradient_checker.py b/GPy/_models/gradient_checker.py
similarity index 100%
rename from GPy/models/gradient_checker.py
rename to GPy/_models/gradient_checker.py
diff --git a/GPy/models/mrd.py b/GPy/_models/mrd.py
similarity index 99%
rename from GPy/models/mrd.py
rename to GPy/_models/mrd.py
index 2aaa731c..b9c99a64 100644
--- a/GPy/models/mrd.py
+++ b/GPy/_models/mrd.py
@@ -9,8 +9,8 @@ from GPy.util.linalg import PCA
 import numpy
 import itertools
 import pylab
-from GPy.kern.kern import kern
-from GPy.models.bayesian_gplvm import BayesianGPLVM
+from ..kern import kern
+from bayesian_gplvm import BayesianGPLVM
 
 class MRD(Model):
     """
diff --git a/GPy/models/sparse_gp_classification.py b/GPy/_models/sparse_gp_classification.py
similarity index 100%
rename from GPy/models/sparse_gp_classification.py
rename to GPy/_models/sparse_gp_classification.py
diff --git a/GPy/models/sparse_gp_multioutput_regression.py b/GPy/_models/sparse_gp_multioutput_regression.py
similarity index 100%
rename from GPy/models/sparse_gp_multioutput_regression.py
rename to GPy/_models/sparse_gp_multioutput_regression.py
diff --git a/GPy/models/sparse_gp_regression.py b/GPy/_models/sparse_gp_regression.py
similarity index 100%
rename from GPy/models/sparse_gp_regression.py
rename to GPy/_models/sparse_gp_regression.py
diff --git a/GPy/models/sparse_gplvm.py b/GPy/_models/sparse_gplvm.py
similarity index 96%
rename from GPy/models/sparse_gplvm.py
rename to GPy/_models/sparse_gplvm.py
index 6e7e40b1..ab616d5a 100644
--- a/GPy/models/sparse_gplvm.py
+++ b/GPy/_models/sparse_gplvm.py
@@ -5,8 +5,8 @@
 import numpy as np
 import pylab as pb
 import sys, pdb
-from GPy.models.sparse_gp_regression import SparseGPRegression
-from GPy.models.gplvm import GPLVM
+from sparse_gp_regression import SparseGPRegression
+from gplvm import GPLVM
 # from .. import kern
 # from ..core import model
 # from ..util.linalg import pdinv, PCA
diff --git a/GPy/models/svigp_regression.py b/GPy/_models/svigp_regression.py
similarity index 100%
rename from GPy/models/svigp_regression.py
rename to GPy/_models/svigp_regression.py
diff --git a/GPy/models/warped_gp.py b/GPy/_models/warped_gp.py
similarity index 100%
rename from GPy/models/warped_gp.py
rename to GPy/_models/warped_gp.py
diff --git a/GPy/models.py b/GPy/models.py
new file mode 100644
index 00000000..9a847ea0
--- /dev/null
+++ b/GPy/models.py
@@ -0,0 +1,22 @@
+'''
+Created on 14 Nov 2013
+
+@author: maxz
+'''
+
+from _models.bayesian_gplvm import BayesianGPLVM
+from _models.gp_regression import GPRegression
+from _models.gp_classification import GPClassification#; _gp_classification = gp_classification ; del gp_classification 
+from _models.sparse_gp_regression import SparseGPRegression#; _sparse_gp_regression = sparse_gp_regression ; del sparse_gp_regression 
+from _models.svigp_regression import SVIGPRegression#; _svigp_regression = svigp_regression ; del svigp_regression 
+from _models.sparse_gp_classification import SparseGPClassification#; _sparse_gp_classification = sparse_gp_classification ; del sparse_gp_classification 
+from _models.fitc_classification import FITCClassification#; _fitc_classification = fitc_classification ; del fitc_classification 
+from _models.gplvm import GPLVM#; _gplvm = gplvm ; del gplvm 
+from _models.bcgplvm import BCGPLVM#; _bcgplvm = bcgplvm; del bcgplvm
+from _models.sparse_gplvm import SparseGPLVM#; _sparse_gplvm = sparse_gplvm ; del sparse_gplvm 
+from _models.warped_gp import WarpedGP#; _warped_gp = warped_gp ; del warped_gp 
+from _models.bayesian_gplvm import BayesianGPLVM#; _bayesian_gplvm = bayesian_gplvm ; del bayesian_gplvm 
+from _models.mrd import MRD#; _mrd = mrd; del mrd 
+from _models.gradient_checker import GradientChecker#; _gradient_checker = gradient_checker ; del gradient_checker 
+from _models.gp_multioutput_regression import GPMultioutputRegression#; _gp_multioutput_regression = gp_multioutput_regression ; del gp_multioutput_regression 
+from _models.sparse_gp_multioutput_regression import SparseGPMultioutputRegression#; _sparse_gp_multioutput_regression = sparse_gp_multioutput_regression ; del sparse_gp_multioutput_regression 
diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py
deleted file mode 100644
index a8be5890..00000000
--- a/GPy/models/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-from gp_regression import GPRegression; _gp_regression = gp_regression ; del gp_regression 
-from gp_classification import GPClassification; _gp_classification = gp_classification ; del gp_classification 
-from sparse_gp_regression import SparseGPRegression; _sparse_gp_regression = sparse_gp_regression ; del sparse_gp_regression 
-from svigp_regression import SVIGPRegression; _svigp_regression = svigp_regression ; del svigp_regression 
-from sparse_gp_classification import SparseGPClassification; _sparse_gp_classification = sparse_gp_classification ; del sparse_gp_classification 
-from fitc_classification import FITCClassification; _fitc_classification = fitc_classification ; del fitc_classification 
-from gplvm import GPLVM; _gplvm = gplvm ; del gplvm 
-from bcgplvm import BCGPLVM; _bcgplvm = bcgplvm; del bcgplvm
-from sparse_gplvm import SparseGPLVM; _sparse_gplvm = sparse_gplvm ; del sparse_gplvm 
-from warped_gp import WarpedGP; _warped_gp = warped_gp ; del warped_gp 
-from bayesian_gplvm import BayesianGPLVM; _bayesian_gplvm = bayesian_gplvm ; del bayesian_gplvm 
-from mrd import MRD; _mrd = mrd ; del mrd 
-from gradient_checker import GradientChecker; _gradient_checker = gradient_checker ; del gradient_checker 
-from gp_multioutput_regression import GPMultioutputRegression; _gp_multioutput_regression = gp_multioutput_regression ; del gp_multioutput_regression 
-from sparse_gp_multioutput_regression import SparseGPMultioutputRegression; _sparse_gp_multioutput_regression = sparse_gp_multioutput_regression ; del sparse_gp_multioutput_regression 
-

From d4dff8360bd8770c853e709d9fc030b799c2d962 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 20 Nov 2013 12:47:55 +0000
Subject: [PATCH 14/28] testing imports update and expected failure for
 crossterms

---
 GPy/testing/bgplvm_tests.py            |  2 +-
 GPy/testing/psi_stat_gradient_tests.py | 34 ++++++++++++++++++--------
 GPy/testing/sparse_gplvm_tests.py      |  2 +-
 GPy/testing/unit_tests.py              |  2 ++
 4 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/GPy/testing/bgplvm_tests.py b/GPy/testing/bgplvm_tests.py
index a8777e11..1192448a 100644
--- a/GPy/testing/bgplvm_tests.py
+++ b/GPy/testing/bgplvm_tests.py
@@ -4,7 +4,7 @@
 import unittest
 import numpy as np
 import GPy
-from GPy.models.bayesian_gplvm import BayesianGPLVM
+from ..models import BayesianGPLVM
 
 class BGPLVMTests(unittest.TestCase):
     def test_bias_kern(self):
diff --git a/GPy/testing/psi_stat_gradient_tests.py b/GPy/testing/psi_stat_gradient_tests.py
index edb0f02e..e373aaa3 100644
--- a/GPy/testing/psi_stat_gradient_tests.py
+++ b/GPy/testing/psi_stat_gradient_tests.py
@@ -63,40 +63,54 @@ class DPsiStatTest(unittest.TestCase):
 
     def testPsi0(self):
         for k in self.kernels:
-            m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,
+            m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,\
                              num_inducing=self.num_inducing, kernel=k)
+            m.ensure_default_constraints()
+            m.randomize()
             assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts)))
-
-#     def testPsi1(self):
-#         for k in self.kernels:
-#             m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z,
-#                      num_inducing=self.num_inducing, kernel=k)
-#             assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
+        
+    def testPsi1(self):
+        for k in self.kernels:
+            m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z,
+                     num_inducing=self.num_inducing, kernel=k)
+            m.ensure_default_constraints()
+            m.randomize()
+            assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
 
     def testPsi2_lin(self):
         k = self.kernels[0]
         m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
-                     num_inducing=self.num_inducing, kernel=k)
+                 num_inducing=self.num_inducing, kernel=k)
+        m.ensure_default_constraints()
+        m.randomize()
         assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
     def testPsi2_lin_bia(self):
         k = self.kernels[3]
         m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
                      num_inducing=self.num_inducing, kernel=k)
+        m.ensure_default_constraints()
+        m.randomize()
         assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
     def testPsi2_rbf(self):
         k = self.kernels[1]
         m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
                      num_inducing=self.num_inducing, kernel=k)
+        m.ensure_default_constraints()
+        m.randomize()
         assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
     def testPsi2_rbf_bia(self):
         k = self.kernels[-1]
         m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
                      num_inducing=self.num_inducing, kernel=k)
+        m.ensure_default_constraints()
+        m.randomize()
         assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
     def testPsi2_bia(self):
         k = self.kernels[2]
         m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
                      num_inducing=self.num_inducing, kernel=k)
+        m.ensure_default_constraints()
+        m.randomize()
         assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
 
 
@@ -134,8 +148,8 @@ if __name__ == "__main__":
 #                      num_inducing=num_inducing, kernel=k)
 #             assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
 #
-#         m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
-#                          num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim))
+        m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
+                         num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim)+GPy.kern.bias(input_dim))
 #         m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
 #                          num_inducing=num_inducing, kernel=kernel)
 #         m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
diff --git a/GPy/testing/sparse_gplvm_tests.py b/GPy/testing/sparse_gplvm_tests.py
index e27fccff..c3942b95 100644
--- a/GPy/testing/sparse_gplvm_tests.py
+++ b/GPy/testing/sparse_gplvm_tests.py
@@ -4,7 +4,7 @@
 import unittest
 import numpy as np
 import GPy
-from GPy.models.sparse_gplvm import SparseGPLVM
+from ..models import SparseGPLVM
 
 class sparse_GPLVMTests(unittest.TestCase):
     def test_bias_kern(self):
diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py
index 818cb56e..69a15a7f 100644
--- a/GPy/testing/unit_tests.py
+++ b/GPy/testing/unit_tests.py
@@ -163,11 +163,13 @@ class GradientTests(unittest.TestCase):
         rbflin = GPy.kern.rbf(2) + GPy.kern.linear(2)
         self.check_model(rbflin, model_type='SparseGPRegression', dimension=2)
 
+    @unittest.expectedFailure
     def test_SparseGPRegression_rbf_linear_white_kern_2D_uncertain_inputs(self):
         ''' Testing the sparse GP regression with rbf, linear kernel on 2d data with uncertain inputs'''
         rbflin = GPy.kern.rbf(2) + GPy.kern.linear(2)
         self.check_model(rbflin, model_type='SparseGPRegression', dimension=2, uncertain_inputs=1)
 
+    @unittest.expectedFailure
     def test_SparseGPRegression_rbf_linear_white_kern_1D_uncertain_inputs(self):
         ''' Testing the sparse GP regression with rbf, linear kernel on 1d data with uncertain inputs'''
         rbflin = GPy.kern.rbf(1) + GPy.kern.linear(1)

From f04a4fa98bc394fb41c9e6914006f92c100ad280 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 20 Nov 2013 12:48:09 +0000
Subject: [PATCH 15/28] dim reduction imports

---
 GPy/examples/dimensionality_reduction.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 666209f9..cdd69ab5 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -6,8 +6,8 @@ from matplotlib import pyplot as plt, cm
 
 import GPy
 from GPy.core.transformations import logexp
-from GPy.models.bayesian_gplvm import BayesianGPLVM
 from GPy.likelihoods.gaussian import Gaussian
+from GPy.models import BayesianGPLVM
 
 default_seed = np.random.seed(123344)
 

From 3a08c0d9ab546a7a5969c7c80e83f2fc90054329 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Wed, 20 Nov 2013 14:37:14 +0000
Subject: [PATCH 16/28] skipping crossterm tests instead of expected failure

---
 GPy/testing/unit_tests.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py
index 69a15a7f..9269a4c4 100644
--- a/GPy/testing/unit_tests.py
+++ b/GPy/testing/unit_tests.py
@@ -163,16 +163,18 @@ class GradientTests(unittest.TestCase):
         rbflin = GPy.kern.rbf(2) + GPy.kern.linear(2)
         self.check_model(rbflin, model_type='SparseGPRegression', dimension=2)
 
-    @unittest.expectedFailure
+    #@unittest.expectedFailure
     def test_SparseGPRegression_rbf_linear_white_kern_2D_uncertain_inputs(self):
         ''' Testing the sparse GP regression with rbf, linear kernel on 2d data with uncertain inputs'''
         rbflin = GPy.kern.rbf(2) + GPy.kern.linear(2)
+        raise unittest.SkipTest("This is not implemented yet!")
         self.check_model(rbflin, model_type='SparseGPRegression', dimension=2, uncertain_inputs=1)
 
-    @unittest.expectedFailure
+    #@unittest.expectedFailure
     def test_SparseGPRegression_rbf_linear_white_kern_1D_uncertain_inputs(self):
         ''' Testing the sparse GP regression with rbf, linear kernel on 1d data with uncertain inputs'''
         rbflin = GPy.kern.rbf(1) + GPy.kern.linear(1)
+        raise unittest.SkipTest("This is not implemented yet!")
         self.check_model(rbflin, model_type='SparseGPRegression', dimension=1, uncertain_inputs=1)
 
     def test_GPLVM_rbf_bias_white_kern_2D(self):

From f9e2a389e862a56d43ecefd96788982fae60be73 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Thu, 21 Nov 2013 20:20:03 +0000
Subject: [PATCH 17/28] Committing change for master check out.

---
 GPy/kern/parts/sympykern.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/parts/sympykern.py
index 7f7fba11..7b98e47b 100644
--- a/GPy/kern/parts/sympykern.py
+++ b/GPy/kern/parts/sympykern.py
@@ -345,7 +345,7 @@ class spkern(Kernpart):
         self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
         self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[')
         self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z2(', 'X2(')
-        self._dK_dX_code_X = self._dK_dX_code.replace('Z2(', 'X2(')
+        self._dK_dX_code_X = self._dK_dX_code_X.replace('Z2(', 'X2(')
 
 
         #TODO: insert multiple functions here via string manipulation

From a8cf725102af1dee769207472bbf59ccede8eec8 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Thu, 21 Nov 2013 20:51:28 +0000
Subject: [PATCH 18/28] removed some sympy stuff

---
 GPy/kern/constructors.py    | 22 -----------------
 GPy/kern/parts/sympykern.py |  8 +++---
 GPy/testing/kernel_tests.py |  6 +----
 GPy/util/symbolic.py        | 49 -------------------------------------
 4 files changed, 6 insertions(+), 79 deletions(-)

diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 083960b4..4ab06bba 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -292,7 +292,6 @@ except ImportError:
 if sympy_available:
     from parts.sympykern import spkern
     from sympy.parsing.sympy_parser import parse_expr
-    from GPy.util.symbolic import sinc
     
     def rbf_sympy(input_dim, ARD=False, variance=1., lengthscale=1.):
         """
@@ -337,27 +336,6 @@ if sympy_available:
             f =  scale_i*scale_j*sp.exp(-dist/(2*(lengthscale_i**2 + lengthscale_j**2 + shared_lengthscale**2)))
         return kern(input_dim, [spkern(input_dim, f, output_dim=output_dim, name='eq_sympy')])
 
-    def sinc(input_dim, ARD=False, variance=1., lengthscale=1.):
-        """
-        TODO: Not clear why this isn't working, suggests argument of sinc is not a number.
-        sinc covariance funciton
-        """
-        X = sp.symbols('x_:' + str(input_dim))
-        Z = sp.symbols('z_:' + str(input_dim))
-        variance = sp.var('variance',positive=True)
-        if ARD:
-            lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)]
-            dist_string = ' + '.join(['(x_%i-z_%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
-            dist = parse_expr(dist_string)
-            f =  variance*sinc(sp.pi*sp.sqrt(dist))
-        else:
-            lengthscale = sp.var('lengthscale',positive=True)
-            dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(input_dim)])
-            dist = parse_expr(dist_string)
-            f =  variance*sinc(sp.pi*sp.sqrt(dist)/lengthscale)
-            
-        return kern(input_dim, [spkern(input_dim, f, name='sinc')])
-
     def sympykern(input_dim, k=None, output_dim=1, name=None, param=None):
         """
         A base kernel object, where all the hard work in done by sympy.
diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/parts/sympykern.py
index 7f7fba11..d109fea7 100644
--- a/GPy/kern/parts/sympykern.py
+++ b/GPy/kern/parts/sympykern.py
@@ -11,6 +11,7 @@ import tempfile
 import pdb
 import ast
 from kernpart import Kernpart
+from ...util.config import config
 
 class spkern(Kernpart):
     """
@@ -110,8 +111,9 @@ class spkern(Kernpart):
             'headers':['"sympy_helpers.h"'],
             'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],
             'extra_compile_args':extra_compile_args,
-            'extra_link_args':['-lgomp'],
+            'extra_link_args':[],
             'verbose':True}
+        if config.getboolean('parallel', 'openmp'): self.weave_kwargs.append('-lgomp')
 
     def __add__(self,other):
         return spkern(self._sp_k+other._sp_k)
@@ -343,9 +345,9 @@ class spkern(Kernpart):
 
         # Code to use when only X is provided. 
         self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
-        self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[')
+        self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[') 
         self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z2(', 'X2(')
-        self._dK_dX_code_X = self._dK_dX_code.replace('Z2(', 'X2(')
+        self._dK_dX_code_X = self._dK_dX_code_X.replace('Z2(', 'X2(')
 
 
         #TODO: insert multiple functions here via string manipulation
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index f64dac2b..301fa54f 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -34,11 +34,7 @@ class KernelTests(unittest.TestCase):
             self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
 
     def test_eq_sympykernel(self):
-        kern = GPy.kern.eq_sympy(5, 3, output_ind=4)
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
-
-    def test_sinckernel(self):
-        kern = GPy.kern.sinc(5)
+        kern = GPy.kern.eq_sympy(5, 3)
         self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
 
     def test_rbf_invkernel(self):
diff --git a/GPy/util/symbolic.py b/GPy/util/symbolic.py
index 395f9e3e..4b660c7f 100644
--- a/GPy/util/symbolic.py
+++ b/GPy/util/symbolic.py
@@ -237,52 +237,3 @@ class erfcx(Function):
     def eval(cls, arg):
         return erfc(arg)*exp(arg*arg)
 
-class sinc_grad(Function):
-    nargs = 1
-    
-    def fdiff(self, argindex=1):
-        if argindex==1:
-            # Strictly speaking this should be computed separately, as it won't work when x=0. See http://calculus.subwiki.org/wiki/Sinc_function
-            return ((2-x*x)*sin(self.args[0]) - 2*x*cos(x))/(x*x*x)
-        else:
-            raise ArgumentIndexError(self, argindex)
-
-    
-    @classmethod
-    def eval(cls, x):
-        if x.is_Number:
-            if x is S.NaN:
-                return S.NaN
-            elif x is S.Zero:
-                return S.Zero
-            else:
-                return (x*cos(x) - sin(x))/(x*x)
-            
-class sinc(Function):
-    
-    nargs = 1
-    
-    def fdiff(self, argindex=1):
-        if argindex==1:
-            return sinc_grad(self.args[0])
-        else:
-            raise ArgumentIndexError(self, argindex)
-
-    
-    @classmethod
-    def eval(cls, arg):
-        if arg.is_Number:
-            if arg is S.NaN:
-                return S.NaN
-            elif arg is S.Zero:
-                return S.One
-            else:
-                return sin(arg)/arg
-
-        if arg.func is asin:
-            x = arg.args[0]
-            return x / arg
-
-    def _eval_is_real(self):
-        return self.args[0].is_real
-

From 1deb1bee86871df6ec70b92e2f7928450094dc27 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Thu, 21 Nov 2013 21:42:09 +0000
Subject: [PATCH 19/28] Merge with James's changes

---
 GPy/kern/constructors.py    | 22 ----------------------
 GPy/testing/kernel_tests.py |  9 +++------
 2 files changed, 3 insertions(+), 28 deletions(-)

diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 083960b4..4ab06bba 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -292,7 +292,6 @@ except ImportError:
 if sympy_available:
     from parts.sympykern import spkern
     from sympy.parsing.sympy_parser import parse_expr
-    from GPy.util.symbolic import sinc
     
     def rbf_sympy(input_dim, ARD=False, variance=1., lengthscale=1.):
         """
@@ -337,27 +336,6 @@ if sympy_available:
             f =  scale_i*scale_j*sp.exp(-dist/(2*(lengthscale_i**2 + lengthscale_j**2 + shared_lengthscale**2)))
         return kern(input_dim, [spkern(input_dim, f, output_dim=output_dim, name='eq_sympy')])
 
-    def sinc(input_dim, ARD=False, variance=1., lengthscale=1.):
-        """
-        TODO: Not clear why this isn't working, suggests argument of sinc is not a number.
-        sinc covariance funciton
-        """
-        X = sp.symbols('x_:' + str(input_dim))
-        Z = sp.symbols('z_:' + str(input_dim))
-        variance = sp.var('variance',positive=True)
-        if ARD:
-            lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)]
-            dist_string = ' + '.join(['(x_%i-z_%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
-            dist = parse_expr(dist_string)
-            f =  variance*sinc(sp.pi*sp.sqrt(dist))
-        else:
-            lengthscale = sp.var('lengthscale',positive=True)
-            dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(input_dim)])
-            dist = parse_expr(dist_string)
-            f =  variance*sinc(sp.pi*sp.sqrt(dist)/lengthscale)
-            
-        return kern(input_dim, [spkern(input_dim, f, name='sinc')])
-
     def sympykern(input_dim, k=None, output_dim=1, name=None, param=None):
         """
         A base kernel object, where all the hard work in done by sympy.
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index f64dac2b..f75eb580 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -34,12 +34,9 @@ class KernelTests(unittest.TestCase):
             self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
 
     def test_eq_sympykernel(self):
-        kern = GPy.kern.eq_sympy(5, 3, output_ind=4)
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
-
-    def test_sinckernel(self):
-        kern = GPy.kern.sinc(5)
-        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+        if SYMPY_AVAILABLE:
+            kern = GPy.kern.eq_sympy(5, 3, output_ind=4)
+            self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
 
     def test_rbf_invkernel(self):
         kern = GPy.kern.rbf_inv(5)

From fedaa5e1f1b6876ca6c41b7923a1e4b347a48f2d Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Thu, 21 Nov 2013 22:15:20 +0000
Subject: [PATCH 20/28] Fixed bug in sympy kernel and added sympolic.py back
 into utils __init__.py

---
 GPy/kern/parts/sympykern.py | 6 +++---
 GPy/util/__init__.py        | 1 +
 GPy/util/symbolic.py        | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/parts/sympykern.py
index d109fea7..bcd52fe2 100644
--- a/GPy/kern/parts/sympykern.py
+++ b/GPy/kern/parts/sympykern.py
@@ -345,8 +345,8 @@ class spkern(Kernpart):
 
         # Code to use when only X is provided. 
         self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
-        self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[') 
-        self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z2(', 'X2(')
+        self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= PARTIAL2(', '+= 2*PARTIAL2(') 
+        self._dK_dtheta_code_X = self._dK_dtheta_code_X.replace('Z2(', 'X2(')
         self._dK_dX_code_X = self._dK_dX_code_X.replace('Z2(', 'X2(')
 
 
@@ -402,7 +402,7 @@ class spkern(Kernpart):
             self._weave_inline(self._dK_dX_code, X, target, Z, partial)
 
     def dKdiag_dX(self,partial,X,target):
-        self._weave.inline(self._dKdiag_dX_code, X, target, Z, partial)
+        self._weave_inline(self._dKdiag_dX_code, X, target, Z=None, partial=partial)
 
     def compute_psi_stats(self):
         #define some normal distributions
diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py
index db9b7362..629b3f48 100644
--- a/GPy/util/__init__.py
+++ b/GPy/util/__init__.py
@@ -14,5 +14,6 @@ import visualize
 import decorators
 import classification
 import latent_space_visualizations
+import symbolic
 
 import netpbmfile
diff --git a/GPy/util/symbolic.py b/GPy/util/symbolic.py
index 4b660c7f..49c8c33a 100644
--- a/GPy/util/symbolic.py
+++ b/GPy/util/symbolic.py
@@ -1,4 +1,4 @@
-from sympy import Function, S, oo, I, cos, sin, asin, log, erf,pi,exp,sqrt,sign
+from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign
 
 
 class ln_diff_erf(Function):

From 09de9d7195ca8f6770cd28d695d92d6b9682bfd9 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Thu, 21 Nov 2013 22:35:58 +0000
Subject: [PATCH 21/28] Added eq_ode1 to constructors.py

---
 GPy/kern/constructors.py    | 40 +++++++++++++++++++++++++++++++++----
 GPy/testing/kernel_tests.py |  5 +++++
 2 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 4ab06bba..500ab92f 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -5,6 +5,7 @@ import numpy as np
 from kern import kern
 import parts
 
+
 def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False):
     """
     Construct an RBF kernel
@@ -292,7 +293,8 @@ except ImportError:
 if sympy_available:
     from parts.sympykern import spkern
     from sympy.parsing.sympy_parser import parse_expr
-    
+    from GPy.util import symbolic
+
     def rbf_sympy(input_dim, ARD=False, variance=1., lengthscale=1.):
         """
         Radial Basis Function covariance.
@@ -312,9 +314,19 @@ if sympy_available:
             f =  variance*sp.exp(-dist/(2*lengthscale**2))
         return kern(input_dim, [spkern(input_dim, f, name='rbf_sympy')])
 
-    def eq_sympy(input_dim, output_dim, ARD=False, variance=1., lengthscale=1.):
+    def eq_sympy(input_dim, output_dim, ARD=False):
         """
-        Exponentiated quadratic with multiple outputs.
+        Latent force model covariance, exponentiated quadratic with multiple outputs. Derived from a diffusion equation with the initial spatial condition layed down by a Gaussian process with lengthscale given by shared_lengthscale.
+
+        See IEEE Trans Pattern Anal Mach Intell. 2013 Nov;35(11):2693-705. doi: 10.1109/TPAMI.2013.86. Linear latent force models using Gaussian processes. Alvarez MA, Luengo D, Lawrence ND.
+
+        :param input_dim: Dimensionality of the kernel
+        :type input_dim: int
+        :param output_dim: number of outputs in the covariance function.
+        :type output_dim: int
+        :param ARD: whether or not to user ARD (default False).
+        :type ARD: bool
+
         """
         real_input_dim = input_dim
         if output_dim>1:
@@ -325,7 +337,7 @@ if sympy_available:
         if ARD:
             lengthscales = [sp.var('lengthscale%i_i lengthscale%i_j' % i, positive=True) for i in range(real_input_dim)]
             shared_lengthscales = [sp.var('shared_lengthscale%i' % i, positive=True) for i in range(real_input_dim)]
-            dist_string = ' + '.join(['(x_%i-z_%i)**2/(shared_lengthscale%i**2 + lengthscale%i_i*lengthscale%i_j)' % (i, i, i) for i in range(real_input_dim)])
+            dist_string = ' + '.join(['(x_%i-z_%i)**2/(shared_lengthscale%i**2 + lengthscale%i_i**2 + lengthscale%i_j**2)' % (i, i, i) for i in range(real_input_dim)])
             dist = parse_expr(dist_string)
             f =  variance*sp.exp(-dist/2.)
         else:
@@ -336,6 +348,26 @@ if sympy_available:
             f =  scale_i*scale_j*sp.exp(-dist/(2*(lengthscale_i**2 + lengthscale_j**2 + shared_lengthscale**2)))
         return kern(input_dim, [spkern(input_dim, f, output_dim=output_dim, name='eq_sympy')])
 
+    def ode1_eq(output_dim=1):
+        """
+        Latent force model covariance, first order differential
+        equation driven by exponentiated quadratic.
+
+        See N. D. Lawrence, G. Sanguinetti and M. Rattray. (2007)
+        'Modelling transcriptional regulation using Gaussian
+        processes' in B. Schoelkopf, J. C. Platt and T. Hofmann (eds)
+        Advances in Neural Information Processing Systems, MIT Press,
+        Cambridge, MA, pp 785--792.
+
+        :param output_dim: number of outputs in the covariance function.
+        :type output_dim: int
+        """
+        input_dim = 2
+        x_0, z_0, decay_i, decay_j, scale_i, scale_j, lengthscale = sp.symbols('x_0, z_0, decay_i, decay_j, scale_i, scale_j, lengthscale')
+        f = scale_i*scale_j*(symbolic.h(x_0, z_0, decay_i, decay_j, lengthscale) 
+     + symbolic.h(z_0, x_0, decay_j, decay_i, lengthscale))
+        return kern(input_dim, [spkern(input_dim, f, output_dim=output_dim, name='ode1_eq')])
+
     def sympykern(input_dim, k=None, output_dim=1, name=None, param=None):
         """
         A base kernel object, where all the hard work in done by sympy.
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index a2194b65..92cad687 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -38,6 +38,11 @@ class KernelTests(unittest.TestCase):
             kern = GPy.kern.eq_sympy(5, 3)
             self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
 
+    def test_eq_ode1kernel(self):
+        if SYMPY_AVAILABLE:
+            kern = GPy.kern.eq_ode1(3)
+            self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
     def test_rbf_invkernel(self):
         kern = GPy.kern.rbf_inv(5)
         self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))

From 98b9dc0163e376b2e5b76872a8bc77c91916c591 Mon Sep 17 00:00:00 2001
From: Neil Lawrence <lawrennd@gmail.com>
Date: Thu, 21 Nov 2013 23:07:43 +0000
Subject: [PATCH 22/28] eq_ode1 working but test failing?

---
 GPy/kern/constructors.py    | 27 ---------------------------
 GPy/kern/kern.py            | 12 ++++++++----
 GPy/testing/kernel_tests.py |  8 ++++----
 3 files changed, 12 insertions(+), 35 deletions(-)

diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 500ab92f..05eaa028 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -150,33 +150,6 @@ def white(input_dim,variance=1.):
     part = parts.white.White(input_dim,variance)
     return kern(input_dim, [part])
 
-def eq_ode1(output_dim, W=None, rank=1,  kappa=None, length_scale=1., decay=None, delay=None):
-    """Covariance function for first order differential equation driven by an exponentiated quadratic covariance.
-
-    This outputs of this kernel have the form
-    .. math::
-       \frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) +\sqrt{\kappa_j}g_j(t) - d_jy_j(t)
-
-    where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance.
-    
-    :param output_dim: number of outputs driven by latent function.
-    :type output_dim: int
-    :param W: sensitivities of each output to the latent driving function. 
-    :type W: ndarray (output_dim x rank).
-    :param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
-    :type rank: int
-    :param decay: decay rates for the first order system. 
-    :type decay: array of length output_dim.
-    :param delay: delay between latent force and output response.
-    :type delay: array of length output_dim.
-    :param kappa: diagonal term that allows each latent output to have an independent component to the response.
-    :type kappa: array of length output_dim.
-    
-    .. Note: see first order differential equation examples in GPy.examples.regression for some usage.
-    """
-    part = parts.eq_ode1.Eq_ode1(output_dim, W, rank, kappa, length_scale, decay, delay)
-    return kern(2, [part])
-
 
 def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
     """
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index f021dc3a..46bb01c8 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -747,7 +747,7 @@ class Kern_check_model(Model):
         if kernel==None:
             kernel = GPy.kern.rbf(1)
         if X==None:
-            X = np.random.randn(num_samples, kernel.input_dim)
+            X = np.random.normal(size=(num_samples, kernel.input_dim))
         if dL_dK==None:
             if X2==None:
                 dL_dK = np.ones((X.shape[0], X.shape[0]))
@@ -844,7 +844,7 @@ class Kern_check_dKdiag_dX(Kern_check_model):
     def _set_params(self, x):
         self.X=x.reshape(self.X.shape)
 
-def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
+def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False, X_positive=False):
     """This function runs on kernels to check the correctness of their implementation. It checks that the covariance function is positive definite for a randomly generated data set.
 
     :param kern: the kernel to be tested.
@@ -858,12 +858,16 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
     pass_checks = True
     if X==None:
         X = np.random.randn(10, kern.input_dim)
+        if X_positive:
+            X = abs(X)
         if output_ind is not None:
-            X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0])
+            X[:, output_ind] = np.random.randint(kern.parts[0].output_dim, X.shape[0])
     if X2==None:
         X2 = np.random.randn(20, kern.input_dim)
+        if X_positive:
+            X2 = abs(X2)
         if output_ind is not None:
-            X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0])
+            X2[:, output_ind] = np.random.randint(kern.parts[0].output_dim, X2.shape[0])
 
     if verbose:
         print("Checking covariance function is positive definite.")
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index 92cad687..5d2fbeec 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -36,12 +36,12 @@ class KernelTests(unittest.TestCase):
     def test_eq_sympykernel(self):
         if SYMPY_AVAILABLE:
             kern = GPy.kern.eq_sympy(5, 3)
-            self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+            self.assertTrue(GPy.kern.kern_test(kern, output_ind=3, verbose=verbose))
 
-    def test_eq_ode1kernel(self):
+    def test_ode1_eqkernel(self):
         if SYMPY_AVAILABLE:
-            kern = GPy.kern.eq_ode1(3)
-            self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+            kern = GPy.kern.ode1_eq(3)
+            self.assertTrue(GPy.kern.kern_test(kern, output_ind=1, verbose=verbose, X_positive=True))
 
     def test_rbf_invkernel(self):
         kern = GPy.kern.rbf_inv(5)

From 5b1f7002389f4fb2fc4c9e75e32cfb26a4e7680d Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Fri, 22 Nov 2013 08:58:29 +0000
Subject: [PATCH 23/28] changed nasty whitespace

---
 GPy/core/mapping.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/GPy/core/mapping.py b/GPy/core/mapping.py
index 0da93c7c..5f517706 100644
--- a/GPy/core/mapping.py
+++ b/GPy/core/mapping.py
@@ -36,7 +36,6 @@ class Mapping(Parameterized):
 
     def df_dtheta(self, dL_df, X):
         """The gradient of the outputs of the multi-layer perceptron with respect to each of the parameters.
-        
         :param dL_df: gradient of the objective with respect to the function.
         :type dL_df: ndarray (num_data x output_dim)
         :param X: input locations where the function is evaluated.
@@ -44,14 +43,13 @@ class Mapping(Parameterized):
         :returns: Matrix containing gradients with respect to parameters of each output for each input data.
         :rtype: ndarray (num_params length)
         """
-        
         raise NotImplementedError
 
     def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue']):
         """
 
         Plot the mapping.
-        
+
         Plots the mapping associated with the model.
           - In one dimension, the function is plotted.
           - In two dimsensions, a contour-plot shows the function
@@ -110,7 +108,7 @@ class Mapping(Parameterized):
             for d in range(y.shape[1]):
                 ax.plot(Xnew, f[:, d], edgecol=linecol)
 
-        elif self.X.shape[1] == 2: 
+        elif self.X.shape[1] == 2:
             resolution = resolution or 50
             Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
             x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
@@ -135,14 +133,14 @@ class Mapping_check_model(Model):
             X = np.random.randn(num_samples, mapping.input_dim)
         if dL_df==None:
             dL_df = np.ones((num_samples, mapping.output_dim))
-        
+
         self.mapping=mapping
         self.X = X
         self.dL_df = dL_df
         self.num_params = self.mapping.num_params
         Model.__init__(self)
 
-        
+
     def _get_params(self):
         return self.mapping._get_params()
 
@@ -157,7 +155,7 @@ class Mapping_check_model(Model):
 
     def _log_likelihood_gradients(self):
         raise NotImplementedError, "This needs to be implemented to use the Mapping_check_model class."
-    
+
 class Mapping_check_df_dtheta(Mapping_check_model):
     """This class allows gradient checks for the gradient of a mapping with respect to parameters. """
     def __init__(self, mapping=None, dL_df=None, X=None):
@@ -175,13 +173,13 @@ class Mapping_check_df_dX(Mapping_check_model):
         if dL_df==None:
             dL_df = np.ones((self.X.shape[0],self.mapping.output_dim))
         self.num_params = self.X.shape[0]*self.mapping.input_dim
-        
+
     def _log_likelihood_gradients(self):
         return self.mapping.df_dX(self.dL_df, self.X).flatten()
 
     def _get_param_names(self):
         return ['X_'  +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
-                
+
     def _get_params(self):
         return self.X.flatten()
 

From 9feb1304091bc19b0c3d3121a90af84de36125fc Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Fri, 22 Nov 2013 08:59:29 +0000
Subject: [PATCH 24/28] formatting docstring

---
 GPy/core/mapping.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/GPy/core/mapping.py b/GPy/core/mapping.py
index 5f517706..7b2c89b9 100644
--- a/GPy/core/mapping.py
+++ b/GPy/core/mapping.py
@@ -124,7 +124,11 @@ class Mapping(Parameterized):
 from GPy.core.model import Model
 
 class Mapping_check_model(Model):
-    """This is a dummy model class used as a base class for checking that the gradients of a given mapping are implemented correctly. It enables checkgradient() to be called independently on each mapping."""
+    """
+    This is a dummy model class used as a base class for checking that the
+    gradients of a given mapping are implemented correctly. It enables
+    checkgradient() to be called independently on each mapping.
+    """
     def __init__(self, mapping=None, dL_df=None, X=None):
         num_samples = 20
         if mapping==None:

From ae0f5134c2a2d76228f6c000b7dfba64173d11b6 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Fri, 22 Nov 2013 14:36:47 +0000
Subject: [PATCH 25/28] lots of medding with the likelihoods to get the tests
 working. the tests still don;t work

---
 GPy/likelihoods/laplace.py                    |  5 +-
 .../noise_models/bernoulli_noise.py           |  2 +
 .../noise_models/gaussian_noise.py            |  2 +
 .../noise_models/noise_distributions.py       |  2 +-
 GPy/testing/likelihoods_tests.py              | 63 ++++++++++---------
 5 files changed, 44 insertions(+), 30 deletions(-)

diff --git a/GPy/likelihoods/laplace.py b/GPy/likelihoods/laplace.py
index 6a44d5b6..6941de48 100644
--- a/GPy/likelihoods/laplace.py
+++ b/GPy/likelihoods/laplace.py
@@ -278,7 +278,10 @@ class Laplace(likelihood):
         #W is diagonal so its sqrt is just the sqrt of the diagonal elements
         W_12 = np.sqrt(W)
         B = np.eye(self.N) + W_12*K*W_12.T
-        L = jitchol(B)
+        try:
+            L = jitchol(B)
+        except:
+            import ipdb; ipdb.set_trace()
 
         W12BiW12 = W_12*dpotrs(L, np.asfortranarray(W_12*a), lower=1)[0]
         ln_B_det = 2*np.sum(np.log(np.diag(L)))
diff --git a/GPy/likelihoods/noise_models/bernoulli_noise.py b/GPy/likelihoods/noise_models/bernoulli_noise.py
index 17390e55..14f4adc8 100644
--- a/GPy/likelihoods/noise_models/bernoulli_noise.py
+++ b/GPy/likelihoods/noise_models/bernoulli_noise.py
@@ -22,6 +22,8 @@ class Bernoulli(NoiseDistribution):
     """
     def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
         super(Bernoulli, self).__init__(gp_link,analytical_mean,analytical_variance)
+        if isinstance(gp_link , (gp_transformations.Heaviside, gp_transformations.Probit)):
+            self.log_concave = True
 
     def _preprocess_values(self,Y):
         """
diff --git a/GPy/likelihoods/noise_models/gaussian_noise.py b/GPy/likelihoods/noise_models/gaussian_noise.py
index fce84d27..3da6bcc8 100644
--- a/GPy/likelihoods/noise_models/gaussian_noise.py
+++ b/GPy/likelihoods/noise_models/gaussian_noise.py
@@ -24,6 +24,8 @@ class Gaussian(NoiseDistribution):
         self.N = N
         self._set_params(np.asarray(variance))
         super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance)
+        if isinstance(gp_link , gp_transformations.Identity):
+            self.log_concave = True
 
     def _get_params(self):
         return np.array([self.variance])
diff --git a/GPy/likelihoods/noise_models/noise_distributions.py b/GPy/likelihoods/noise_models/noise_distributions.py
index 8ee7a2cd..a67d8792 100644
--- a/GPy/likelihoods/noise_models/noise_distributions.py
+++ b/GPy/likelihoods/noise_models/noise_distributions.py
@@ -33,7 +33,7 @@ class NoiseDistribution(object):
         else:
             self.predictive_variance = self._predictive_variance_numerical
 
-        self.log_concave = True
+        self.log_concave = False
 
     def _get_params(self):
         return np.zeros(0)
diff --git a/GPy/testing/likelihoods_tests.py b/GPy/testing/likelihoods_tests.py
index 8d1466fb..709fe002 100644
--- a/GPy/testing/likelihoods_tests.py
+++ b/GPy/testing/likelihoods_tests.py
@@ -186,33 +186,33 @@ class TestNoiseModels(object):
                             "laplace": True,
                             "ep": True
                             },
-                        "Gaussian_log": {
-                            "model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log(), variance=self.var, D=self.D, N=self.N),
-                            "grad_params": {
-                                "names": ["noise_model_variance"],
-                                "vals": [self.var],
-                                "constraints": [constrain_positive]
-                                },
-                            "laplace": True
-                            },
-                        "Gaussian_probit": {
-                            "model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Probit(), variance=self.var, D=self.D, N=self.N),
-                            "grad_params": {
-                                "names": ["noise_model_variance"],
-                                "vals": [self.var],
-                                "constraints": [constrain_positive]
-                                },
-                            "laplace": True
-                            },
-                        "Gaussian_log_ex": {
-                            "model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log_ex_1(), variance=self.var, D=self.D, N=self.N),
-                            "grad_params": {
-                                "names": ["noise_model_variance"],
-                                "vals": [self.var],
-                                "constraints": [constrain_positive]
-                                },
-                            "laplace": True
-                            },
+                        #"Gaussian_log": {
+                            #"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log(), variance=self.var, D=self.D, N=self.N),
+                            #"grad_params": {
+                                #"names": ["noise_model_variance"],
+                                #"vals": [self.var],
+                                #"constraints": [constrain_positive]
+                                #},
+                            #"laplace": True
+                            #},
+                        #"Gaussian_probit": {
+                            #"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Probit(), variance=self.var, D=self.D, N=self.N),
+                            #"grad_params": {
+                                #"names": ["noise_model_variance"],
+                                #"vals": [self.var],
+                                #"constraints": [constrain_positive]
+                                #},
+                            #"laplace": True
+                            #},
+                        #"Gaussian_log_ex": {
+                            #"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log_ex_1(), variance=self.var, D=self.D, N=self.N),
+                            #"grad_params": {
+                                #"names": ["noise_model_variance"],
+                                #"vals": [self.var],
+                                #"constraints": [constrain_positive]
+                                #},
+                            #"laplace": True
+                            #},
                         "Bernoulli_default": {
                             "model": GPy.likelihoods.bernoulli(),
                             "link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)],
@@ -253,6 +253,7 @@ class TestNoiseModels(object):
                 param_vals = []
                 param_names = []
                 constrain_positive = []
+                param_constraints = [] # ??? TODO: Saul to Fix.
             if "link_f_constraints" in attributes:
                 link_f_constraints = attributes["link_f_constraints"]
             else:
@@ -490,8 +491,14 @@ class TestNoiseModels(object):
             constraints[param_num](name, m)
 
         m.randomize()
-        m.checkgrad(verbose=1, step=step)
+        m.optimize(max_iters=8)
         print m
+        m.checkgrad(verbose=1, step=step)
+        if not m.checkgrad(step=step):
+            m.checkgrad(verbose=1, step=step)
+            import ipdb; ipdb.set_trace()
+            #NOTE this test appears to be stochastic for some likelihoods (student t?)
+            # appears to all be working in test mode right now...
         assert m.checkgrad(step=step)
 
     ###########

From 129917ec8c638806213368f651ee36db480a6d25 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Fri, 22 Nov 2013 14:37:14 +0000
Subject: [PATCH 26/28] removing ipdb statements

---
 GPy/testing/likelihoods_tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/testing/likelihoods_tests.py b/GPy/testing/likelihoods_tests.py
index 709fe002..191dae57 100644
--- a/GPy/testing/likelihoods_tests.py
+++ b/GPy/testing/likelihoods_tests.py
@@ -496,7 +496,7 @@ class TestNoiseModels(object):
         m.checkgrad(verbose=1, step=step)
         if not m.checkgrad(step=step):
             m.checkgrad(verbose=1, step=step)
-            import ipdb; ipdb.set_trace()
+            #import ipdb; ipdb.set_trace()
             #NOTE this test appears to be stochastic for some likelihoods (student t?)
             # appears to all be working in test mode right now...
         assert m.checkgrad(step=step)

From aa7f1d53f9aa8f8b42304b13f4dba66c9ab5e0ce Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 25 Nov 2013 11:14:04 +0000
Subject: [PATCH 27/28] fixing up the blas detectino in linalg

---
 GPy/util/linalg.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index f68e1a0b..9db769e6 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -21,9 +21,9 @@ else:
 try:
     _blaslib = ctypes.cdll.LoadLibrary(np.core._dotblas.__file__) # @UndefinedVariable
     _blas_available = True
-    assert hasattr('dsyrk_',_blaslib)
-    assert hasattr('dsyr_',_blaslib)
-except:
+    assert hasattr(_blaslib, 'dsyrk_')
+    assert hasattr(_blaslib, 'dsyr_')
+except AssertionError:
     _blas_available = False
 
 def dtrtrs(A, B, lower=0, trans=0, unitdiag=0):

From 58ffdd813e9f3b868b8ad33fa39dcea945c0395a Mon Sep 17 00:00:00 2001
From: mu <m.niu@sheffield.ac.uk>
Date: Mon, 25 Nov 2013 13:58:06 +0000
Subject: [PATCH 28/28] ODE_UY

---
 GPy/kern/parts/ODE_UY.py   | 253 +++++++++++++++++++++++++++++++++++++
 GPy/kern/parts/__init__.py |   2 +-
 2 files changed, 254 insertions(+), 1 deletion(-)
 create mode 100644 GPy/kern/parts/ODE_UY.py

diff --git a/GPy/kern/parts/ODE_UY.py b/GPy/kern/parts/ODE_UY.py
new file mode 100644
index 00000000..8e0096d2
--- /dev/null
+++ b/GPy/kern/parts/ODE_UY.py
@@ -0,0 +1,253 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+from kernpart import Kernpart
+import numpy as np
+
+def index_to_slices(index):
+    """
+    take a numpy array of integers (index) and return a  nested list of slices such that the slices describe the start, stop points for each integer in the index. 
+
+    e.g.
+    >>> index = np.asarray([0,0,0,1,1,1,2,2,2])
+    returns
+    >>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
+
+    or, a more complicated example
+    >>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
+    returns
+    >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
+    """
+
+    #contruct the return structure
+    ind = np.asarray(index,dtype=np.int64)
+    ret = [[] for i in range(ind.max()+1)]
+
+    #find the switchpoints
+    ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
+    switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
+
+    [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
+    return ret
+
+class ODE_UY(Kernpart):
+    """
+    kernel resultiong from a first order ODE with OU driving GP
+
+    :param input_dim: the number of input dimension, has to be equal to one
+    :type input_dim: int
+    :param input_lengthU: the number of input U length
+    :type input_dim: int   
+    :param varianceU: variance of the driving GP
+    :type varianceU: float
+    :param lengthscaleU: lengthscale of the driving GP  (sqrt(3)/lengthscaleU)
+    :type lengthscaleU: float
+    :param varianceY: 'variance' of the transfer function
+    :type varianceY: float
+    :param lengthscaleY: 'lengthscale' of the transfer function (1/lengthscaleY)
+    :type lengthscaleY: float
+    :rtype: kernel object
+
+    """
+
+
+
+
+    def __init__(self, input_dim=2,varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None):
+        assert input_dim==2, "Only defined for input_dim = 1"
+        self.input_dim = input_dim
+        self.num_params = 4
+        self.name = 'ODE_UY'
+
+
+        if lengthscaleU is not None:
+            lengthscaleU = np.asarray(lengthscaleU)
+            assert lengthscaleU.size == 1, "lengthscaleU should be one dimensional"
+        else:
+            lengthscaleU = np.ones(1)
+        if lengthscaleY is not None:
+            lengthscaleY = np.asarray(lengthscaleY)
+            assert lengthscaleY.size == 1, "lengthscaleY should be one dimensional"
+        else:
+            lengthscaleY = np.ones(1)
+            #lengthscaleY = 0.5
+        self._set_params(np.hstack((varianceU, varianceY, lengthscaleU,lengthscaleY)))
+
+    def _get_params(self):
+        """return the value of the parameters."""
+        return np.hstack((self.varianceU,self.varianceY, self.lengthscaleU,self.lengthscaleY))
+
+    def _set_params(self, x):
+        """set the value of the parameters."""
+        assert x.size == self.num_params
+
+        self.varianceU = x[0]
+        self.varianceY = x[1]
+        self.lengthscaleU = x[2]
+        self.lengthscaleY = x[3]
+
+
+    def _get_param_names(self):
+        """return parameter names."""
+        return ['varianceU','varianceY', 'lengthscaleU', 'lengthscaleY']
+
+
+    def K(self, X, X2, target):
+        """Compute the covariance matrix between X and X2."""
+
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        if X2 is None:
+            X2,slices2 = X,slices
+        else:
+            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
+
+
+        #rdist = X[:,0][:,None] - X2[:,0][:,None].T
+        rdist = X - X2.T
+        ly=1/self.lengthscaleY
+        lu=np.sqrt(3)/self.lengthscaleU
+        #iu=self.input_lengthU  #dimention of U
+        
+        Vu=self.varianceU
+        Vy=self.varianceY
+
+        kuu = lambda dist:Vu * (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist))
+
+        k1 = lambda dist:np.exp(-ly*np.abs(dist))*(2*lu+ly)/(lu+ly)**2
+        k2 = lambda dist:(np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2 
+        k3 = lambda dist:np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
+        kyy = lambda dist:Vu*Vy*(k1(dist) + k2(dist) + k3(dist))
+
+        kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly)))
+        kyup = lambda dist:Vu*Vy*(k1(dist)+k2(dist))    #t>0 kyu
+        kyun = lambda dist:Vu*Vy*(kyu3(dist))       #t<0 kyu
+
+        kuyp = lambda dist:Vu*Vy*(kyu3(dist))       #t>0 kuy
+        kuyn = lambda dist:Vu*Vy*(k1(dist)+k2(dist))      #t<0 kuy
+        
+        for i, s1 in enumerate(slices):
+            for j, s2 in enumerate(slices2):
+                for ss1 in s1:
+                    for ss2 in s2:
+                        if i==0 and j==0:
+                            target[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2]))
+                        elif i==0 and j==1:
+                            target[ss1,ss2] = np.where(  rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) )   )
+                        elif i==1 and j==1:
+                            target[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2]))
+                        else:
+                            target[ss1,ss2] = np.where(  rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[s1[0],s2[0]]) )   )
+
+
+        #KUU = kuu(np.abs(rdist[:iu,:iu]))
+
+        #KYY = kyy(np.abs(rdist[iu:,iu:]))
+
+        #KYU = np.where(rdist[iu:,:iu]>0,kyup(np.abs(rdist[iu:,:iu])),kyun(np.abs(rdist[iu:,:iu]) ))
+
+        #KUY = np.where(rdist[:iu,iu:]>0,kuyp(np.abs(rdist[:iu,iu:])),kuyn(np.abs(rdist[:iu,iu:]) ))
+
+        #ker=np.vstack((np.hstack([KUU,KUY]),np.hstack([KYU,KYY])))
+
+        #np.add(ker, target, target)
+
+    def Kdiag(self, X, target):
+        """Compute the diagonal of the covariance matrix associated to X."""
+        ly=1/self.lengthscaleY
+        lu=np.sqrt(3)/self.lengthscaleU
+        #ly=self.lengthscaleY
+        #lu=self.lengthscaleU
+        
+        k1 = (2*lu+ly)/(lu+ly)**2
+        k2 = (ly-2*lu + 2*lu-ly ) / (ly-lu)**2 
+        k3 = 1/(lu+ly) + (lu)/(lu+ly)**2 
+
+        slices = index_to_slices(X[:,-1])
+
+        for i, ss1 in enumerate(slices):
+            for s1 in ss1:
+                if i==0:
+                    target[s1]+= self.varianceU 
+                elif i==1:
+                    target[s1]+= self.varianceU*self.varianceY*(k1+k2+k3)
+                else:
+                    raise ValueError, "invalid input/output index"
+        
+        #target[slices[0][0]]+= self.varianceU   #matern32 diag
+        #target[slices[1][0]]+= self.varianceU*self.varianceY*(k1+k2+k3)  #  diag
+
+
+
+
+
+
+    def dK_dtheta(self, dL_dK, X, X2, target):
+        """derivative of the covariance matrix with respect to the parameters."""
+        if X2 is None: X2 = X
+        dist = np.abs(X - X2.T)
+
+        ly=1/self.lengthscaleY
+        lu=np.sqrt(3)/self.lengthscaleU
+        #ly=self.lengthscaleY
+        #lu=self.lengthscaleU
+
+        dk1theta1 = lambda dist: np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3
+        #c=np.sqrt(3)
+        #t1=c/lu
+        #t2=1/ly
+        #dk1theta1=np.exp(-dist*ly)*t2*( (2*c*t2+2*t1)/(c*t2+t1)**2 -2*(2*c*t2*t1+t1**2)/(c*t2+t1)**3   )
+
+        dk2theta1 = lambda dist: 1*( 
+            np.exp(-lu*dist)*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2) + np.exp(-lu*dist)*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2) 
+            +np.exp(-dist*lu)*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3) 
+            +np.exp(-dist*ly)*2*(ly-lu)**(-2)
+            +np.exp(-dist*ly)*2*(2*lu-ly)*(ly-lu)**(-3)
+            )
+      
+        dk3theta1 = lambda dist: np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist)
+
+        dktheta1 = lambda dist: self.varianceU*self.varianceY*(dk1theta1+dk2theta1+dk3theta1)
+
+
+
+
+        dk1theta2 = lambda dist: np.exp(-ly*dist) * ((lu+ly)**(-2)) * (  (-dist)*(2*lu+ly)  +  1  +  (-2)*(2*lu+ly)/(lu+ly)  )
+
+        dk2theta2 =lambda dist:  1*(
+            np.exp(-dist*lu)*(ly-lu)**(-2) * ( 1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1) )
+            +np.exp(-dist*ly)*(ly-lu)**(-2) * ( (-dist)*(2*lu-ly) -1+(2*lu-ly)*(-2)*(ly-lu)**(-1) )
+            )
+
+        dk3theta2 = lambda dist: np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3
+
+        dktheta2 = lambda dist: self.varianceU*self.varianceY*(dk1theta2 + dk2theta2 +dk3theta2)
+
+
+
+        k1 = lambda dist: np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
+        k2 = lambda dist: (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2 
+        k3 = lambda dist: np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
+        dkdvar = k1+k2+k3
+
+        target[0] += np.sum(self.varianceY*dkdvar * dL_dK)
+        target[1] += np.sum(self.varianceU*dkdvar * dL_dK)
+        target[2] += np.sum(dktheta1*(-np.sqrt(3)*self.lengthscaleU**(-2)) * dL_dK)
+        target[3] += np.sum(dktheta2*(-self.lengthscaleY**(-2)) * dL_dK)
+
+
+    # def dKdiag_dtheta(self, dL_dKdiag, X, target):
+    #     """derivative of the diagonal of the covariance matrix with respect to the parameters."""
+    #     # NB: derivative of diagonal elements wrt lengthscale is 0
+    #     target[0] += np.sum(dL_dKdiag)
+
+    # def dK_dX(self, dL_dK, X, X2, target):
+    #     """derivative of the covariance matrix with respect to X."""
+    #     if X2 is None: X2 = X
+    #     dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
+    #     ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
+    #     dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
+    #     target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
+
+    # def dKdiag_dX(self, dL_dKdiag, X, target):
+    #     pass
diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py
index f278941a..d8e7f8e6 100644
--- a/GPy/kern/parts/__init__.py
+++ b/GPy/kern/parts/__init__.py
@@ -14,7 +14,7 @@ import Matern32
 import Matern52
 import mlp
 import ODE_1
-#import ODE_UY
+import ODE_UY
 import periodic_exponential
 import periodic_Matern32
 import periodic_Matern52