[huge merge] the second

This commit is contained in:
Max Zwiessele 2014-11-21 16:42:01 +00:00
parent 180650ec85
commit 187f85c239
35 changed files with 40 additions and 3018 deletions

2
.gitignore vendored
View file

@ -45,4 +45,4 @@ iterate.dat
# git merge files #
###################
*.orig
*.orig

View file

@ -2,14 +2,14 @@ language: python
python:
- "2.7"
#Set virtual env with system-site-packages to true
virtualenv:
system_site_packages: true
# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
before_install:
- sudo apt-get install -qq python-scipy python-pip
- sudo apt-get install -qq python-matplotlib
#Install a mini version of anaconda such that we can easily install our dependencies
- wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
- chmod +x miniconda.sh
- ./miniconda.sh -b
- export PATH=/home/travis/miniconda/bin:$PATH
- conda update --yes conda
# Workaround for a permissions issue with Travis virtual machine images
# that breaks Python's multiprocessing:
# https://github.com/travis-ci/travis-cookbooks/issues/155
@ -17,11 +17,10 @@ before_install:
- sudo ln -s /run/shm /dev/shm
install:
- pip install --upgrade numpy==1.7.1
- pip install sphinx
- pip install nose
- pip install . --use-mirrors
- conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.7 scipy=0.12 matplotlib nose sphinx pip nose
- pip install .
#--use-mirrors
#
# command to run tests, e.g. python setup.py test
script:
script:
- nosetests GPy/testing
#- yes | nosetests GPy/testing

View file

@ -1,7 +0,0 @@
# This is the configuration file for GPy
[parallel]
# Enable openmp support. This speeds up some computations, depending on the number
# of cores available. Setting up a compiler with openmp support can be difficult on
# some platforms, hence this option.
openmp=False

View file

@ -1 +1,2 @@
from scg import SCG
from optimization import *

View file

@ -225,11 +225,13 @@ class opt_SCG(Optimizer):
self.status = opt_result[3]
def get_optimizer(f_min):
from sgd import opt_SGD
optimizers = {'fmin_tnc': opt_tnc,
'simplex': opt_simplex,
'lbfgsb': opt_lbfgsb,
'scg': opt_SCG,
}
'sgd': opt_SGD}
if rasm_available:
optimizers['rasmussen'] = opt_rasm

View file

@ -1,9 +1,7 @@
#include "Python.h"
#include <math.h>
#include <float.h>
#include <stdlib.h>
#include <iostream>
#include <stdexcept>
double DiracDelta(double x){
// TODO: this doesn't seem to be a dirac delta ... should return infinity. Neil
if((x<0.000001) & (x>-0.000001))//go on, laugh at my c++ skills
@ -16,7 +14,6 @@ double DiracDelta(double x,int foo){
};
double sinc(double x){
// compute the sinc function
if (x==0)
return 1.0;
else
@ -24,39 +21,28 @@ double sinc(double x){
}
double sinc_grad(double x){
// compute the gradient of the sinc function.
if (x==0)
return 0.0;
else
return (x*cos(x) - sin(x))/(x*x);
}
double erfcx(double x){
// Based on code by Soren Hauberg 2010 for Octave.
// compute the scaled complex error function.
//return erfc(x)*exp(x*x);
double xneg=-sqrt(log(DBL_MAX/2));
double xmax = 1/(sqrt(M_PI)*DBL_MIN);
xmax = DBL_MAX<xmax ? DBL_MAX : xmax;
// Find values where erfcx can be evaluated
double t = 3.97886080735226 / (fabs(x) + 3.97886080735226);
double t = 3.97886080735226 / (abs(x) + 3.97886080735226);
double u = t-0.5;
double y = (((((((((u * 0.00127109764952614092 + 1.19314022838340944e-4) * u
- 0.003963850973605135) * u - 8.70779635317295828e-4) * u
+ 0.00773672528313526668) * u + 0.00383335126264887303) * u
- 0.0127223813782122755) * u - 0.0133823644533460069) * u
+ 0.0161315329733252248) * u + 0.0390976845588484035) * u + 0.00249367200053503304;
y = ((((((((((((y * u - 0.0838864557023001992) * u -
0.119463959964325415) * u + 0.0166207924969367356) * u +
0.357524274449531043) * u + 0.805276408752910567) * u +
1.18902982909273333) * u + 1.37040217682338167) * u +
1.31314653831023098) * u + 1.07925515155856677) * u +
0.774368199119538609) * u + 0.490165080585318424) * u +
0.275374741597376782) * t;
- 0.003963850973605135) * u - 8.70779635317295828e-4) * u
+ 0.00773672528313526668) * u + 0.00383335126264887303) * u
- 0.0127223813782122755) * u - 0.0133823644533460069) * u
+ 0.0161315329733252248) * u + 0.0390976845588484035) * u + 0.00249367200053503304;
if (x<xneg)
return -INFINITY;
else if (x<0)
return 2.0*exp(x*x)-y;
return 2*exp(x*x)-y;
else if (x>xmax)
return 0.0;
else
@ -64,133 +50,12 @@ double erfcx(double x){
}
double ln_diff_erf(double x0, double x1){
// stably compute the log of difference between two erfs.
if (x1>x0){
PyErr_SetString(PyExc_RuntimeError,"second argument must be smaller than or equal to first in ln_diff_erf");
throw 1;
}
if (x0==x1){
PyErr_WarnEx(PyExc_RuntimeWarning,"divide by zero encountered in log", 1);
return -INFINITY;
}
else if(x0<0 && x1>0 || x0>0 && x1<0) //x0 and x1 have opposite signs
if (x0==x1)
return INFINITY;
else if(x0<0 && x1>0 || x0>0 && x1<0)
return log(erf(x0)-erf(x1));
else if(x0>0) //x0 positive, x1 non-negative
return log(erfcx(x1)-erfcx(x0)*exp(x1*x1- x0*x0))-x1*x1;
else //x0 and x1 non-positive
else if(x1>0)
return log(erfcx(x1)-erfcx(x0)*exp(x1*x1)- x0*x0)-x1*x1;
else
return log(erfcx(-x0)-erfcx(-x1)*exp(x0*x0 - x1*x1))-x0*x0;
}
// TODO: For all these computations of h things are very efficient at the moment. Need to recode sympykern to allow the precomputations to take place and all the gradients to be computed in one function. Not sure of best way forward for that yet. Neil
double h(double t, double tprime, double d_i, double d_j, double l){
// Compute the h function for the sim covariance.
double half_l_di = 0.5*l*d_i;
double arg_1 = half_l_di + tprime/l;
double arg_2 = half_l_di - (t-tprime)/l;
double ln_part_1 = ln_diff_erf(arg_1, arg_2);
arg_2 = half_l_di - t/l;
double sign_val = 1.0;
if(t/l==0)
sign_val = 0.0;
else if (t/l < 0)
sign_val = -1.0;
arg_2 = half_l_di - t/l;
double ln_part_2 = ln_diff_erf(half_l_di, arg_2);
// if either ln_part_1 or ln_part_2 are -inf, don't bother computing rest of that term.
double part_1 = 0.0;
if(isfinite(ln_part_1))
part_1 = sign_val*exp(half_l_di*half_l_di - d_i*(t-tprime) + ln_part_1 - log(d_i + d_j));
double part_2 = 0.0;
if(isfinite(ln_part_2))
part_2 = sign_val*exp(half_l_di*half_l_di - d_i*t - d_j*tprime + ln_part_2 - log(d_i + d_j));
return part_1 - part_2;
}
double dh_dd_i(double t, double tprime, double d_i, double d_j, double l){
double diff_t = (t-tprime);
double l2 = l*l;
double hv = h(t, tprime, d_i, d_j, l);
double half_l_di = 0.5*l*d_i;
double arg_1 = half_l_di + tprime/l;
double arg_2 = half_l_di - (t-tprime)/l;
double ln_part_1 = ln_diff_erf(arg_1, arg_2);
arg_1 = half_l_di;
arg_2 = half_l_di - t/l;
double sign_val = 1.0;
if(t/l==0)
sign_val = 0.0;
else if (t/l < 0)
sign_val = -1.0;
double ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l);
double base = (0.5*d_i*l2*(d_i+d_j)-1)*hv;
if(isfinite(ln_part_1))
base -= diff_t*sign_val*exp(half_l_di*half_l_di
-d_i*diff_t
+ln_part_1);
if(isfinite(ln_part_2))
base += t*sign_val*exp(half_l_di*half_l_di
-d_i*t-d_j*tprime
+ln_part_2);
base += l/sqrt(M_PI)*(-exp(-diff_t*diff_t/l2)
+exp(-tprime*tprime/l2-d_i*t)
+exp(-t*t/l2-d_j*tprime)
-exp(-(d_i*t + d_j*tprime)));
return base/(d_i+d_j);
}
double dh_dd_j(double t, double tprime, double d_i, double d_j, double l){
double half_l_di = 0.5*l*d_i;
double hv = h(t, tprime, d_i, d_j, l);
double sign_val = 1.0;
if(t/l==0)
sign_val = 0.0;
else if (t/l < 0)
sign_val = -1.0;
double ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l);
double base = -hv;
if(isfinite(ln_part_2))
base += tprime*sign_val*exp(half_l_di*half_l_di-(d_i*t+d_j*tprime)+ln_part_2);
return base/(d_i+d_j);
}
double dh_dl(double t, double tprime, double d_i, double d_j, double l){
// compute gradient of h function with respect to lengthscale for sim covariance
// TODO a lot of energy wasted recomputing things here, need to do this in a shared way somehow ... perhaps needs rewrite of sympykern.
double half_l_di = 0.5*l*d_i;
double arg_1 = half_l_di + tprime/l;
double arg_2 = half_l_di - (t-tprime)/l;
double ln_part_1 = ln_diff_erf(arg_1, arg_2);
arg_2 = half_l_di - t/l;
double ln_part_2 = ln_diff_erf(half_l_di, arg_2);
double diff_t = t - tprime;
double l2 = l*l;
double hv = h(t, tprime, d_i, d_j, l);
return 0.5*d_i*d_i*l*hv + 2/(sqrt(M_PI)*(d_i+d_j))*((-diff_t/l2-d_i/2)*exp(-diff_t*diff_t/l2)+(-tprime/l2+d_i/2)*exp(-tprime*tprime/l2-d_i*t)-(-t/l2-d_i/2)*exp(-t*t/l2-d_j*tprime)-d_i/2*exp(-(d_i*t+d_j*tprime)));
}
double dh_dt(double t, double tprime, double d_i, double d_j, double l){
// compute gradient of h function with respect to t.
double diff_t = t - tprime;
double half_l_di = 0.5*l*d_i;
double arg_1 = half_l_di + tprime/l;
double arg_2 = half_l_di - diff_t/l;
double ln_part_1 = ln_diff_erf(arg_1, arg_2);
arg_2 = half_l_di - t/l;
double ln_part_2 = ln_diff_erf(half_l_di, arg_2);
return (d_i*exp(ln_part_2-d_i*t - d_j*tprime) - d_i*exp(ln_part_1-d_i*diff_t) + 2*exp(-d_i*diff_t - pow(half_l_di - diff_t/l, 2))/(sqrt(M_PI)*l) - 2*exp(-d_i*t - d_j*tprime - pow(half_l_di - t/l,2))/(sqrt(M_PI)*l))*exp(half_l_di*half_l_di)/(d_i + d_j);
}
double dh_dtprime(double t, double tprime, double d_i, double d_j, double l){
// compute gradient of h function with respect to tprime.
double diff_t = t - tprime;
double half_l_di = 0.5*l*d_i;
double arg_1 = half_l_di + tprime/l;
double arg_2 = half_l_di - diff_t/l;
double ln_part_1 = ln_diff_erf(arg_1, arg_2);
arg_2 = half_l_di - t/l;
double ln_part_2 = ln_diff_erf(half_l_di, arg_2);
return (d_i*exp(ln_part_1-d_i*diff_t) + d_j*exp(ln_part_2-d_i*t - d_j*tprime) + (-2*exp(-pow(half_l_di - diff_t/l,2)) + 2*exp(-pow(half_l_di + tprime/l,2)))*exp(-d_i*diff_t)/(sqrt(M_PI)*l))*exp(half_l_di*half_l_di)/(d_i + d_j);
}

View file

@ -7,10 +7,3 @@ double sinc_grad(double x);
double erfcx(double x);
double ln_diff_erf(double x0, double x1);
double h(double t, double tprime, double d_i, double d_j, double l);
double dh_dl(double t, double tprime, double d_i, double d_j, double l);
double dh_dd_i(double t, double tprime, double d_i, double d_j, double l);
double dh_dd_j(double t, double tprime, double d_i, double d_j, double l);
double dh_dt(double t, double tprime, double d_i, double d_j, double l);
double dh_dtprime(double t, double tprime, double d_i, double d_j, double l);

View file

@ -137,11 +137,7 @@ class ODE_1(Kernpart):
k2 = (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
dkdvar = k1+k2+k3
#target[0] dk dvarU
#target[1] dk dvarY
#target[2] dk d theta1
#target[3] dk d theta2
target[0] += np.sum(self.varianceY*dkdvar * dL_dK)
target[1] += np.sum(self.varianceU*dkdvar * dL_dK)
target[2] += np.sum(dktheta1*(-np.sqrt(3)*self.lengthscaleU**(-2)) * dL_dK)

View file

@ -1,6 +1,7 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from IPython.core.debugger import Tracer; debug_here=Tracer()
from kernpart import Kernpart
import numpy as np
from ...util.linalg import tdot

View file

@ -45,7 +45,7 @@ class Symmetric(Kernpart):
AX = np.dot(X,self.transform)
if X2 is None:
X2 = X
AX2 = AX
ZX2 = AX
else:
AX2 = np.dot(X2, self.transform)
self.k._param_grad_helper(dL_dK,X,X2,target)

View file

@ -1,335 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
def index_to_slices(index):
"""
take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.
e.g.
>>> index = np.asarray([0,0,0,1,1,1,2,2,2])
returns
>>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
or, a more complicated example
>>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
returns
>>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
"""
#contruct the return structure
ind = np.asarray(index,dtype=np.int64)
ret = [[] for i in range(ind.max()+1)]
#find the switchpoints
ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
return ret
class ODE_UY(Kernpart):
"""
kernel resultiong from a first order ODE with OU driving GP
:param input_dim: the number of input dimension, has to be equal to one
:type input_dim: int
:param input_lengthU: the number of input U length
:type input_dim: int
:param varianceU: variance of the driving GP
:type varianceU: float
:param lengthscaleU: lengthscale of the driving GP (sqrt(3)/lengthscaleU)
:type lengthscaleU: float
:param varianceY: 'variance' of the transfer function
:type varianceY: float
:param lengthscaleY: 'lengthscale' of the transfer function (1/lengthscaleY)
:type lengthscaleY: float
:rtype: kernel object
"""
def __init__(self, input_dim=2,varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None):
assert input_dim==2, "Only defined for input_dim = 1"
self.input_dim = input_dim
self.num_params = 4
self.name = 'ODE_UY'
if lengthscaleU is not None:
lengthscaleU = np.asarray(lengthscaleU)
assert lengthscaleU.size == 1, "lengthscaleU should be one dimensional"
else:
lengthscaleU = np.ones(1)
if lengthscaleY is not None:
lengthscaleY = np.asarray(lengthscaleY)
assert lengthscaleY.size == 1, "lengthscaleY should be one dimensional"
else:
lengthscaleY = np.ones(1)
#lengthscaleY = 0.5
self._set_params(np.hstack((varianceU, varianceY, lengthscaleU,lengthscaleY)))
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.varianceU,self.varianceY, self.lengthscaleU,self.lengthscaleY))
def _set_params(self, x):
"""set the value of the parameters."""
assert x.size == self.num_params
self.varianceU = x[0]
self.varianceY = x[1]
self.lengthscaleU = x[2]
self.lengthscaleY = x[3]
def _get_param_names(self):
"""return parameter names."""
return ['varianceU','varianceY', 'lengthscaleU', 'lengthscaleY']
def K(self, X, X2, target):
"""Compute the covariance matrix between X and X2."""
# model : a * dy/dt + b * y = U
#lu=sqrt(3)/theta1 ly=1/theta2 theta2= a/b :thetay sigma2=1/(2ab) :sigmay
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
#rdist = X[:,0][:,None] - X2[:,0][:,None].T
rdist = X - X2.T
ly=1/self.lengthscaleY
lu=np.sqrt(3)/self.lengthscaleU
#iu=self.input_lengthU #dimention of U
Vu=self.varianceU
Vy=self.varianceY
# kernel for kuu matern3/2
kuu = lambda dist:Vu * (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist))
# kernel for kyy
k1 = lambda dist:np.exp(-ly*np.abs(dist))*(2*lu+ly)/(lu+ly)**2
k2 = lambda dist:(np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = lambda dist:np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
kyy = lambda dist:Vu*Vy*(k1(dist) + k2(dist) + k3(dist))
# cross covariance function
kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly)))
# cross covariance kyu
kyup = lambda dist:Vu*Vy*(k1(dist)+k2(dist)) #t>0 kyu
kyun = lambda dist:Vu*Vy*(kyu3(dist)) #t<0 kyu
# cross covariance kuy
kuyp = lambda dist:Vu*Vy*(kyu3(dist)) #t>0 kuy
kuyn = lambda dist:Vu*Vy*(k1(dist)+k2(dist)) #t<0 kuy
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
target[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2]))
elif i==0 and j==1:
#target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) )
target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[ss1,ss2]) ) )
elif i==1 and j==1:
target[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2]))
else:
#target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[s1[0],s2[0]]) ) )
target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[ss1,ss2]) ) )
#KUU = kuu(np.abs(rdist[:iu,:iu]))
#KYY = kyy(np.abs(rdist[iu:,iu:]))
#KYU = np.where(rdist[iu:,:iu]>0,kyup(np.abs(rdist[iu:,:iu])),kyun(np.abs(rdist[iu:,:iu]) ))
#KUY = np.where(rdist[:iu,iu:]>0,kuyp(np.abs(rdist[:iu,iu:])),kuyn(np.abs(rdist[:iu,iu:]) ))
#ker=np.vstack((np.hstack([KUU,KUY]),np.hstack([KYU,KYY])))
#np.add(ker, target, target)
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix associated to X."""
ly=1/self.lengthscaleY
lu=np.sqrt(3)/self.lengthscaleU
#ly=self.lengthscaleY
#lu=self.lengthscaleU
k1 = (2*lu+ly)/(lu+ly)**2
k2 = (ly-2*lu + 2*lu-ly ) / (ly-lu)**2
k3 = 1/(lu+ly) + (lu)/(lu+ly)**2
slices = index_to_slices(X[:,-1])
for i, ss1 in enumerate(slices):
for s1 in ss1:
if i==0:
target[s1]+= self.varianceU
elif i==1:
target[s1]+= self.varianceU*self.varianceY*(k1+k2+k3)
else:
raise ValueError, "invalid input/output index"
#target[slices[0][0]]+= self.varianceU #matern32 diag
#target[slices[1][0]]+= self.varianceU*self.varianceY*(k1+k2+k3) # diag
def dK_dtheta(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters."""
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
#rdist = X[:,0][:,None] - X2[:,0][:,None].T
rdist = X - X2.T
ly=1/self.lengthscaleY
lu=np.sqrt(3)/self.lengthscaleU
rd=rdist.shape[0]
dktheta1 = np.zeros([rd,rd])
dktheta2 = np.zeros([rd,rd])
dkUdvar = np.zeros([rd,rd])
dkYdvar = np.zeros([rd,rd])
# dk dtheta for UU
UUdtheta1 = lambda dist: np.exp(-lu* dist)*dist + (-dist)*np.exp(-lu* dist)*(1+lu*dist)
UUdtheta2 = lambda dist: 0
#UUdvar = lambda dist: (1 + lu*dist)*np.exp(-lu*dist)
UUdvar = lambda dist: (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist))
# dk dtheta for YY
dk1theta1 = lambda dist: np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3
#c=np.sqrt(3)
#t1=c/lu
#t2=1/ly
#dk1theta1=np.exp(-dist*ly)*t2*( (2*c*t2+2*t1)/(c*t2+t1)**2 -2*(2*c*t2*t1+t1**2)/(c*t2+t1)**3 )
dk2theta1 = lambda dist: 1*(
np.exp(-lu*dist)*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2) + np.exp(-lu*dist)*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2)
+np.exp(-dist*lu)*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3)
+np.exp(-dist*ly)*2*(ly-lu)**(-2)
+np.exp(-dist*ly)*2*(2*lu-ly)*(ly-lu)**(-3)
)
dk3theta1 = lambda dist: np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist)
#dktheta1 = lambda dist: self.varianceU*self.varianceY*(dk1theta1+dk2theta1+dk3theta1)
dk1theta2 = lambda dist: np.exp(-ly*dist) * ((lu+ly)**(-2)) * ( (-dist)*(2*lu+ly) + 1 + (-2)*(2*lu+ly)/(lu+ly) )
dk2theta2 =lambda dist: 1*(
np.exp(-dist*lu)*(ly-lu)**(-2) * ( 1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1) )
+np.exp(-dist*ly)*(ly-lu)**(-2) * ( (-dist)*(2*lu-ly) -1+(2*lu-ly)*(-2)*(ly-lu)**(-1) )
)
dk3theta2 = lambda dist: np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3
#dktheta2 = lambda dist: self.varianceU*self.varianceY*(dk1theta2 + dk2theta2 +dk3theta2)
# kyy kernel
#k1 = lambda dist: np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
#k2 = lambda dist: (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
#k3 = lambda dist: np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
k1 = lambda dist: np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
k2 = lambda dist: (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = lambda dist: np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
#dkdvar = k1+k2+k3
#cross covariance kernel
kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly)))
# dk dtheta for UY
dkcrtheta2 = lambda dist: np.exp(-lu*dist) * ( (-1)*(lu+ly)**(-2)*(1+lu*dist+lu*(lu+ly)**(-1)) + (lu+ly)**(-1)*(-lu)*(lu+ly)**(-2) )
dkcrtheta1 = lambda dist: np.exp(-lu*dist)*(lu+ly)**(-1)* ( (-dist)*(1+dist*lu+lu*(lu+ly)**(-1)) - (lu+ly)**(-1)*(1+dist*lu+lu*(lu+ly)**(-1)) +dist+(lu+ly)**(-1)-lu*(lu+ly)**(-2) )
#dkuyp dtheta
#dkuyp dtheta1 = self.varianceU*self.varianceY* (dk1theta1() + dk2theta1())
#dkuyp dtheta2 = self.varianceU*self.varianceY* (dk1theta2() + dk2theta2())
#dkuyp dVar = k1() + k2()
#dkyup dtheta
#dkyun dtheta1 = self.varianceU*self.varianceY* (dk1theta1() + dk2theta1())
#dkyun dtheta2 = self.varianceU*self.varianceY* (dk1theta2() + dk2theta2())
#dkyup dVar = k1() + k2() #
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
#target[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2]))
dktheta1[ss1,ss2] = self.varianceU*self.varianceY*UUdtheta1(np.abs(rdist[ss1,ss2]))
dktheta2[ss1,ss2] = 0
dkUdvar[ss1,ss2] = UUdvar(np.abs(rdist[ss1,ss2]))
dkYdvar[ss1,ss2] = 0
elif i==0 and j==1:
#target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) )
#dktheta1[ss1,ss2] =
#dktheta2[ss1,ss2] =
#dkdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) )
dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , dkcrtheta1(np.abs(rdist[ss1,ss2])) ,self.varianceU*self.varianceY*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) )
dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , dkcrtheta2(np.abs(rdist[ss1,ss2])) ,self.varianceU*self.varianceY*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) )
dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyu3(np.abs(rdist[ss1,ss2])) ,k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2])) )
dkYdvar[ss1,ss2] = dkUdvar[ss1,ss2]
elif i==1 and j==1:
#target[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2]))
dktheta1[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))+dk3theta1(np.abs(rdist[ss1,ss2])))
dktheta2[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta2(np.abs(rdist[ss1,ss2])) + dk2theta2(np.abs(rdist[ss1,ss2])) +dk3theta2(np.abs(rdist[ss1,ss2])))
dkUdvar[ss1,ss2] = (k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) )
dkYdvar[ss1,ss2] = dkUdvar[ss1,ss2]
else:
#target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[s1[0],s2[0]]) ) )
dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.varianceU*self.varianceY*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) , dkcrtheta1(np.abs(rdist[ss1,ss2])) )
dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.varianceU*self.varianceY*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) , dkcrtheta2(np.abs(rdist[ss1,ss2])) )
dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2])), kyu3(np.abs(rdist[ss1,ss2])) )
dkYdvar[ss1,ss2] = dkUdvar[ss1,ss2]
target[0] += np.sum(self.varianceY*dkUdvar * dL_dK)
target[1] += np.sum(self.varianceU*dkYdvar * dL_dK)
target[2] += np.sum(dktheta1*(-np.sqrt(3)*self.lengthscaleU**(-2)) * dL_dK)
target[3] += np.sum(dktheta2*(-self.lengthscaleY**(-2)) * dL_dK)
# def dKdiag_dtheta(self, dL_dKdiag, X, target):
# """derivative of the diagonal of the covariance matrix with respect to the parameters."""
# # NB: derivative of diagonal elements wrt lengthscale is 0
# target[0] += np.sum(dL_dKdiag)
# def dK_dX(self, dL_dK, X, X2, target):
# """derivative of the covariance matrix with respect to X."""
# if X2 is None: X2 = X
# dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
# ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
# dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
# target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
# def dKdiag_dX(self, dL_dKdiag, X, target):
# pass

View file

@ -1,71 +0,0 @@
# Code for testing functions written in sympy_helpers.cpp
from scipy import weave
import tempfile
import os
import numpy as np
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
extra_compile_args = []
weave_kwargs = {
'support_code': "",
'include_dirs':[tempfile.gettempdir(), current_dir],
'headers':['"parts/sympy_helpers.h"'],
'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],
'extra_compile_args':extra_compile_args,
'extra_link_args':['-lgomp'],
'verbose':True}
def erfcx(x):
code = """
// Code for computing scaled complementary erf
int i;
int dim;
int elements = Ntarget[0];
for (dim=1; dim<Dtarget; dim++)
elements *= Ntarget[dim];
for (i=0;i<elements;i++)
target[i] = erfcx(x[i]);
"""
x = np.asarray(x)
arg_names = ['target','x']
target = np.zeros_like(x)
weave.inline(code=code, arg_names=arg_names,**weave_kwargs)
return target
def ln_diff_erf(x, y):
code = """
// Code for computing scaled complementary erf
int i;
int dim;
int elements = Ntarget[0];
for (dim=1; dim<Dtarget; dim++)
elements *= Ntarget[dim];
for (i=0;i<elements;i++)
target[i] = ln_diff_erf(x[i], y[i]);
"""
x = np.asarray(x)
y = np.asarray(y)
assert(x.shape==y.shape)
target = np.zeros_like(x)
arg_names = ['target','x', 'y']
weave.inline(code=code, arg_names=arg_names,**weave_kwargs)
return target
def h(t, tprime, d_i, d_j, l):
code = """
// Code for computing the 1st order ODE h helper function.
int i;
int dim;
int elements = Ntarget[0];
for (dim=1; dim<Dtarget; dim++)
elements *= Ntarget[dim];
for (i=0;i<elements;i++)
target[i] = h(t[i], tprime[i], d_i, d_j, l);
"""
t = np.asarray(t)
tprime = np.asarray(tprime)
assert(tprime.shape==t.shape)
target = np.zeros_like(t)
arg_names = ['target','t', 'tprime', 'd_i', 'd_j', 'l']
weave.inline(code=code, arg_names=arg_names,**weave_kwargs)
return target

View file

@ -1,406 +0,0 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#
#Parts of this file were influenced by the Matlab GPML framework written by
#Carl Edward Rasmussen & Hannes Nickisch, however all bugs are our own.
#
#The GPML code is released under the FreeBSD License.
#Copyright (c) 2005-2013 Carl Edward Rasmussen & Hannes Nickisch. All rights reserved.
#
#The code and associated documentation is available from
#http://gaussianprocess.org/gpml/code.
import numpy as np
import scipy as sp
from likelihood import likelihood
from ..util.linalg import mdot, jitchol, pddet, dpotrs
from functools import partial as partial_func
import warnings
class Laplace(likelihood):
"""Laplace approximation to a posterior"""
def __init__(self, data, noise_model, extra_data=None):
"""
Laplace Approximation
Find the moments \hat{f} and the hessian at this point
(using Newton-Raphson) of the unnormalised posterior
Compute the GP variables (i.e. generate some Y^{squiggle} and
z^{squiggle} which makes a gaussian the same as the laplace
approximation to the posterior, but normalised
Arguments
---------
:param data: array of data the likelihood function is approximating
:type data: NxD
:param noise_model: likelihood function - subclass of noise_model
:type noise_model: noise_model
:param extra_data: additional data used by some likelihood functions,
"""
self.data = data
self.noise_model = noise_model
self.extra_data = extra_data
#Inital values
self.N, self.D = self.data.shape
self.is_heteroscedastic = True
self.Nparams = 0
self.NORMAL_CONST = ((0.5 * self.N) * np.log(2 * np.pi))
self.restart()
likelihood.__init__(self)
def restart(self):
"""
Reset likelihood variables to their defaults
"""
#Initial values for the GP variables
self.Y = np.zeros((self.N, 1))
self.covariance_matrix = np.eye(self.N)
self.precision = np.ones(self.N)[:, None]
self.Z = 0
self.YYT = None
self.old_Ki_f = None
self.bad_fhat = False
def predictive_values(self,mu,var,full_cov,**noise_args):
if full_cov:
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
return self.noise_model.predictive_values(mu,var,**noise_args)
def log_predictive_density(self, y_test, mu_star, var_star):
"""
Calculation of the log predictive density
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
:type mu_star: (Nx1) array
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
:type var_star: (Nx1) array
"""
return self.noise_model.log_predictive_density(y_test, mu_star, var_star)
def _get_params(self):
return np.asarray(self.noise_model._get_params())
def _get_param_names(self):
return self.noise_model._get_param_names()
def _set_params(self, p):
return self.noise_model._set_params(p)
def _shared_gradients_components(self):
d3lik_d3fhat = self.noise_model.d3logpdf_df3(self.f_hat, self.data, extra_data=self.extra_data)
dL_dfhat = 0.5*(np.diag(self.Ki_W_i)[:, None]*d3lik_d3fhat).T #why isn't this -0.5?
I_KW_i = np.eye(self.N) - np.dot(self.K, self.Wi_K_i)
return dL_dfhat, I_KW_i
def _Kgradients(self):
"""
Gradients with respect to prior kernel parameters dL_dK to be chained
with dK_dthetaK to give dL_dthetaK
:returns: dL_dK matrix
:rtype: Matrix (1 x num_kernel_params)
"""
dL_dfhat, I_KW_i = self._shared_gradients_components()
dlp = self.noise_model.dlogpdf_df(self.f_hat, self.data, extra_data=self.extra_data)
#Explicit
#expl_a = np.dot(self.Ki_f, self.Ki_f.T)
#expl_b = self.Wi_K_i
#expl = 0.5*expl_a - 0.5*expl_b
#dL_dthetaK_exp = dK_dthetaK(expl, X)
#Implicit
impl = mdot(dlp, dL_dfhat, I_KW_i)
#No longer required as we are computing these in the gp already
#otherwise we would take them away and add them back
#dL_dthetaK_imp = dK_dthetaK(impl, X)
#dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp
#dL_dK = expl + impl
#No need to compute explicit as we are computing dZ_dK to account
#for the difference between the K gradients of a normal GP,
#and the K gradients including the implicit part
dL_dK = impl
return dL_dK
def _gradients(self, partial):
"""
Gradients with respect to likelihood parameters (dL_dthetaL)
:param partial: Not needed by this likelihood
:type partial: lambda function
:rtype: array of derivatives (1 x num_likelihood_params)
"""
dL_dfhat, I_KW_i = self._shared_gradients_components()
dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = self.noise_model._laplace_gradients(self.f_hat, self.data, extra_data=self.extra_data)
#len(dlik_dthetaL)
num_params = len(self._get_param_names())
# make space for one derivative for each likelihood parameter
dL_dthetaL = np.zeros(num_params)
for thetaL_i in range(num_params):
#Explicit
dL_dthetaL_exp = ( np.sum(dlik_dthetaL[:, thetaL_i])
#- 0.5*np.trace(mdot(self.Ki_W_i, (self.K, np.diagflat(dlik_hess_dthetaL[thetaL_i]))))
+ np.dot(0.5*np.diag(self.Ki_W_i)[:,None].T, dlik_hess_dthetaL[:, thetaL_i])
)
#Implicit
dfhat_dthetaL = mdot(I_KW_i, self.K, dlik_grad_dthetaL[:, thetaL_i])
dL_dthetaL_imp = np.dot(dL_dfhat, dfhat_dthetaL)
dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
return dL_dthetaL
def _compute_GP_variables(self):
"""
Generate data Y which would give the normal distribution identical
to the laplace approximation to the posterior, but normalised
GPy expects a likelihood to be gaussian, so need to caluclate
the data Y^{\tilde} that makes the posterior match that found
by a laplace approximation to a non-gaussian likelihood but with
a gaussian likelihood
Firstly,
The hessian of the unormalised posterior distribution is (K^{-1} + W)^{-1},
i.e. z*N(f|f^{\hat}, (K^{-1} + W)^{-1}) but this assumes a non-gaussian likelihood,
we wish to find the hessian \Sigma^{\tilde}
that has the same curvature but using our new simulated data Y^{\tilde}
i.e. we do N(Y^{\tilde}|f^{\hat}, \Sigma^{\tilde})N(f|0, K) = z*N(f|f^{\hat}, (K^{-1} + W)^{-1})
and we wish to find what Y^{\tilde} and \Sigma^{\tilde}
We find that Y^{\tilde} = W^{-1}(K^{-1} + W)f^{\hat} and \Sigma^{tilde} = W^{-1}
Secondly,
GPy optimizes the log marginal log p(y) = -0.5*ln|K+\Sigma^{\tilde}| - 0.5*Y^{\tilde}^{T}(K^{-1} + \Sigma^{tilde})^{-1}Y + lik.Z
So we can suck up any differences between that and our log marginal likelihood approximation
p^{\squiggle}(y) = -0.5*f^{\hat}K^{-1}f^{\hat} + log p(y|f^{\hat}) - 0.5*log |K||K^{-1} + W|
which we want to optimize instead, by equating them and rearranging, the difference is added onto
the log p(y) that GPy optimizes by default
Thirdly,
Since we have gradients that depend on how we move f^{\hat}, we have implicit components
aswell as the explicit dL_dK, we hold these differences in dZ_dK and add them to dL_dK in the
gp.py code
"""
Wi = 1.0/self.W
self.Sigma_tilde = np.diagflat(Wi)
Y_tilde = Wi*self.Ki_f + self.f_hat
self.Wi_K_i = self.W12BiW12
ln_det_Wi_K = pddet(self.Sigma_tilde + self.K)
lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data)
y_Wi_K_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
Z_tilde = (+ lik
- 0.5*self.ln_B_det
+ 0.5*ln_det_Wi_K
- 0.5*self.f_Ki_f
+ 0.5*y_Wi_K_i_y
+ self.NORMAL_CONST
)
#Convert to float as its (1, 1) and Z must be a scalar
self.Z = np.float64(Z_tilde)
self.Y = Y_tilde
self.YYT = np.dot(self.Y, self.Y.T)
self.covariance_matrix = self.Sigma_tilde
self.precision = 1.0 / np.diag(self.covariance_matrix)[:, None]
#Compute dZ_dK which is how the approximated distributions gradients differ from the dL_dK computed for other likelihoods
self.dZ_dK = self._Kgradients()
#+ 0.5*self.Wi_K_i - 0.5*np.dot(self.Ki_f, self.Ki_f.T) #since we are not adding the K gradients explicit part theres no need to compute this again
def fit_full(self, K):
"""
The laplace approximation algorithm, find K and expand hessian
For nomenclature see Rasmussen & Williams 2006 - modified for numerical stability
:param K: Prior covariance matrix evaluated at locations X
:type K: NxN matrix
"""
self.K = K.copy()
#Find mode
self.f_hat = self.rasm_mode(self.K)
#Compute hessian and other variables at mode
self._compute_likelihood_variables()
#Compute fake variables replicating laplace approximation to posterior
self._compute_GP_variables()
def _compute_likelihood_variables(self):
"""
Compute the variables required to compute gaussian Y variables
"""
#At this point get the hessian matrix (or vector as W is diagonal)
self.W = -self.noise_model.d2logpdf_df2(self.f_hat, self.data, extra_data=self.extra_data)
if not self.noise_model.log_concave:
i = self.W < 1e-6
if np.any(i):
warnings.warn('truncating non log-concave likelihood curvature')
# FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
self.W[i] = 1e-6
self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N))
self.Ki_f = self.Ki_f
self.f_Ki_f = np.dot(self.f_hat.T, self.Ki_f)
self.Ki_W_i = self.K - mdot(self.K, self.W12BiW12, self.K)
def _compute_B_statistics(self, K, W, a):
"""
Rasmussen suggests the use of a numerically stable positive definite matrix B
Which has a positive diagonal element and can be easyily inverted
:param K: Prior Covariance matrix evaluated at locations X
:type K: NxN matrix
:param W: Negative hessian at a point (diagonal matrix)
:type W: Vector of diagonal values of hessian (1xN)
:param a: Matrix to calculate W12BiW12a
:type a: Matrix NxN
:returns: (W12BiW12a, ln_B_det)
"""
if not self.noise_model.log_concave:
#print "Under 1e-10: {}".format(np.sum(W < 1e-6))
W[W < 1e-10] = 1e-10 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
# If the likelihood is non-log-concave. We wan't to say that there is a negative variance
# To cause the posterior to become less certain than the prior and likelihood,
# This is a property only held by non-log-concave likelihoods
#W is diagonal so its sqrt is just the sqrt of the diagonal elements
W_12 = np.sqrt(W)
B = np.eye(self.N) + W_12*K*W_12.T
L = jitchol(B)
W12BiW12a = W_12*dpotrs(L, np.asfortranarray(W_12*a), lower=1)[0]
ln_B_det = 2*np.sum(np.log(np.diag(L)))
return W12BiW12a, ln_B_det
def rasm_mode(self, K, MAX_ITER=40):
"""
Rasmussen's numerically stable mode finding
For nomenclature see Rasmussen & Williams 2006
Influenced by GPML (BSD) code, all errors are our own
:param K: Covariance matrix evaluated at locations X
:type K: NxD matrix
:param MAX_ITER: Maximum number of iterations of newton-raphson before forcing finish of optimisation
:type MAX_ITER: scalar
:returns: f_hat, mode on which to make laplace approxmiation
:rtype: NxD matrix
"""
#old_Ki_f = np.zeros((self.N, 1))
#Start f's at zero originally of if we have gone off track, try restarting
if self.old_Ki_f is None or self.bad_fhat:
old_Ki_f = np.random.rand(self.N, 1)/50.0
#old_Ki_f = self.Y
f = np.dot(K, old_Ki_f)
else:
#Start at the old best point
old_Ki_f = self.old_Ki_f.copy()
f = self.f_hat.copy()
new_obj = -np.inf
old_obj = np.inf
def obj(Ki_f, f):
return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)
difference = np.inf
epsilon = 1e-7
#step_size = 1
#rs = 0
i = 0
while difference > epsilon and i < MAX_ITER:
W = -self.noise_model.d2logpdf_df2(f, self.data, extra_data=self.extra_data)
W_f = W*f
grad = self.noise_model.dlogpdf_df(f, self.data, extra_data=self.extra_data)
b = W_f + grad
W12BiW12Kb, _ = self._compute_B_statistics(K, W.copy(), np.dot(K, b))
#Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
full_step_Ki_f = b - W12BiW12Kb
dKi_f = full_step_Ki_f - old_Ki_f
f_old = f.copy()
def inner_obj(step_size, old_Ki_f, dKi_f, K):
Ki_f = old_Ki_f + step_size*dKi_f
f = np.dot(K, Ki_f)
# This is nasty, need to set something within an optimization though
self.tmp_Ki_f = Ki_f.copy()
self.tmp_f = f.copy()
return -obj(Ki_f, f)
i_o = partial_func(inner_obj, old_Ki_f=old_Ki_f, dKi_f=dKi_f, K=K)
#Find the stepsize that minimizes the objective function using a brent line search
#The tolerance and maxiter matter for speed! Seems to be best to keep them low and make more full
#steps than get this exact then make a step, if B was bigger it might be the other way around though
#new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':5}).fun
new_obj = sp.optimize.brent(i_o, tol=1e-4, maxiter=10)
f = self.tmp_f.copy()
Ki_f = self.tmp_Ki_f.copy()
#Optimize without linesearch
#f_old = f.copy()
#update_passed = False
#while not update_passed:
#Ki_f = old_Ki_f + step_size*dKi_f
#f = np.dot(K, Ki_f)
#old_obj = new_obj
#new_obj = obj(Ki_f, f)
#difference = new_obj - old_obj
##print "difference: ",difference
#if difference < 0:
##print "Objective function rose", np.float(difference)
##If the objective function isn't rising, restart optimization
#step_size *= 0.8
##print "Reducing step-size to {ss:.3} and restarting optimization".format(ss=step_size)
##objective function isn't increasing, try reducing step size
#f = f_old.copy() #it's actually faster not to go back to old location and just zigzag across the mode
#old_obj = new_obj
#rs += 1
#else:
#update_passed = True
#old_Ki_f = self.Ki_f.copy()
#difference = abs(new_obj - old_obj)
#old_obj = new_obj.copy()
difference = np.abs(np.sum(f - f_old)) + np.abs(np.sum(Ki_f - old_Ki_f))
#difference = np.abs(np.sum(Ki_f - old_Ki_f))/np.float(self.N)
old_Ki_f = Ki_f.copy()
i += 1
self.old_Ki_f = old_Ki_f.copy()
#Warn of bad fits
if difference > epsilon:
self.bad_fhat = True
warnings.warn("Not perfect f_hat fit difference: {}".format(difference))
elif self.bad_fhat:
self.bad_fhat = False
warnings.warn("f_hat now perfect again")
self.Ki_f = Ki_f
return f

View file

@ -1,222 +0,0 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
class Bernoulli(NoiseDistribution):
"""
Bernoulli likelihood
.. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
.. Note::
Y is expected to take values in {-1,1}
Probit likelihood usually used
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
super(Bernoulli, self).__init__(gp_link,analytical_mean,analytical_variance)
if isinstance(gp_link , (gp_transformations.Heaviside, gp_transformations.Probit)):
self.log_concave = True
def _preprocess_values(self,Y):
"""
Check if the values of the observations correspond to the values
assumed by the likelihood function.
..Note:: Binary classification algorithm works better with classes {-1,1}
"""
Y_prep = Y.copy()
Y1 = Y[Y.flatten()==1].size
Y2 = Y[Y.flatten()==0].size
assert Y1 + Y2 == Y.size, 'Bernoulli likelihood is meant to be used only with outputs in {0,1}.'
Y_prep[Y.flatten() == 0] = -1
return Y_prep
def _moments_match_analytical(self,data_i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
if data_i == 1:
sign = 1.
elif data_i == 0:
sign = -1
else:
raise ValueError("bad value for Bernouilli observation (0,1)")
if isinstance(self.gp_link,gp_transformations.Probit):
z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z)
phi = std_norm_pdf(z)
mu_hat = v_i/tau_i + sign*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
elif isinstance(self.gp_link,gp_transformations.Heaviside):
a = sign*v_i/np.sqrt(tau_i)
Z_hat = std_norm_cdf(a)
N = std_norm_pdf(a)
mu_hat = v_i/tau_i + sign*N/Z_hat/np.sqrt(tau_i)
sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i
if np.any(np.isnan([Z_hat, mu_hat, sigma2_hat])):
stop
else:
raise ValueError("Exact moment matching not available for link {}".format(self.gp_link.gp_transformations.__name__))
return Z_hat, mu_hat, sigma2_hat
def _predictive_mean_analytical(self,mu,variance):
if isinstance(self.gp_link,gp_transformations.Probit):
return stats.norm.cdf(mu/np.sqrt(1+variance))
elif isinstance(self.gp_link,gp_transformations.Heaviside):
return stats.norm.cdf(mu/np.sqrt(variance))
else:
raise NotImplementedError
def _predictive_variance_analytical(self,mu,variance, pred_mean):
if isinstance(self.gp_link,gp_transformations.Heaviside):
return 0.
else:
raise NotImplementedError
def pdf_link(self, link_f, y, extra_data=None):
"""
Likelihood function given link(f)
.. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:returns: likelihood evaluated for this point
:rtype: float
.. Note:
Each y_i must be in {0,1}
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
objective = (link_f**y) * ((1.-link_f)**(1.-y))
return np.exp(np.sum(np.log(objective)))
def logpdf_link(self, link_f, y, extra_data=None):
"""
Log Likelihood function given link(f)
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i})
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:returns: log likelihood evaluated at points link(f)
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
#objective = y*np.log(link_f) + (1.-y)*np.log(link_f)
objective = np.where(y==1, np.log(link_f), np.log(1-link_f))
return np.sum(objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the pdf at y, given link(f) w.r.t link(f)
.. math::
\\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:returns: gradient of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
grad = (y/link_f) - (1.-y)/(1-link_f)
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)
.. math::
\\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
return d2logpdf_dlink2
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i}}{(1-\\lambda(f))^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:returns: third derivative of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
return d3logpdf_dlink3
def _mean(self,gp):
"""
Mass (or density) function
"""
return self.gp_link.transf(gp)
def _variance(self,gp):
"""
Mass (or density) function
"""
p = self.gp_link.transf(gp)
return p*(1.-p)
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
ns = np.ones_like(gp, dtype=int)
Ysim = np.random.binomial(ns, self.gp_link.transf(gp))
return Ysim.reshape(orig_shape)

View file

@ -1,277 +0,0 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats, special
import scipy as sp
import gp_transformations
from noise_distributions import NoiseDistribution
from scipy import stats, integrate
from scipy.special import gammaln, gamma
class StudentT(NoiseDistribution):
"""
Student T likelihood
For nomanclature see Bayesian Data Analysis 2003 p576
.. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
"""
def __init__(self,gp_link=None,analytical_mean=True,analytical_variance=True, deg_free=5, sigma2=2):
self.v = deg_free
self.sigma2 = sigma2
self._set_params(np.asarray(sigma2))
super(StudentT, self).__init__(gp_link,analytical_mean,analytical_variance)
self.log_concave = False
def _get_params(self):
return np.asarray(self.sigma2)
def _get_param_names(self):
return ["t_noise_std2"]
def _set_params(self, x):
self.sigma2 = float(x)
@property
def variance(self, extra_data=None):
return (self.v / float(self.v - 2)) * self.sigma2
def pdf_link(self, link_f, y, extra_data=None):
"""
Likelihood function given link(f)
.. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
#Careful gamma(big_number) is infinity!
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
/ (np.sqrt(self.v * np.pi * self.sigma2)))
* ((1 + (1./float(self.v))*((e**2)/float(self.sigma2)))**(-0.5*(self.v + 1)))
)
return np.prod(objective)
def logpdf_link(self, link_f, y, extra_data=None):
"""
Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
:param link_f: latent variables (link(f))
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
objective = (+ gammaln((self.v + 1) * 0.5)
- gammaln(self.v * 0.5)
- 0.5*np.log(self.sigma2 * self.v * np.pi)
- 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
)
return np.sum(objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v}
:param link_f: latent variables (f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
((e**2 + self.sigma2*self.v)**3)
)
return d3lik_dlink3
def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
"""
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
.. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
return np.sum(dlogpdf_dvar)
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
"""
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2}
:param link_f: latent variables link_f
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
"""
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
/ ((self.sigma2*self.v + (e**2))**3)
)
return d2logpdf_dlink2_dvar
def dlogpdf_link_dtheta(self, f, y, extra_data=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
return np.asarray([[dlogpdf_dvar]])
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
return d2logpdf_dlink2_dvar
def _predictive_variance_analytical(self, mu, sigma, predictive_mean=None):
"""
Compute predictive variance of student_t*normal p(y*|f*)p(f*)
Need to find what the variance is at the latent points for a student t*normal p(y*|f*)p(f*)
(((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))
*((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2)))
"""
#FIXME: Not correct
#We want the variance around test points y which comes from int p(y*|f*)p(f*) df*
#Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)]
#Since we are given f* (mu) which is our mean (expected) value of y*|f* then the variance is the variance around this
#Which was also given to us as (var)
#We also need to know the expected variance of y* around samples f*, this is the variance of the student t distribution
#However the variance of the student t distribution is not dependent on f, only on sigma and the degrees of freedom
true_var = 1/(1/sigma**2 + 1/self.variance)
return true_var
def _predictive_mean_analytical(self, mu, sigma):
"""
Compute mean of the prediction
"""
#FIXME: Not correct
return mu
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
#FIXME: Very slow as we are computing a new random variable per input!
#Can't get it to sample all at the same time
#student_t_samples = np.array([stats.t.rvs(self.v, self.gp_link.transf(gpj),scale=np.sqrt(self.sigma2), size=1) for gpj in gp])
dfs = np.ones_like(gp)*self.v
scales = np.ones_like(gp)*np.sqrt(self.sigma2)
student_t_samples = stats.t.rvs(dfs, loc=self.gp_link.transf(gp),
scale=scales)
return student_t_samples.reshape(orig_shape)

View file

@ -1,33 +0,0 @@
'''
.. module:: GPy.models
Implementations for common models used in GP regression and classification.
The different models can be viewed in :mod:`GPy.models_modules`, which holds
detailed explanations for the different models.
.. note::
This module is a convienince module for endusers to use. For developers
see :mod:`GPy.models_modules`, which holds the implementions for each model.:
.. moduleauthor:: Max Zwiessele <ibinbei@gmail.com>
'''
__updated__ = '2013-11-28'
from models_modules.bayesian_gplvm import BayesianGPLVM, BayesianGPLVMWithMissingData
from models_modules.gp_regression import GPRegression
from models_modules.gp_classification import GPClassification#; _gp_classification = gp_classification ; del gp_classification
from models_modules.sparse_gp_regression import SparseGPRegression#; _sparse_gp_regression = sparse_gp_regression ; del sparse_gp_regression
from models_modules.svigp_regression import SVIGPRegression#; _svigp_regression = svigp_regression ; del svigp_regression
from models_modules.sparse_gp_classification import SparseGPClassification#; _sparse_gp_classification = sparse_gp_classification ; del sparse_gp_classification
from models_modules.fitc_classification import FITCClassification#; _fitc_classification = fitc_classification ; del fitc_classification
from models_modules.gplvm import GPLVM#; _gplvm = gplvm ; del gplvm
from models_modules.bcgplvm import BCGPLVM#; _bcgplvm = bcgplvm; del bcgplvm
from models_modules.sparse_gplvm import SparseGPLVM#; _sparse_gplvm = sparse_gplvm ; del sparse_gplvm
from models_modules.warped_gp import WarpedGP#; _warped_gp = warped_gp ; del warped_gp
from models_modules.bayesian_gplvm import BayesianGPLVM#; _bayesian_gplvm = bayesian_gplvm ; del bayesian_gplvm
from models_modules.mrd import MRD#; _mrd = mrd; del mrd
from models_modules.gradient_checker import GradientChecker#; _gradient_checker = gradient_checker ; del gradient_checker
from models_modules.gp_multioutput_regression import GPMultioutputRegression#; _gp_multioutput_regression = gp_multioutput_regression ; del gp_multioutput_regression
from models_modules.sparse_gp_multioutput_regression import SparseGPMultioutputRegression#; _sparse_gp_multioutput_regression = sparse_gp_multioutput_regression ; del sparse_gp_multioutput_regression
from models_modules.gradient_checker import GradientChecker

View file

@ -1,19 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
# from gp_regression import GPRegression; _gp_regression = gp_regression ; del gp_regression
# from gp_classification import GPClassification; _gp_classification = gp_classification ; del gp_classification
# from sparse_gp_regression import SparseGPRegression; _sparse_gp_regression = sparse_gp_regression ; del sparse_gp_regression
# from svigp_regression import SVIGPRegression; _svigp_regression = svigp_regression ; del svigp_regression
# from sparse_gp_classification import SparseGPClassification; _sparse_gp_classification = sparse_gp_classification ; del sparse_gp_classification
# from fitc_classification import FITCClassification; _fitc_classification = fitc_classification ; del fitc_classification
# from gplvm import GPLVM; _gplvm = gplvm ; del gplvm
# from bcgplvm import BCGPLVM; _bcgplvm = bcgplvm; del bcgplvm
# from sparse_gplvm import SparseGPLVM; _sparse_gplvm = sparse_gplvm ; del sparse_gplvm
# from warped_gp import WarpedGP; _warped_gp = warped_gp ; del warped_gp
# from bayesian_gplvm import BayesianGPLVM; _bayesian_gplvm = bayesian_gplvm ; del bayesian_gplvm
# from mrd import MRD; _mrd = mrd ; del mrd
# from gradient_checker import GradientChecker; _gradient_checker = gradient_checker ; del gradient_checker
# from gp_multioutput_regression import GPMultioutputRegression; _gp_multioutput_regression = gp_multioutput_regression ; del gp_multioutput_regression
# from sparse_gp_multioutput_regression import SparseGPMultioutputRegression; _sparse_gp_multioutput_regression = sparse_gp_multioutput_regression ; del sparse_gp_multioutput_regression

View file

@ -1,234 +0,0 @@
# Copyright (c) 2012 - 2014 the GPy Austhors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from .. import kern
from ..core.sparse_gp_mpi import SparseGP_MPI
from ..likelihoods import Gaussian
from ..core.parameterization.variational import NormalPosterior, NormalPrior
from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch
import logging
class BayesianGPLVM(SparseGP_MPI):
"""
Bayesian Gaussian Process Latent Variable Model
:param Y: observed data (np.ndarray) or GPy.likelihood
:type Y: np.ndarray| GPy.likelihood instance
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
"""
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None,
name='bayesian gplvm', mpi_comm=None, normalizer=None,
missing_data=False, stochastic=False, batchsize=1):
self.logger = logging.getLogger(self.__class__.__name__)
if X is None:
from ..util.initialization import initialize_latent
self.logger.info("initializing latent space X with method {}".format(init))
X, fracs = initialize_latent(init, input_dim, Y)
else:
fracs = np.ones(input_dim)
self.init = init
if X_variance is None:
self.logger.info("initializing latent space variance ~ uniform(0,.1)")
X_variance = np.random.uniform(0,.1,X.shape)
if Z is None:
self.logger.info("initializing inducing inputs")
Z = np.random.permutation(X.copy())[:num_inducing]
assert Z.shape[1] == X.shape[1]
if kernel is None:
self.logger.info("initializing kernel RBF")
kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) #+ kern.Bias(input_dim) + kern.White(input_dim)
if likelihood is None:
likelihood = Gaussian()
self.variational_prior = NormalPrior()
X = NormalPosterior(X, X_variance)
if inference_method is None:
if mpi_comm is not None:
inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
else:
from ..inference.latent_function_inference.var_dtc import VarDTC
self.logger.debug("creating inference_method var_dtc")
inference_method = VarDTC(limit=1 if not missing_data else Y.shape[1])
if isinstance(inference_method,VarDTC_minibatch):
inference_method.mpi_comm = mpi_comm
super(BayesianGPLVM,self).__init__(X, Y, Z, kernel, likelihood=likelihood,
name=name, inference_method=inference_method,
normalizer=normalizer, mpi_comm=mpi_comm,
variational_prior=self.variational_prior,
)
self.link_parameter(self.X, index=0)
def set_X_gradients(self, X, X_grad):
"""Set the gradients of the posterior distribution of X in its specific form."""
X.mean.gradient, X.variance.gradient = X_grad
def get_X_gradients(self, X):
"""Get the gradients of the posterior distribution of X in its specific form."""
return X.mean.gradient, X.variance.gradient
def parameters_changed(self):
super(BayesianGPLVM,self).parameters_changed()
if isinstance(self.inference_method, VarDTC_minibatch):
return
kl_fctr = 1.
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(
variational_posterior=self.X,
Z=self.Z,
dL_dpsi0=self.grad_dict['dL_dpsi0'],
dL_dpsi1=self.grad_dict['dL_dpsi1'],
dL_dpsi2=self.grad_dict['dL_dpsi2'])
self.variational_prior.update_gradients_KL(self.X)
#super(BayesianGPLVM, self).parameters_changed()
#self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
#self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
# This is testing code -------------------------
# i = np.random.randint(self.X.shape[0])
# X_ = self.X.mean
# which = np.sqrt(((X_ - X_[i:i+1])**2).sum(1)).argsort()>(max(0, self.X.shape[0]-51))
# _, _, grad_dict = self.inference_method.inference(self.kern, self.X[which], self.Z, self.likelihood, self.Y[which], self.Y_metadata)
# grad = self.kern.gradients_qX_expectations(variational_posterior=self.X[which], Z=self.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
#
# self.X.mean.gradient[:] = 0
# self.X.variance.gradient[:] = 0
# self.X.mean.gradient[which] = grad[0]
# self.X.variance.gradient[which] = grad[1]
# update for the KL divergence
# self.variational_prior.update_gradients_KL(self.X, which)
# -----------------------------------------------
# update for the KL divergence
#self.variational_prior.update_gradients_KL(self.X)
def plot_latent(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40,
fignum=None, plot_inducing=True, legend=True,
plot_limits=None,
aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_latent(self, labels, which_indices,
resolution, ax, marker, s,
fignum, plot_inducing, legend,
plot_limits, aspect, updates, predict_kwargs, imshow_kwargs)
def do_test_latents(self, Y):
"""
Compute the latent representation for a set of new points Y
Notes:
This will only work with a univariate Gaussian likelihood (for now)
"""
N_test = Y.shape[0]
input_dim = self.Z.shape[1]
means = np.zeros((N_test, input_dim))
covars = np.zeros((N_test, input_dim))
dpsi0 = -0.5 * self.input_dim / self.likelihood.variance
dpsi2 = self.grad_dict['dL_dpsi2'][0][None, :, :] # TODO: this may change if we ignore het. likelihoods
V = Y/self.likelihood.variance
#compute CPsi1V
#if self.Cpsi1V is None:
# psi1V = np.dot(self.psi1.T, self.likelihood.V)
# tmp, _ = linalg.dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
# tmp, _ = linalg.dpotrs(self.LB, tmp, lower=1)
# self.Cpsi1V, _ = linalg.dtrtrs(self._Lm, tmp, lower=1, trans=1)
dpsi1 = np.dot(self.posterior.woodbury_vector, V.T)
#start = np.zeros(self.input_dim * 2)
from scipy.optimize import minimize
for n, dpsi1_n in enumerate(dpsi1.T[:, :, None]):
args = (input_dim, self.kern.copy(), self.Z, dpsi0, dpsi1_n.T, dpsi2)
res = minimize(latent_cost_and_grad, jac=True, x0=np.hstack((means[n], covars[n])), args=args, method='BFGS')
xopt = res.x
mu, log_S = xopt.reshape(2, 1, -1)
means[n] = mu[0].copy()
covars[n] = np.exp(log_S[0]).copy()
X = NormalPosterior(means, covars)
return X
def dmu_dX(self, Xnew):
"""
Calculate the gradient of the prediction at Xnew w.r.t Xnew.
"""
dmu_dX = np.zeros_like(Xnew)
for i in range(self.Z.shape[0]):
dmu_dX += self.kern.gradients_X(self.grad_dict['dL_dpsi1'][i:i + 1, :], Xnew, self.Z[i:i + 1, :])
return dmu_dX
def dmu_dXnew(self, Xnew):
"""
Individual gradient of prediction at Xnew w.r.t. each sample in Xnew
"""
gradients_X = np.zeros((Xnew.shape[0], self.num_inducing))
ones = np.ones((1, 1))
for i in range(self.Z.shape[0]):
gradients_X[:, i] = self.kern.gradients_X(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
return np.dot(gradients_X, self.grad_dict['dL_dpsi1'])
def plot_steepest_gradient_map(self, *args, ** kwargs):
"""
See GPy.plotting.matplot_dep.dim_reduction_plots.plot_steepest_gradient_map
"""
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
objective function for fitting the latent variables for test points
(negative log-likelihood: should be minimised!)
"""
mu = mu_S[:input_dim][None]
log_S = mu_S[input_dim:][None]
S = np.exp(log_S)
X = NormalPosterior(mu, S)
psi0 = kern.psi0(Z, X)
psi1 = kern.psi1(Z, X)
psi2 = kern.psi2(Z, X)
lik = dL_dpsi0 * psi0.sum() + np.einsum('ij,kj->...', dL_dpsi1, psi1) + np.einsum('ijk,lkj->...', dL_dpsi2, psi2) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
dLdmu, dLdS = kern.gradients_qX_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, X)
dmu = dLdmu - mu
# dS = S0 + S1 + S2 -0.5 + .5/S
dlnS = S * (dLdS - 0.5) + .5
return -lik, -np.hstack((dmu.flatten(), dlnS.flatten()))

View file

@ -1,48 +0,0 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import GP
from ..models import GPLVM
from ..mappings import Kernel
class BCGPLVM(GPLVM):
"""
Back constrained Gaussian Process Latent Variable Model
:param Y: observed data
:type Y: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
:param mapping: mapping for back constraint
:type mapping: GPy.core.Mapping object
"""
def __init__(self, Y, input_dim, init='PCA', X=None, kernel=None, normalize_Y=False, mapping=None):
if mapping is None:
mapping = Kernel(X=Y, output_dim=input_dim)
self.mapping = mapping
GPLVM.__init__(self, Y, input_dim, init, X, kernel, normalize_Y)
self.X = self.mapping.f(self.likelihood.Y)
def _get_param_names(self):
return self.mapping._get_param_names() + GP._get_param_names(self)
def _get_params(self):
return np.hstack((self.mapping._get_params(), GP._get_params(self)))
def _set_params(self, x):
self.mapping._set_params(x[:self.mapping.num_params])
self.X = self.mapping.f(self.likelihood.Y)
GP._set_params(self, x[self.mapping.num_params:])
def _log_likelihood_gradients(self):
dL_df = self.kern.gradients_X(self.dL_dK, self.X)
dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y)
return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self)))

View file

@ -1,29 +0,0 @@
# Copyright (c) 2013, the GPy Authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from ..core import GP
from .. import likelihoods
from .. import kern
from ..inference.latent_function_inference.expectation_propagation import EP
class GPClassification(GP):
"""
Gaussian Process classification
This is a thin wrapper around the models.GP class, with a set of sensible defaults
:param X: input observations
:param Y: observed values, can be None if likelihood is not None
:param kernel: a GPy kernel, defaults to rbf
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, Y, kernel=None,Y_metadata=None):
if kernel is None:
kernel = kern.RBF(X.shape[1])
likelihood = likelihoods.Bernoulli()
GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=EP(), name='gp_classification')

View file

@ -1,36 +0,0 @@
# Copyright (c) 2012 - 2014 the GPy Austhors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import GP
from .. import likelihoods
from .. import kern
class GPRegression(GP):
"""
Gaussian Process model for regression
This is a thin wrapper around the models.GP class, with a set of sensible defaults
:param X: input observations
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf
:param Norm normalizer: [False]
Normalize Y with the norm given.
If normalizer is False, no normalization will be done
If it is None, we use GaussianNorm(alization)
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None):
if kernel is None:
kernel = kern.RBF(X.shape[1])
likelihood = likelihoods.Gaussian()
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer)

View file

@ -1,83 +0,0 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from .. import kern
from ..core import GP, Param
from ..likelihoods import Gaussian
from .. import util
class GPLVM(GP):
"""
Gaussian Process Latent Variable Model
"""
def __init__(self, Y, input_dim, init='PCA', X=None, kernel=None, name="gplvm"):
"""
:param Y: observed data
:type Y: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
"""
if X is None:
from ..util.initialization import initialize_latent
X, fracs = initialize_latent(init, input_dim, Y)
else:
fracs = np.ones(input_dim)
if kernel is None:
kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2))
likelihood = Gaussian()
super(GPLVM, self).__init__(X, Y, kernel, likelihood, name='GPLVM')
self.X = Param('latent_mean', X)
self.link_parameter(self.X, index=0)
def parameters_changed(self):
super(GPLVM, self).parameters_changed()
self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
def jacobian(self,X):
J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
for i in range(self.output_dim):
J[:,:,i] = self.kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1], X, self.X)
return J
def magnification(self,X):
target=np.zeros(X.shape[0])
#J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
J = self.jacobian(X)
for i in range(X.shape[0]):
target[i]=np.sqrt(np.linalg.det(np.dot(J[i,:,:],np.transpose(J[i,:,:]))))
return target
def plot(self):
assert self.likelihood.Y.shape[1] == 2
pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet) # @UndefinedVariable
Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None]
mu, _ = self.predict(Xnew)
import pylab as pb
pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
def plot_latent(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40,
fignum=None, legend=True,
plot_limits=None,
aspect='auto', updates=False, **kwargs):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_latent(self, labels, which_indices,
resolution, ax, marker, s,
fignum, False, legend,
plot_limits, aspect, updates, **kwargs)
def plot_magnification(self, *args, **kwargs):
return util.plot_latent.plot_magnification(self, *args, **kwargs)

View file

@ -1,113 +0,0 @@
# ## Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from ..core.model import Model
import itertools
import numpy
from ..core.parameterization import Param
def get_shape(x):
if isinstance(x, numpy.ndarray):
return x.shape
return ()
def at_least_one_element(x):
if isinstance(x, (list, tuple)):
return x
return [x]
def flatten_if_needed(x):
return numpy.atleast_1d(x).flatten()
class GradientChecker(Model):
def __init__(self, f, df, x0, names=None, *args, **kwargs):
"""
:param f: Function to check gradient for
:param df: Gradient of function to check
:param x0:
Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
Can be a list of arrays, if takes a list of arrays. This list will be passed
to f and df in the same order as given here.
If only one argument, make sure not to pass a list!!!
:type x0: [array-like] | array-like | float | int
:param names:
Names to print, when performing gradcheck. If a list was passed to x0
a list of names with the same length is expected.
:param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
Examples:
---------
from GPy.models import GradientChecker
N, M, Q = 10, 5, 3
Sinusoid:
X = numpy.random.rand(N, Q)
grad = GradientChecker(numpy.sin,numpy.cos,X,'x')
grad.checkgrad(verbose=1)
Using GPy:
X, Z = numpy.random.randn(N,Q), numpy.random.randn(M,Q)
kern = GPy.kern.linear(Q, ARD=True) + GPy.kern.rbf(Q, ARD=True)
grad = GradientChecker(kern.K,
lambda x: 2*kern.dK_dX(numpy.ones((1,1)), x),
x0 = X.copy(),
names='X')
grad.checkgrad(verbose=1)
grad.randomize()
grad.checkgrad(verbose=1)
"""
Model.__init__(self, 'GradientChecker')
if isinstance(x0, (list, tuple)) and names is None:
self.shapes = [get_shape(xi) for xi in x0]
self.names = ['X{i}'.format(i=i) for i in range(len(x0))]
elif isinstance(x0, (list, tuple)) and names is not None:
self.shapes = [get_shape(xi) for xi in x0]
self.names = names
elif names is None:
self.names = ['X']
self.shapes = [get_shape(x0)]
else:
self.names = names
self.shapes = [get_shape(x0)]
for name, xi in zip(self.names, at_least_one_element(x0)):
self.__setattr__(name, Param(name, xi))
self.link_parameter(self.__getattribute__(name))
# self._param_names = []
# for name, shape in zip(self.names, self.shapes):
# self._param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
self.args = args
self.kwargs = kwargs
self.f = f
self.df = df
def _get_x(self):
if len(self.names) > 1:
return [self.__getattribute__(name) for name in self.names] + list(self.args)
return [self.__getattribute__(self.names[0])] + list(self.args)
def log_likelihood(self):
return float(numpy.sum(self.f(*self._get_x(), **self.kwargs)))
def _log_likelihood_gradients(self):
return numpy.atleast_1d(self.df(*self._get_x(), **self.kwargs)).flatten()
#def _get_params(self):
#return numpy.atleast_1d(numpy.hstack(map(lambda name: flatten_if_needed(self.__getattribute__(name)), self.names)))
#def _set_params(self, x):
#current_index = 0
#for name, shape in zip(self.names, self.shapes):
#current_size = numpy.prod(shape)
#self.__setattr__(name, x[current_index:current_index + current_size].reshape(shape))
#current_index += current_size
#def _get_param_names(self):
#_param_names = []
#for name, shape in zip(self.names, self.shapes):
#_param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
#return _param_names

View file

@ -1,341 +0,0 @@
# ## Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import itertools, logging
from ..kern import Kern
from ..core.parameterization.variational import NormalPosterior, NormalPrior
from ..core.parameterization import Param, Parameterized
from ..core.parameterization.observable_array import ObsAr
from ..inference.latent_function_inference.var_dtc import VarDTC
from ..inference.latent_function_inference import InferenceMethodList
from ..likelihoods import Gaussian
from ..util.initialization import initialize_latent
from ..core.sparse_gp import SparseGP, GP
from GPy.core.parameterization.variational import VariationalPosterior
from GPy.models.bayesian_gplvm_minibatch import BayesianGPLVMMiniBatch
from GPy.models.sparse_gp_minibatch import SparseGPMiniBatch
class MRD(BayesianGPLVMMiniBatch):
"""
!WARNING: This is bleeding edge code and still in development.
Functionality may change fundamentally during development!
Apply MRD to all given datasets Y in Ylist.
Y_i in [n x p_i]
If Ylist is a dictionary, the keys of the dictionary are the names, and the
values are the different datasets to compare.
The samples n in the datasets need
to match up, whereas the dimensionality p_d can differ.
:param [array-like] Ylist: List of datasets to apply MRD on
:param input_dim: latent dimensionality
:type input_dim: int
:param array-like X: mean of starting latent space q in [n x q]
:param array-like X_variance: variance of starting latent space q in [n x q]
:param initx: initialisation method for the latent space :
* 'concat' - PCA on concatenation of all datasets
* 'single' - Concatenation of PCA on datasets, respectively
* 'random' - Random draw from a Normal(0,1)
:type initx: ['concat'|'single'|'random']
:param initz: initialisation method for inducing inputs
:type initz: 'permute'|'random'
:param num_inducing: number of inducing inputs to use
:param Z: initial inducing inputs
:param kernel: list of kernels or kernel to copy for each output
:type kernel: [GPy.kernels.kernels] | GPy.kernels.kernels | None (default)
:param :class:`~GPy.inference.latent_function_inference inference_method:
InferenceMethodList of inferences, or one inference method for all
:param :class:`~GPy.likelihoodss.likelihoods.likelihoods` likelihoods: the likelihoods to use
:param str name: the name of this model
:param [str] Ynames: the names for the datasets given, must be of equal length as Ylist or None
:param bool|Norm normalizer: How to normalize the data?
:param bool stochastic: Should this model be using stochastic gradient descent over the dimensions?
:param bool|[bool] batchsize: either one batchsize for all, or one batchsize per dataset.
"""
def __init__(self, Ylist, input_dim, X=None, X_variance=None,
initx = 'PCA', initz = 'permute',
num_inducing=10, Z=None, kernel=None,
inference_method=None, likelihoods=None, name='mrd',
Ynames=None, normalizer=False, stochastic=False, batchsize=10):
self.logger = logging.getLogger(self.__class__.__name__)
self.input_dim = input_dim
self.num_inducing = num_inducing
if isinstance(Ylist, dict):
Ynames, Ylist = zip(*Ylist.items())
self.logger.debug("creating observable arrays")
self.Ylist = [ObsAr(Y) for Y in Ylist]
if Ynames is None:
self.logger.debug("creating Ynames")
Ynames = ['Y{}'.format(i) for i in range(len(Ylist))]
self.names = Ynames
assert len(self.names) == len(self.Ylist), "one name per dataset, or None if Ylist is a dict"
if inference_method is None:
self.inference_method = InferenceMethodList([VarDTC() for _ in xrange(len(self.Ylist))])
else:
assert isinstance(inference_method, InferenceMethodList), "please provide one inference method per Y in the list and provide it as InferenceMethodList, inference_method given: {}".format(inference_method)
self.inference_method = inference_method
if X is None:
X, fracs = self._init_X(initx, Ylist)
else:
fracs = [X.var(0)]*len(Ylist)
Z = self._init_Z(initz, X)
self.Z = Param('inducing inputs', Z)
self.num_inducing = self.Z.shape[0] # ensure M==N if M>N
# sort out the kernels
self.logger.info("building kernels")
if kernel is None:
from ..kern import RBF
kernels = [RBF(input_dim, ARD=1, lengthscale=1./fracs[i]) for i in range(len(Ylist))]
elif isinstance(kernel, Kern):
kernels = []
for i in range(len(Ylist)):
k = kernel.copy()
kernels.append(k)
else:
assert len(kernel) == len(Ylist), "need one kernel per output"
assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
kernels = kernel
if X_variance is None:
X_variance = np.random.uniform(0.1, 0.2, X.shape)
self.variational_prior = NormalPrior()
#self.X = NormalPosterior(X, X_variance)
if likelihoods is None:
likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
else: likelihoods = likelihoods
self.logger.info("adding X and Z")
super(MRD, self).__init__(Y, input_dim, X=X, X_variance=X_variance, num_inducing=num_inducing,
Z=self.Z, kernel=None, inference_method=self.inference_method, likelihood=Gaussian(),
name='manifold relevance determination', normalizer=None,
missing_data=False, stochastic=False, batchsize=1)
self._log_marginal_likelihood = 0
self.unlink_parameter(self.likelihood)
self.unlink_parameter(self.kern)
del self.kern
del self.likelihood
self.num_data = Ylist[0].shape[0]
if isinstance(batchsize, int):
batchsize = itertools.repeat(batchsize)
self.bgplvms = []
for i, n, k, l, Y, im, bs in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist, self.inference_method, batchsize):
assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"
md = np.isnan(Y).any()
spgp = BayesianGPLVMMiniBatch(Y, input_dim, X, X_variance,
Z=Z, kernel=k, likelihood=l,
inference_method=im, name=n,
normalizer=normalizer,
missing_data=md,
stochastic=stochastic,
batchsize=bs)
spgp.kl_factr = 1./len(Ynames)
spgp.unlink_parameter(spgp.Z)
spgp.unlink_parameter(spgp.X)
del spgp.Z
del spgp.X
spgp.Z = self.Z
spgp.X = self.X
self.link_parameter(spgp, i+2)
self.bgplvms.append(spgp)
self.posterior = None
self.logger.info("init done")
def parameters_changed(self):
self._log_marginal_likelihood = 0
self.Z.gradient[:] = 0.
self.X.gradient[:] = 0.
for b, i in itertools.izip(self.bgplvms, self.inference_method):
self._log_marginal_likelihood += b._log_marginal_likelihood
self.logger.info('working on im <{}>'.format(hex(id(i))))
self.Z.gradient[:] += b.full_values['Zgrad']
grad_dict = b.full_values
self.X.mean.gradient += grad_dict['meangrad']
self.X.variance.gradient += grad_dict['vargrad']
if isinstance(self.X, VariationalPosterior):
# update for the KL divergence
self.variational_prior.update_gradients_KL(self.X)
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
pass
def log_likelihood(self):
return self._log_marginal_likelihood
def _init_X(self, init='PCA', Ylist=None):
if Ylist is None:
Ylist = self.Ylist
if init in "PCA_concat":
X, fracs = initialize_latent('PCA', self.input_dim, np.hstack(Ylist))
fracs = [fracs]*len(Ylist)
elif init in "PCA_single":
X = np.zeros((Ylist[0].shape[0], self.input_dim))
fracs = []
for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist):
x,frcs = initialize_latent('PCA', len(qs), Y)
X[:, qs] = x
fracs.append(frcs)
else: # init == 'random':
X = np.random.randn(Ylist[0].shape[0], self.input_dim)
fracs = X.var(0)
fracs = [fracs]*len(Ylist)
X -= X.mean()
X /= X.std()
return X, fracs
def _init_Z(self, init="permute", X=None):
if X is None:
X = self.X
if init in "permute":
Z = np.random.permutation(X.copy())[:self.num_inducing]
elif init in "random":
Z = np.random.randn(self.num_inducing, self.input_dim) * X.var()
return Z
def _handle_plotting(self, fignum, axes, plotf, sharex=False, sharey=False):
import matplotlib.pyplot as plt
if axes is None:
fig = plt.figure(num=fignum)
sharex_ax = None
sharey_ax = None
plots = []
for i, g in enumerate(self.bgplvms):
try:
if sharex:
sharex_ax = ax # @UndefinedVariable
sharex = False # dont set twice
if sharey:
sharey_ax = ax # @UndefinedVariable
sharey = False # dont set twice
except:
pass
if axes is None:
ax = fig.add_subplot(1, len(self.bgplvms), i + 1, sharex=sharex_ax, sharey=sharey_ax)
elif isinstance(axes, (tuple, list, np.ndarray)):
ax = axes[i]
else:
raise ValueError("Need one axes per latent dimension input_dim")
plots.append(plotf(i, g, ax))
if sharey_ax is not None:
plt.setp(ax.get_yticklabels(), visible=False)
plt.draw()
if axes is None:
try:
fig.tight_layout()
except:
pass
return plots
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, Yindex=0):
"""
Prediction for data set Yindex[default=0].
This predicts the output mean and variance for the dataset given in Ylist[Yindex]
"""
b = self.bgplvms[Yindex]
self.posterior = b.posterior
self.kern = b.kern
self.likelihood = b.likelihood
return super(MRD, self).predict(Xnew, full_cov, Y_metadata, kern)
#===============================================================================
# TODO: Predict! Maybe even change to several bgplvms, which share an X?
#===============================================================================
# def plot_predict(self, fignum=None, ax=None, sharex=False, sharey=False, **kwargs):
# fig = self._handle_plotting(fignum,
# ax,
# lambda i, g, ax: ax.imshow(g.predict(g.X)[0], **kwargs),
# sharex=sharex, sharey=sharey)
# return fig
def plot_scales(self, fignum=None, ax=None, titles=None, sharex=False, sharey=True, *args, **kwargs):
"""
TODO: Explain other parameters
:param titles: titles for axes of datasets
"""
if titles is None:
titles = [r'${}$'.format(name) for name in self.names]
ymax = reduce(max, [np.ceil(max(g.kern.input_sensitivity())) for g in self.bgplvms])
def plotf(i, g, ax):
#ax.set_ylim([0,ymax])
return g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs)
fig = self._handle_plotting(fignum, ax, plotf, sharex=sharex, sharey=sharey)
return fig
def plot_latent(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40,
fignum=None, plot_inducing=True, legend=True,
plot_limits=None,
aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}):
"""
see plotting.matplot_dep.dim_reduction_plots.plot_latent
if predict_kwargs is None, will plot latent spaces for 0th dataset (and kernel), otherwise give
predict_kwargs=dict(Yindex='index') for plotting only the latent space of dataset with 'index'.
"""
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from matplotlib import pyplot as plt
from ..plotting.matplot_dep import dim_reduction_plots
if "Yindex" not in predict_kwargs:
predict_kwargs['Yindex'] = 0
Yindex = predict_kwargs['Yindex']
if ax is None:
fig = plt.figure(num=fignum)
ax = fig.add_subplot(111)
else:
fig = ax.figure
self.kern = self.bgplvms[Yindex].kern
self.likelihood = self.bgplvms[Yindex].likelihood
plot = dim_reduction_plots.plot_latent(self, labels, which_indices,
resolution, ax, marker, s,
fignum, plot_inducing, legend,
plot_limits, aspect, updates, predict_kwargs, imshow_kwargs)
ax.set_title(self.bgplvms[Yindex].name)
try:
fig.tight_layout()
except:
pass
return plot
def __getstate__(self):
state = super(MRD, self).__getstate__()
if state.has_key('kern'):
del state['kern']
if state.has_key('likelihood'):
del state['likelihood']
return state
def __setstate__(self, state):
# TODO:
super(MRD, self).__setstate__(state)
self.kern = self.bgplvms[0].kern
self.likelihood = self.bgplvms[0].likelihood
self.parameters_changed()

View file

@ -1,46 +0,0 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import SparseGP
from .. import likelihoods
from .. import kern
from ..likelihoods import likelihood
from ..inference.latent_function_inference import expectation_propagation_dtc
class SparseGPClassification(SparseGP):
"""
sparse Gaussian Process model for classification
This is a thin wrapper around the sparse_GP class, with a set of sensible defaults
:param X: input observations
:param Y: observed values
:param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:rtype: model object
"""
#def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10):
def __init__(self, X, Y=None, likelihood=None, kernel=None, Z=None, num_inducing=10, Y_metadata=None):
if kernel is None:
kernel = kern.RBF(X.shape[1])
likelihood = likelihoods.Bernoulli()
if Z is None:
i = np.random.permutation(X.shape[0])[:num_inducing]
Z = X[i].copy()
else:
assert Z.shape[1] == X.shape[1]
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=expectation_propagation_dtc.EPDTC(), name='SparseGPClassification',Y_metadata=Y_metadata)
#def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp', Y_metadata=None):

View file

@ -1,109 +0,0 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import SparseGP
from ..core.sparse_gp_mpi import SparseGP_MPI
from .. import likelihoods
from .. import kern
from ..inference.latent_function_inference import VarDTC
from ..core.parameterization.variational import NormalPosterior
from GPy.inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch
class SparseGPRegression(SparseGP_MPI):
"""
Gaussian Process model for regression
This is a thin wrapper around the SparseGP class, with a set of sensible defalts
:param X: input observations
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf+white
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (num_inducing x input_dim) | None
:param num_inducing: number of inducing points (ignored if Z is passed, see note)
:type num_inducing: int
:rtype: model object
.. Note:: If no Z array is passed, num_inducing (default 10) points are selected from the data. Other wise num_inducing is ignored
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, X_variance=None, normalizer=None, mpi_comm=None):
num_data, input_dim = X.shape
# kern defaults to rbf (plus white for stability)
if kernel is None:
kernel = kern.RBF(input_dim)# + kern.white(input_dim, variance=1e-3)
# Z defaults to a subset of the data
if Z is None:
i = np.random.permutation(num_data)[:min(num_inducing, num_data)]
Z = X.view(np.ndarray)[i].copy()
else:
assert Z.shape[1] == input_dim
likelihood = likelihoods.Gaussian()
if not (X_variance is None):
X = NormalPosterior(X,X_variance)
if mpi_comm is not None:
from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch
infr = VarDTC_minibatch(mpi_comm=mpi_comm)
else:
infr = VarDTC()
SparseGP_MPI.__init__(self, X, Y, Z, kernel, likelihood, inference_method=infr, normalizer=normalizer, mpi_comm=mpi_comm)
def parameters_changed(self):
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients_sparsegp,VarDTC_minibatch
if isinstance(self.inference_method,VarDTC_minibatch):
update_gradients_sparsegp(self, mpi_comm=self.mpi_comm)
else:
super(SparseGPRegression, self).parameters_changed()
class SparseGPRegressionUncertainInput(SparseGP):
"""
Gaussian Process model for regression with Gaussian variance on the inputs (X_variance)
This is a thin wrapper around the SparseGP class, with a set of sensible defalts
"""
def __init__(self, X, X_variance, Y, kernel=None, Z=None, num_inducing=10, normalizer=None):
"""
:param X: input observations
:type X: np.ndarray (num_data x input_dim)
:param X_variance: The uncertainty in the measurements of X (Gaussian variance, optional)
:type X_variance: np.ndarray (num_data x input_dim)
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf+white
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (num_inducing x input_dim) | None
:param num_inducing: number of inducing points (ignored if Z is passed, see note)
:type num_inducing: int
:rtype: model object
.. Note:: If no Z array is passed, num_inducing (default 10) points are selected from the data. Other wise num_inducing is ignored
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
num_data, input_dim = X.shape
# kern defaults to rbf (plus white for stability)
if kernel is None:
kernel = kern.RBF(input_dim) + kern.White(input_dim, variance=1e-3)
# Z defaults to a subset of the data
if Z is None:
i = np.random.permutation(num_data)[:min(num_inducing, num_data)]
Z = X[i].copy()
else:
assert Z.shape[1] == input_dim
likelihood = likelihoods.Gaussian()
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance, inference_method=VarDTC(), normalizer=normalizer)
self.ensure_default_constraints()

View file

@ -1,43 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import sys
from GPy.models.sparse_gp_regression import SparseGPRegression
class SparseGPLVM(SparseGPRegression):
"""
Sparse Gaussian Process Latent Variable Model
:param Y: observed data
:type Y: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
"""
def __init__(self, Y, input_dim, X=None, kernel=None, init='PCA', num_inducing=10):
if X is None:
from ..util.initialization import initialize_latent
X, fracs = initialize_latent(init, input_dim, Y)
SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing)
def parameters_changed(self):
super(SparseGPLVM, self).parameters_changed()
self.X.gradient = self.kern.gradients_X_diag(self.grad_dict['dL_dKdiag'], self.X)
self.X.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'], self.X, self.Z)
def plot_latent(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40,
fignum=None, plot_inducing=True, legend=True,
plot_limits=None,
aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}):
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_latent(self, labels, which_indices,
resolution, ax, marker, s,
fignum, plot_inducing, legend,
plot_limits, aspect, updates, predict_kwargs, imshow_kwargs)

View file

@ -1,99 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..util.warping_functions import *
from ..core import GP
from .. import likelihoods
from GPy.util.warping_functions import TanhWarpingFunction_d
from GPy import kern
class WarpedGP(GP):
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalize_X=False, normalize_Y=False):
if kernel is None:
kernel = kern.rbf(X.shape[1])
if warping_function == None:
self.warping_function = TanhWarpingFunction_d(warping_terms)
self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1,) * 1)
self.scale_data = False
if self.scale_data:
Y = self._scale_data(Y)
self.has_uncertain_inputs = False
self.Y_untransformed = Y.copy()
self.predict_in_warped_space = False
likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y)
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params())
def _scale_data(self, Y):
self._Ymax = Y.max()
self._Ymin = Y.min()
return (Y - self._Ymin) / (self._Ymax - self._Ymin) - 0.5
def _unscale_data(self, Y):
return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin
def _set_params(self, x):
self.warping_params = x[:self.warping_function.num_parameters]
Y = self.transform_data()
self.likelihood.set_data(Y)
GP._set_params(self, x[self.warping_function.num_parameters:].copy())
def _get_params(self):
return np.hstack((self.warping_params.flatten().copy(), GP._get_params(self).copy()))
def _get_param_names(self):
warping_names = self.warping_function._get_param_names()
param_names = GP._get_param_names(self)
return warping_names + param_names
def transform_data(self):
Y = self.warping_function.f(self.Y_untransformed.copy(), self.warping_params).copy()
return Y
def log_likelihood(self):
ll = GP.log_likelihood(self)
jacobian = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
return ll + np.log(jacobian).sum()
def _log_likelihood_gradients(self):
ll_grads = GP._log_likelihood_gradients(self)
alpha = np.dot(self.Ki, self.likelihood.Y.flatten())
warping_grads = self.warping_function_gradients(alpha)
warping_grads = np.append(warping_grads[:, :-1].flatten(), warping_grads[0, -1])
return np.hstack((warping_grads.flatten(), ll_grads.flatten()))
def warping_function_gradients(self, Kiy):
grad_y = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed, self.warping_params,
return_covar_chain=True)
djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0)
dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0)
return -dquad_dpsi + djac_dpsi
def plot_warping(self):
self.warping_function.plot(self.warping_params, self.Y_untransformed.min(), self.Y_untransformed.max())
def predict(self, Xnew, which_parts='all', full_cov=False, pred_init=None):
# normalize X values
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
mu, var = GP._raw_predict(self, Xnew, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
if self.predict_in_warped_space:
mean = self.warping_function.f_inv(mean, self.warping_params, y=pred_init)
var = self.warping_function.f_inv(var, self.warping_params)
if self.scale_data:
mean = self._unscale_data(mean)
return mean, var, _025pm, _975pm

View file

@ -2,6 +2,9 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import matplotlib as mpl
import pylab as pb
import sys
#sys.path.append('/home/james/mlprojects/sitran_cluster/')
#from switch_pylab_backend import *
@ -81,7 +84,6 @@ def reset():
lightList.append(lightList.pop(0))
def setLightFigures():
import matplotlib as mpl
mpl.rcParams['axes.edgecolor']=colorsHex['Aluminium6']
mpl.rcParams['axes.facecolor']=colorsHex['Aluminium2']
mpl.rcParams['axes.labelcolor']=colorsHex['Aluminium6']
@ -95,7 +97,6 @@ def setLightFigures():
mpl.rcParams['ytick.color']=colorsHex['Aluminium6']
def setDarkFigures():
import matplotlib as mpl
mpl.rcParams['axes.edgecolor']=colorsHex['Aluminium2']
mpl.rcParams['axes.facecolor']=colorsHex['Aluminium6']
mpl.rcParams['axes.labelcolor']=colorsHex['Aluminium2']
@ -156,10 +157,10 @@ cdict_Alu = {'red' :((0./5,colorsRGB['Aluminium1'][0]/256.,colorsRGB['Aluminium1
(5./5,colorsRGB['Aluminium6'][2]/256.,colorsRGB['Aluminium6'][2]/256.))}
# cmap_Alu = mpl.colors.LinearSegmentedColormap('TangoAluminium',cdict_Alu,256)
# cmap_BGR = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_BGR,256)
# cmap_RB = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_RB,256)
if __name__=='__main__':
import matplotlib.pyplot as pb, numpy as np
import pylab as pb
pb.figure()
cmap_RB = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_RB,256)
pb.pcolor(np.random.rand(10,10),cmap=cmap_RB)
pb.pcolor(pb.rand(10,10),cmap=cmap_RB)
pb.colorbar()
pb.show()

View file

@ -1,134 +0,0 @@
import json
neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
sam_url = 'http://www.cs.nyu.edu/~roweis/data/'
cmu_url = 'http://mocap.cs.cmu.edu/subjects/'
data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'],
'files' : [['ankurDataPoseSilhouette.mat']],
'license' : None,
'citation' : """3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.""",
'details' : """Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing."""},
'boston_housing' : {'urls' : ['http://archive.ics.uci.edu/ml/machine-learning-databases/housing/'],
'files' : [['Index', 'housing.data', 'housing.names']],
'citation' : """Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.""",
'details' : """The Boston Housing data relates house values in Boston to a range of input variables.""",
'license' : None,
'size' : 51276
},
'brendan_faces' : {'urls' : [sam_url],
'files': [['frey_rawface.mat']],
'citation' : 'Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.',
'details' : """A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.""",
'license': None,
'size' : 1100584},
'cmu_mocap_full' : {'urls' : ['http://mocap.cs.cmu.edu'],
'files' : [['allasfamc.zip']],
'citation' : """Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu.'
'The database was created with funding from NSF EIA-0196217.""",
'details' : """CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.""",
'license' : """From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. The database was created with funding from NSF EIA-0196217.""",
'size' : None},
'creep_rupture' : {'urls' : ['http://www.msm.cam.ac.uk/map/data/tar/'],
'files' : [['creeprupt.tar']],
'citation' : 'Materials Algorithms Project Data Library: MAP_DATA_CREEP_RUPTURE. F. Brun and T. Yoshida.',
'details' : """Provides 2066 creep rupture test results of steels (mainly of two kinds of steels: 2.25Cr and 9-12 wt% Cr ferritic steels). See http://www.msm.cam.ac.uk/map/data/materials/creeprupt-b.html.""",
'license' : None,
'size' : 602797},
'della_gatta' : {'urls' : [neil_url + 'della_gatta/'],
'files': [['DellaGattadata.mat']],
'citation' : 'Direct targets of the TRP63 transcription factor revealed by a combination of gene expression profiling and reverse engineering. Giusy Della Gatta, Mukesh Bansal, Alberto Ambesi-Impiombato, Dario Antonini, Caterina Missero, and Diego di Bernardo, Genome Research 2008',
'details': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.",
'license':None,
'size':3729650},
'epomeo_gpx' : {'urls' : [neil_url + 'epomeo_gpx/'],
'files': [['endomondo_1.gpx', 'endomondo_2.gpx', 'garmin_watch_via_endomondo.gpx','viewranger_phone.gpx','viewranger_tablet.gpx']],
'citation' : '',
'details': "Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).",
'license':None,
'size': 2031872},
'three_phase_oil_flow': {'urls' : [neil_url + 'three_phase_oil_flow/'],
'files' : [['DataTrnLbls.txt', 'DataTrn.txt', 'DataTst.txt', 'DataTstLbls.txt', 'DataVdn.txt', 'DataVdnLbls.txt']],
'citation' : 'Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593',
'details' : """The three phase oil data used initially for demonstrating the Generative Topographic mapping.""",
'license' : None,
'size' : 712796},
'rogers_girolami_data' : {'urls' : ['https://www.dropbox.com/sh/7p6tu1t29idgliq/_XqlH_3nt9/'],
'files' : [['firstcoursemldata.tar.gz']],
'suffices' : [['?dl=1']],
'citation' : 'A First Course in Machine Learning. Simon Rogers and Mark Girolami: Chapman & Hall/CRC, ISBN-13: 978-1439824146',
'details' : """Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.""",
'license' : None,
'size' : 21949154},
'olivetti_faces' : {'urls' : [neil_url + 'olivetti_faces/', sam_url],
'files' : [['att_faces.zip'], ['olivettifaces.mat']],
'citation' : 'Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994',
'details' : """Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. """,
'license': None,
'size' : 8561331},
'olympic_marathon_men' : {'urls' : [neil_url + 'olympic_marathon_men/'],
'files' : [['olympicMarathonTimes.csv']],
'citation' : None,
'details' : """Olympic mens' marathon gold medal winning times from 1896 to 2012. Time given in pace (minutes per kilometer). Data is originally downloaded and collated from Wikipedia, we are not responsible for errors in the data""",
'license': None,
'size' : 584},
'osu_run1' : {'urls': ['http://accad.osu.edu/research/mocap/data/', neil_url + 'stick/'],
'files': [['run1TXT.ZIP'],['connections.txt']],
'details' : "Motion capture data of a stick man running from the Open Motion Data Project at Ohio State University.",
'citation' : 'The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.',
'license' : 'Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).',
'size': 338103},
'osu_accad' : {'urls': ['http://accad.osu.edu/research/mocap/data/', neil_url + 'stick/'],
'files': [['swagger1TXT.ZIP','handspring1TXT.ZIP','quickwalkTXT.ZIP','run1TXT.ZIP','sprintTXT.ZIP','dogwalkTXT.ZIP','camper_04TXT.ZIP','dance_KB3_TXT.ZIP','per20_TXT.ZIP','perTWO07_TXT.ZIP','perTWO13_TXT.ZIP','perTWO14_TXT.ZIP','perTWO15_TXT.ZIP','perTWO16_TXT.ZIP'],['connections.txt']],
'details' : "Motion capture data of different motions from the Open Motion Data Project at Ohio State University.",
'citation' : 'The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.',
'license' : 'Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).',
'size': 15922790},
'pumadyn-32nm' : {'urls' : ['ftp://ftp.cs.toronto.edu/pub/neuron/delve/data/tarfiles/pumadyn-family/'],
'files' : [['pumadyn-32nm.tar.gz']],
'details' : """Pumadyn non linear 32 input data set with moderate noise. See http://www.cs.utoronto.ca/~delve/data/pumadyn/desc.html for details.""",
'citation' : """Created by Zoubin Ghahramani using the Matlab Robotics Toolbox of Peter Corke. Corke, P. I. (1996). A Robotics Toolbox for MATLAB. IEEE Robotics and Automation Magazine, 3 (1): 24-32.""",
'license' : """Data is made available by the Delve system at the University of Toronto""",
'size' : 5861646},
'robot_wireless' : {'urls' : [neil_url + 'robot_wireless/'],
'files' : [['uw-floor.txt']],
'citation' : """WiFi-SLAM using Gaussian Process Latent Variable Models by Brian Ferris, Dieter Fox and Neil Lawrence in IJCAI'07 Proceedings pages 2480-2485. Data used in A Unifying Probabilistic Perspective for Spectral Dimensionality Reduction: Insights and New Models by Neil D. Lawrence, JMLR 13 pg 1609--1638, 2012.""",
'details' : """Data created by Brian Ferris and Dieter Fox. Consists of WiFi access point strengths taken during a circuit of the Paul Allen building at the University of Washington.""",
'license' : None,
'size' : 284390},
'swiss_roll' : {'urls' : ['http://isomap.stanford.edu/'],
'files' : [['swiss_roll_data.mat']],
'details' : """Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.""",
'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000',
'license' : None,
'size' : 800256},
'ripley_prnn_data' : {'urls' : ['http://www.stats.ox.ac.uk/pub/PRNN/'],
'files' : [['Cushings.dat', 'README', 'crabs.dat', 'fglass.dat', 'fglass.grp', 'pima.te', 'pima.tr', 'pima.tr2', 'synth.te', 'synth.tr', 'viruses.dat', 'virus3.dat']],
'details' : """Data sets from Brian Ripley's Pattern Recognition and Neural Networks""",
'citation': """Pattern Recognition and Neural Networks by B.D. Ripley (1996) Cambridge University Press ISBN 0 521 46986 7""",
'license' : None,
'size' : 93565},
'isomap_face_data' : {'urls' : [neil_url + 'isomap_face_data/'],
'files' : [['face_data.mat']],
'details' : """Face data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.""",
'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000',
'license' : None,
'size' : 24229368},
'xw_pen' : {'urls' : [neil_url + 'xw_pen/'],
'files' : [['xw_pen_15.csv']],
'details' : """Accelerometer pen data used for robust regression by Tipping and Lawrence.""",
'citation' : 'Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005',
'license' : None,
'size' : 3410},
'hapmap3' : {'urls' : ['http://hapmap.ncbi.nlm.nih.gov/downloads/genotypes/latest_phaseIII_ncbi_b36/plink_format/'],
'files' : [['hapmap3_r2_b36_fwd.consensus.qc.poly.map.bz2', 'hapmap3_r2_b36_fwd.consensus.qc.poly.ped.bz2', 'relationships_w_pops_121708.txt']],
'details' : """HapMap Project: Single Nucleotide Polymorphism sequenced in all human populations. See http://www.nature.com/nature/journal/v426/n6968/abs/nature02168.html for details.""",
'citation': """Gibbs, Richard A., et al. "The international HapMap project." Nature 426.6968 (2003): 789-796.""",
'license' : """International HapMap Project Public Access License (http://hapmap.ncbi.nlm.nih.gov/cgi-perl/registration#licence)""",
'size' : 2*1729092237 + 62265},
}
with open('data_resources.json', 'w') as f:
print "writing data_resources"
json.dump(data_resources, f)

View file

@ -1 +0,0 @@
0.4.9

View file

@ -1,134 +0,0 @@
GPy.models_modules package
==========================
Submodules
----------
GPy.models_modules.bayesian_gplvm module
----------------------------------------
.. automodule:: GPy.models_modules.bayesian_gplvm
:members:
:undoc-members:
:show-inheritance:
GPy.models_modules.bcgplvm module
---------------------------------
.. automodule:: GPy.models_modules.bcgplvm
:members:
:undoc-members:
:show-inheritance:
GPy.models_modules.fitc_classification module
---------------------------------------------
.. automodule:: GPy.models_modules.fitc_classification
:members:
:undoc-members:
:show-inheritance:
GPy.models_modules.gp_classification module
-------------------------------------------
.. automodule:: GPy.models_modules.gp_classification
:members:
:undoc-members:
:show-inheritance:
GPy.models_modules.gp_multioutput_regression module
---------------------------------------------------
.. automodule:: GPy.models_modules.gp_multioutput_regression
:members:
:undoc-members:
:show-inheritance:
GPy.models_modules.gp_regression module
---------------------------------------
.. automodule:: GPy.models_modules.gp_regression
:members:
:undoc-members:
:show-inheritance:
GPy.models_modules.gplvm module
-------------------------------
.. automodule:: GPy.models_modules.gplvm
:members:
:undoc-members:
:show-inheritance:
GPy.models_modules.gradient_checker module
------------------------------------------
.. automodule:: GPy.models_modules.gradient_checker
:members:
:undoc-members:
:show-inheritance:
GPy.models_modules.mrd module
-----------------------------
.. automodule:: GPy.models_modules.mrd
:members:
:undoc-members:
:show-inheritance:
GPy.models_modules.sparse_gp_classification module
--------------------------------------------------
.. automodule:: GPy.models_modules.sparse_gp_classification
:members:
:undoc-members:
:show-inheritance:
GPy.models_modules.sparse_gp_multioutput_regression module
----------------------------------------------------------
.. automodule:: GPy.models_modules.sparse_gp_multioutput_regression
:members:
:undoc-members:
:show-inheritance:
GPy.models_modules.sparse_gp_regression module
----------------------------------------------
.. automodule:: GPy.models_modules.sparse_gp_regression
:members:
:undoc-members:
:show-inheritance:
GPy.models_modules.sparse_gplvm module
--------------------------------------
.. automodule:: GPy.models_modules.sparse_gplvm
:members:
:undoc-members:
:show-inheritance:
GPy.models_modules.svigp_regression module
------------------------------------------
.. automodule:: GPy.models_modules.svigp_regression
:members:
:undoc-members:
:show-inheritance:
GPy.models_modules.warped_gp module
-----------------------------------
.. automodule:: GPy.models_modules.warped_gp
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: GPy.models_modules
:members:
:undoc-members:
:show-inheritance:

View file

@ -12,24 +12,11 @@ Subpackages
GPy.kern
GPy.likelihoods
GPy.mappings
GPy.models_modules
GPy.models
GPy.plotting
GPy.testing
GPy.util
Submodules
----------
GPy.models module
-----------------
.. automodule:: GPy.models
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------

View file

@ -22,9 +22,6 @@ The code can be found on our `Github project page <https://github.com/SheffieldM
.. You may also be interested by some examples in the GPy/examples folder.
The detailed Developers Documentation is listed below
=====================================================
Contents:
.. toctree::

View file

@ -139,4 +139,4 @@ directly::
:align: center
:height: 350px
Contour plot of the mean predictor (posterior mean).
Contour plot of the best predictor (posterior mean).