mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-10 12:32:40 +02:00
Merge branch 'devel' into pickle
This commit is contained in:
commit
1e06ca2d40
66 changed files with 768 additions and 193 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -9,7 +9,6 @@
|
||||||
dist
|
dist
|
||||||
build
|
build
|
||||||
eggs
|
eggs
|
||||||
parts
|
|
||||||
bin
|
bin
|
||||||
var
|
var
|
||||||
sdist
|
sdist
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@
|
||||||
from model import *
|
from model import *
|
||||||
from parameterised import *
|
from parameterised import *
|
||||||
import priors
|
import priors
|
||||||
from GPy.core.gp import GP
|
from gp import GP
|
||||||
from GPy.core.sparse_gp import SparseGP
|
from sparse_gp import SparseGP
|
||||||
from fitc import FITC
|
from fitc import FITC
|
||||||
|
from svigp import SVIGP
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ class GPBase(Model):
|
||||||
self._Xscale = np.ones((1, self.input_dim))
|
self._Xscale = np.ones((1, self.input_dim))
|
||||||
|
|
||||||
super(GPBase, self).__init__()
|
super(GPBase, self).__init__()
|
||||||
|
#Model.__init__(self)
|
||||||
# All leaf nodes should call self._set_params(self._get_params()) at
|
# All leaf nodes should call self._set_params(self._get_params()) at
|
||||||
# the end
|
# the end
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -81,6 +81,7 @@ class SparseGP(GPBase):
|
||||||
|
|
||||||
def _computations(self):
|
def _computations(self):
|
||||||
|
|
||||||
|
|
||||||
# factor Kmm
|
# factor Kmm
|
||||||
self.Lm = jitchol(self.Kmm)
|
self.Lm = jitchol(self.Kmm)
|
||||||
|
|
||||||
|
|
@ -107,17 +108,18 @@ class SparseGP(GPBase):
|
||||||
self.B = np.eye(self.num_inducing) + self.A
|
self.B = np.eye(self.num_inducing) + self.A
|
||||||
self.LB = jitchol(self.B)
|
self.LB = jitchol(self.B)
|
||||||
|
|
||||||
# TODO: make a switch for either first compute psi1V, or VV.T
|
#VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
|
||||||
self.psi1V = np.dot(self.psi1.T, self.likelihood.V)
|
self.psi1Vf = np.dot(self.psi1.T, self.likelihood.VVT_factor)
|
||||||
|
|
||||||
# back substutue C into psi1V
|
# back substutue C into psi1Vf
|
||||||
tmp, info1 = dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
|
tmp, info1 = dtrtrs(self.Lm, np.asfortranarray(self.psi1Vf), lower=1, trans=0)
|
||||||
self._LBi_Lmi_psi1V, _ = dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
|
self._LBi_Lmi_psi1Vf, _ = dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
|
||||||
tmp, info2 = dpotrs(self.LB, tmp, lower=1)
|
tmp, info2 = dpotrs(self.LB, tmp, lower=1)
|
||||||
self.Cpsi1V, info3 = dtrtrs(self.Lm, tmp, lower=1, trans=1)
|
self.Cpsi1Vf, info3 = dtrtrs(self.Lm, tmp, lower=1, trans=1)
|
||||||
|
|
||||||
# Compute dL_dKmm
|
# Compute dL_dKmm
|
||||||
tmp = tdot(self._LBi_Lmi_psi1V)
|
tmp = tdot(self._LBi_Lmi_psi1Vf)
|
||||||
|
self.data_fit = np.trace(tmp)
|
||||||
self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.output_dim * np.eye(self.num_inducing) + tmp)
|
self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.output_dim * np.eye(self.num_inducing) + tmp)
|
||||||
tmp = -0.5 * self.DBi_plus_BiPBi
|
tmp = -0.5 * self.DBi_plus_BiPBi
|
||||||
tmp += -0.5 * self.B * self.output_dim
|
tmp += -0.5 * self.B * self.output_dim
|
||||||
|
|
@ -126,7 +128,7 @@ class SparseGP(GPBase):
|
||||||
|
|
||||||
# Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
|
# Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
|
||||||
self.dL_dpsi0 = -0.5 * self.output_dim * (self.likelihood.precision * np.ones([self.num_data, 1])).flatten()
|
self.dL_dpsi0 = -0.5 * self.output_dim * (self.likelihood.precision * np.ones([self.num_data, 1])).flatten()
|
||||||
self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T).T
|
self.dL_dpsi1 = np.dot(self.likelihood.VVT_factor, self.Cpsi1Vf.T)
|
||||||
dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
|
dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
|
||||||
|
|
||||||
if self.likelihood.is_heteroscedastic:
|
if self.likelihood.is_heteroscedastic:
|
||||||
|
|
@ -156,18 +158,18 @@ class SparseGP(GPBase):
|
||||||
# likelihood is not heterscedatic
|
# likelihood is not heterscedatic
|
||||||
self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
|
self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
|
||||||
self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
|
self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
|
||||||
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
|
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - self.data_fit)
|
||||||
|
|
||||||
def log_likelihood(self):
|
def log_likelihood(self):
|
||||||
""" Compute the (lower bound on the) log marginal likelihood """
|
""" Compute the (lower bound on the) log marginal likelihood """
|
||||||
if self.likelihood.is_heteroscedastic:
|
if self.likelihood.is_heteroscedastic:
|
||||||
A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
|
A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V*self.likelihood.Y)
|
||||||
B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
|
B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
|
||||||
else:
|
else:
|
||||||
A = -0.5 * self.num_data * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
|
A = -0.5 * self.num_data * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
|
||||||
B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
|
B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
|
||||||
C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2))
|
C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2))
|
||||||
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
|
D = 0.5 * self.data_fit
|
||||||
return A + B + C + D + self.likelihood.Z
|
return A + B + C + D + self.likelihood.Z
|
||||||
|
|
||||||
def _set_params(self, p):
|
def _set_params(self, p):
|
||||||
|
|
@ -176,6 +178,7 @@ class SparseGP(GPBase):
|
||||||
self.likelihood._set_params(p[self.Z.size + self.kern.num_params:])
|
self.likelihood._set_params(p[self.Z.size + self.kern.num_params:])
|
||||||
self._compute_kernel_matrices()
|
self._compute_kernel_matrices()
|
||||||
self._computations()
|
self._computations()
|
||||||
|
self.Cpsi1V = None
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
return np.hstack([self.Z.flatten(), self.kern._get_params_transformed(), self.likelihood._get_params()])
|
return np.hstack([self.Z.flatten(), self.kern._get_params_transformed(), self.likelihood._get_params()])
|
||||||
|
|
@ -242,6 +245,14 @@ class SparseGP(GPBase):
|
||||||
symmetrify(Bi)
|
symmetrify(Bi)
|
||||||
Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.num_inducing) - Bi)
|
Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.num_inducing) - Bi)
|
||||||
|
|
||||||
|
if self.Cpsi1V is None:
|
||||||
|
psi1V = np.dot(self.psi1.T,self.likelihood.V)
|
||||||
|
tmp, _ = dtrtrs(self.Lm, np.asfortranarray(psi1V), lower=1, trans=0)
|
||||||
|
tmp, _ = dpotrs(self.LB, tmp, lower=1)
|
||||||
|
self.Cpsi1V, _ = dtrtrs(self.Lm, tmp, lower=1, trans=1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if X_variance_new is None:
|
if X_variance_new is None:
|
||||||
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
|
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
|
||||||
mu = np.dot(Kx.T, self.Cpsi1V)
|
mu = np.dot(Kx.T, self.Cpsi1V)
|
||||||
|
|
|
||||||
466
GPy/core/svigp.py
Normal file
466
GPy/core/svigp.py
Normal file
|
|
@ -0,0 +1,466 @@
|
||||||
|
# Copyright (c) 2012, James Hensman and Nicolo' Fusi
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pylab as pb
|
||||||
|
from .. import kern
|
||||||
|
from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs, jitchol, backsub_both_sides
|
||||||
|
from ..likelihoods import EP
|
||||||
|
from gp_base import GPBase
|
||||||
|
from model import Model
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
class SVIGP(GPBase):
|
||||||
|
"""
|
||||||
|
Stochastic Variational inference in a Gaussian Process
|
||||||
|
|
||||||
|
:param X: inputs
|
||||||
|
:type X: np.ndarray (N x Q)
|
||||||
|
:param Y: observed data
|
||||||
|
:type Y: np.ndarray of observations (N x D)
|
||||||
|
:param batchsize: the size of a h
|
||||||
|
|
||||||
|
Additional kwargs are used as for a sparse GP. They include
|
||||||
|
|
||||||
|
:param q_u: canonical parameters of the distribution squasehd into a 1D array
|
||||||
|
:type q_u: np.ndarray
|
||||||
|
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
|
||||||
|
:type M: int
|
||||||
|
:param kernel : the kernel/covariance function. See link kernels
|
||||||
|
:type kernel: a GPy kernel
|
||||||
|
:param Z: inducing inputs (optional, see note)
|
||||||
|
:type Z: np.ndarray (M x Q) | None
|
||||||
|
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
|
||||||
|
:type X_uncertainty: np.ndarray (N x Q) | None
|
||||||
|
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
|
||||||
|
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
|
||||||
|
:type M: int
|
||||||
|
:param beta: noise precision. TODO> ignore beta if doing EP
|
||||||
|
:type beta: float
|
||||||
|
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
|
||||||
|
:type normalize_(X|Y): bool
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def __init__(self, X, likelihood, kernel, Z, q_u=None, batchsize=10, X_variance=None):
|
||||||
|
GPBase.__init__(self, X, likelihood, kernel, normalize_X=False)
|
||||||
|
self.batchsize=batchsize
|
||||||
|
self.Y = self.likelihood.Y.copy()
|
||||||
|
self.Z = Z
|
||||||
|
self.num_inducing = Z.shape[0]
|
||||||
|
|
||||||
|
self.batchcounter = 0
|
||||||
|
self.epochs = 0
|
||||||
|
self.iterations = 0
|
||||||
|
|
||||||
|
self.vb_steplength = 0.05
|
||||||
|
self.param_steplength = 1e-5
|
||||||
|
self.momentum = 0.9
|
||||||
|
|
||||||
|
if X_variance is None:
|
||||||
|
self.has_uncertain_inputs = False
|
||||||
|
else:
|
||||||
|
self.has_uncertain_inputs = True
|
||||||
|
self.X_variance = X_variance
|
||||||
|
|
||||||
|
|
||||||
|
if q_u is None:
|
||||||
|
q_u = np.hstack((np.random.randn(self.num_inducing*self.output_dim),-.5*np.eye(self.num_inducing).flatten()))
|
||||||
|
self.set_vb_param(q_u)
|
||||||
|
|
||||||
|
self._permutation = np.random.permutation(self.num_data)
|
||||||
|
self.load_batch()
|
||||||
|
|
||||||
|
self._param_trace = []
|
||||||
|
self._ll_trace = []
|
||||||
|
self._grad_trace = []
|
||||||
|
|
||||||
|
#set the adaptive steplength parameters
|
||||||
|
self.hbar_t = 0.0
|
||||||
|
self.tau_t = 100.0
|
||||||
|
self.gbar_t = 0.0
|
||||||
|
self.gbar_t1 = 0.0
|
||||||
|
self.gbar_t2 = 0.0
|
||||||
|
self.hbar_tp = 0.0
|
||||||
|
self.tau_tp = 10000.0
|
||||||
|
self.gbar_tp = 0.0
|
||||||
|
self.adapt_param_steplength = True
|
||||||
|
self.adapt_vb_steplength = True
|
||||||
|
self._param_steplength_trace = []
|
||||||
|
self._vb_steplength_trace = []
|
||||||
|
|
||||||
|
def _compute_kernel_matrices(self):
|
||||||
|
# kernel computations, using BGPLVM notation
|
||||||
|
self.Kmm = self.kern.K(self.Z)
|
||||||
|
if self.has_uncertain_inputs:
|
||||||
|
self.psi0 = self.kern.psi0(self.Z, self.X_batch, self.X_variance_batch)
|
||||||
|
self.psi1 = self.kern.psi1(self.Z, self.X_batch, self.X_variance_batch)
|
||||||
|
self.psi2 = self.kern.psi2(self.Z, self.X_batch, self.X_variance_batch)
|
||||||
|
else:
|
||||||
|
self.psi0 = self.kern.Kdiag(self.X_batch)
|
||||||
|
self.psi1 = self.kern.K(self.X_batch, self.Z)
|
||||||
|
self.psi2 = None
|
||||||
|
|
||||||
|
def dL_dtheta(self):
|
||||||
|
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
|
||||||
|
if self.has_uncertain_inputs:
|
||||||
|
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X_batch, self.X_variance_batch)
|
||||||
|
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X_batch, self.X_variance_batch)
|
||||||
|
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X_batch, self.X_variance_batch)
|
||||||
|
else:
|
||||||
|
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.X_batch, self.Z)
|
||||||
|
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X_batch)
|
||||||
|
return dL_dtheta
|
||||||
|
|
||||||
|
def _set_params(self, p, computations=True):
|
||||||
|
self.kern._set_params_transformed(p[:self.kern.num_params])
|
||||||
|
self.likelihood._set_params(p[self.kern.num_params:])
|
||||||
|
if computations:
|
||||||
|
self._compute_kernel_matrices()
|
||||||
|
self._computations()
|
||||||
|
|
||||||
|
def _get_params(self):
|
||||||
|
return np.hstack((self.kern._get_params_transformed() , self.likelihood._get_params()))
|
||||||
|
|
||||||
|
def _get_param_names(self):
|
||||||
|
return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
|
||||||
|
|
||||||
|
def load_batch(self):
|
||||||
|
"""
|
||||||
|
load a batch of data (set self.X_batch and self.likelihood.Y from self.X, self.Y)
|
||||||
|
"""
|
||||||
|
|
||||||
|
#if we've seen all the data, start again with them in a new random order
|
||||||
|
if self.batchcounter+self.batchsize > self.num_data:
|
||||||
|
self.batchcounter = 0
|
||||||
|
self.epochs += 1
|
||||||
|
self._permutation = np.random.permutation(self.num_data)
|
||||||
|
|
||||||
|
this_perm = self._permutation[self.batchcounter:self.batchcounter+self.batchsize]
|
||||||
|
|
||||||
|
self.X_batch = self.X[this_perm]
|
||||||
|
self.likelihood.set_data(self.Y[this_perm])
|
||||||
|
if self.has_uncertain_inputs:
|
||||||
|
self.X_variance_batch = self.X_variance[this_perm]
|
||||||
|
|
||||||
|
self.batchcounter += self.batchsize
|
||||||
|
|
||||||
|
self.data_prop = float(self.batchsize)/self.num_data
|
||||||
|
|
||||||
|
self._compute_kernel_matrices()
|
||||||
|
self._computations()
|
||||||
|
|
||||||
|
def _computations(self,do_Kmm=True, do_Kmm_grad=True):
|
||||||
|
"""
|
||||||
|
All of the computations needed. Some are optional, see kwargs.
|
||||||
|
"""
|
||||||
|
|
||||||
|
if do_Kmm:
|
||||||
|
self.Lm = jitchol(self.Kmm)
|
||||||
|
|
||||||
|
# The rather complex computations of self.A
|
||||||
|
if self.has_uncertain_inputs:
|
||||||
|
if self.likelihood.is_heteroscedastic:
|
||||||
|
psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.batchsize, 1, 1))).sum(0)
|
||||||
|
else:
|
||||||
|
psi2_beta = self.psi2.sum(0) * self.likelihood.precision
|
||||||
|
evals, evecs = linalg.eigh(psi2_beta)
|
||||||
|
clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
|
||||||
|
tmp = evecs * np.sqrt(clipped_evals)
|
||||||
|
else:
|
||||||
|
if self.likelihood.is_heteroscedastic:
|
||||||
|
tmp = self.psi1.T * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.batchsize)))
|
||||||
|
else:
|
||||||
|
tmp = self.psi1.T * (np.sqrt(self.likelihood.precision))
|
||||||
|
tmp, _ = dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
|
||||||
|
self.A = tdot(tmp)
|
||||||
|
|
||||||
|
self.V = self.likelihood.precision*self.likelihood.Y
|
||||||
|
self.VmT = np.dot(self.V,self.q_u_expectation[0].T)
|
||||||
|
self.psi1V = np.dot(self.psi1.T, self.V)
|
||||||
|
|
||||||
|
self.B = np.eye(self.num_inducing)*self.data_prop + self.A
|
||||||
|
self.Lambda = backsub_both_sides(self.Lm, self.B.T)
|
||||||
|
self.LQL = backsub_both_sides(self.Lm,self.q_u_expectation[1].T,transpose='right')
|
||||||
|
|
||||||
|
self.trace_K = self.psi0.sum() - np.trace(self.A)/self.likelihood.precision
|
||||||
|
self.Kmmi_m, _ = dpotrs(self.Lm, self.q_u_expectation[0], lower=1)
|
||||||
|
self.projected_mean = np.dot(self.psi1,self.Kmmi_m)
|
||||||
|
|
||||||
|
# Compute dL_dpsi
|
||||||
|
self.dL_dpsi0 = - 0.5 * self.output_dim * self.likelihood.precision * np.ones(self.batchsize)
|
||||||
|
self.dL_dpsi1, _ = dpotrs(self.Lm,np.asfortranarray(self.VmT.T),lower=1)
|
||||||
|
self.dL_dpsi1 = self.dL_dpsi1.T
|
||||||
|
|
||||||
|
dL_dpsi2 = -0.5 * self.likelihood.precision * backsub_both_sides(self.Lm, self.LQL - self.output_dim * np.eye(self.num_inducing))
|
||||||
|
if self.has_uncertain_inputs:
|
||||||
|
self.dL_dpsi2 = np.repeat(dL_dpsi2[None,:,:],self.batchsize,axis=0)
|
||||||
|
else:
|
||||||
|
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2,self.psi1.T).T
|
||||||
|
self.dL_dpsi2 = None
|
||||||
|
|
||||||
|
# Compute dL_dKmm
|
||||||
|
if do_Kmm_grad:
|
||||||
|
tmp = np.dot(self.LQL,self.A) - backsub_both_sides(self.Lm,np.dot(self.q_u_expectation[0],self.psi1V.T),transpose='right')
|
||||||
|
tmp += tmp.T
|
||||||
|
tmp += -self.output_dim*self.B
|
||||||
|
tmp += self.data_prop*self.LQL
|
||||||
|
self.dL_dKmm = 0.5*backsub_both_sides(self.Lm,tmp)
|
||||||
|
|
||||||
|
#Compute the gradient of the log likelihood wrt noise variance
|
||||||
|
self.partial_for_likelihood = -0.5*(self.batchsize*self.output_dim - np.sum(self.A*self.LQL))*self.likelihood.precision
|
||||||
|
self.partial_for_likelihood += (0.5*self.output_dim*self.trace_K + 0.5 * self.likelihood.trYYT - np.sum(self.likelihood.Y*self.projected_mean))*self.likelihood.precision**2
|
||||||
|
|
||||||
|
|
||||||
|
def log_likelihood(self):
|
||||||
|
"""
|
||||||
|
As for uncollapsed sparse GP, but account for the proportion of data we're looking at right now.
|
||||||
|
|
||||||
|
NB. self.batchsize is the size of the batch, not the size of X_all
|
||||||
|
"""
|
||||||
|
assert not self.likelihood.is_heteroscedastic
|
||||||
|
A = -0.5*self.batchsize*self.output_dim*(np.log(2.*np.pi) - np.log(self.likelihood.precision))
|
||||||
|
B = -0.5*self.likelihood.precision*self.output_dim*self.trace_K
|
||||||
|
Kmm_logdet = 2.*np.sum(np.log(np.diag(self.Lm)))
|
||||||
|
C = -0.5*self.output_dim*self.data_prop*(Kmm_logdet-self.q_u_logdet - self.num_inducing)
|
||||||
|
C += -0.5*np.sum(self.LQL * self.B)
|
||||||
|
D = -0.5*self.likelihood.precision*self.likelihood.trYYT
|
||||||
|
E = np.sum(self.V*self.projected_mean)
|
||||||
|
return (A+B+C+D+E)/self.data_prop
|
||||||
|
|
||||||
|
def _log_likelihood_gradients(self):
|
||||||
|
return np.hstack((self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))/self.data_prop
|
||||||
|
|
||||||
|
def vb_grad_natgrad(self):
|
||||||
|
"""
|
||||||
|
Compute the gradients of the lower bound wrt the canonical and
|
||||||
|
Expectation parameters of u.
|
||||||
|
|
||||||
|
Note that the natural gradient in either is given by the gradient in the other (See Hensman et al 2012 Fast Variational inference in the conjugate exponential Family)
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Gradient for eta
|
||||||
|
dL_dmmT_S = -0.5*self.Lambda/self.data_prop + 0.5*self.q_u_prec
|
||||||
|
Kmmipsi1V,_ = dpotrs(self.Lm,self.psi1V,lower=1)
|
||||||
|
dL_dm = (Kmmipsi1V - np.dot(self.Lambda,self.q_u_mean))/self.data_prop
|
||||||
|
|
||||||
|
# Gradients for theta
|
||||||
|
S = self.q_u_cov
|
||||||
|
Si = self.q_u_prec
|
||||||
|
m = self.q_u_mean
|
||||||
|
dL_dSi = -mdot(S,dL_dmmT_S, S)
|
||||||
|
|
||||||
|
dL_dmhSi = -2*dL_dSi
|
||||||
|
dL_dSim = np.dot(dL_dSi,m) + np.dot(Si, dL_dm)
|
||||||
|
|
||||||
|
return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) , np.hstack((dL_dSim.flatten(), dL_dmhSi.flatten()))
|
||||||
|
|
||||||
|
|
||||||
|
def optimize(self, iterations, print_interval=10, callback=lambda:None, callback_interval=5):
|
||||||
|
|
||||||
|
param_step = 0.
|
||||||
|
|
||||||
|
#Iterate!
|
||||||
|
for i in range(iterations):
|
||||||
|
|
||||||
|
#store the current configuration for plotting later
|
||||||
|
self._param_trace.append(self._get_params())
|
||||||
|
self._ll_trace.append(self.log_likelihood() + self.log_prior())
|
||||||
|
|
||||||
|
#load a batch
|
||||||
|
self.load_batch()
|
||||||
|
|
||||||
|
#compute the (stochastic) gradient
|
||||||
|
natgrads = self.vb_grad_natgrad()
|
||||||
|
grads = self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
|
||||||
|
self._grad_trace.append(grads)
|
||||||
|
|
||||||
|
#compute the steps in all parameters
|
||||||
|
vb_step = self.vb_steplength*natgrads[0]
|
||||||
|
if (self.epochs>=1):#only move the parameters after the first epoch
|
||||||
|
param_step = self.momentum*param_step + self.param_steplength*grads
|
||||||
|
else:
|
||||||
|
param_step = 0.
|
||||||
|
|
||||||
|
self.set_vb_param(self.get_vb_param() + vb_step)
|
||||||
|
#Note: don't recompute everything here, wait until the next iteration when we have a new batch
|
||||||
|
self._set_params(self._untransform_params(self._get_params_transformed() + param_step), computations=False)
|
||||||
|
|
||||||
|
#print messages if desired
|
||||||
|
if i and (not i%print_interval):
|
||||||
|
print i, np.mean(self._ll_trace[-print_interval:]) #, self.log_likelihood()
|
||||||
|
print np.round(np.mean(self._grad_trace[-print_interval:],0),3)
|
||||||
|
sys.stdout.flush()
|
||||||
|
|
||||||
|
#callback
|
||||||
|
if i and not i%callback_interval:
|
||||||
|
callback()
|
||||||
|
time.sleep(0.1)
|
||||||
|
|
||||||
|
if self.epochs > 10:
|
||||||
|
self._adapt_steplength()
|
||||||
|
|
||||||
|
self.iterations += 1
|
||||||
|
|
||||||
|
|
||||||
|
def _adapt_steplength(self):
|
||||||
|
if self.adapt_vb_steplength:
|
||||||
|
# self._adaptive_vb_steplength()
|
||||||
|
self._adaptive_vb_steplength_KL()
|
||||||
|
self._vb_steplength_trace.append(self.vb_steplength)
|
||||||
|
assert self.vb_steplength > 0
|
||||||
|
|
||||||
|
if self.adapt_param_steplength:
|
||||||
|
# self._adaptive_param_steplength()
|
||||||
|
# self._adaptive_param_steplength_log()
|
||||||
|
self._adaptive_param_steplength_from_vb()
|
||||||
|
self._param_steplength_trace.append(self.param_steplength)
|
||||||
|
|
||||||
|
def _adaptive_param_steplength(self):
|
||||||
|
decr_factor = 0.1
|
||||||
|
g_tp = self._transform_gradients(self._log_likelihood_gradients())
|
||||||
|
self.gbar_tp = (1-1/self.tau_tp)*self.gbar_tp + 1/self.tau_tp * g_tp
|
||||||
|
self.hbar_tp = (1-1/self.tau_tp)*self.hbar_tp + 1/self.tau_tp * np.dot(g_tp.T, g_tp)
|
||||||
|
new_param_steplength = np.dot(self.gbar_tp.T, self.gbar_tp) / self.hbar_tp
|
||||||
|
#- hack
|
||||||
|
new_param_steplength *= decr_factor
|
||||||
|
self.param_steplength = (self.param_steplength + new_param_steplength)/2
|
||||||
|
#-
|
||||||
|
self.tau_tp = self.tau_tp*(1-self.param_steplength) + 1
|
||||||
|
|
||||||
|
def _adaptive_param_steplength_log(self):
|
||||||
|
stp = np.logspace(np.log(0.0001), np.log(1e-6), base=np.e, num=18000)
|
||||||
|
self.param_steplength = stp[self.iterations]
|
||||||
|
|
||||||
|
def _adaptive_param_steplength_log2(self):
|
||||||
|
self.param_steplength = (self.iterations + 0.001)**-0.5
|
||||||
|
|
||||||
|
def _adaptive_param_steplength_from_vb(self):
|
||||||
|
self.param_steplength = self.vb_steplength * 0.01
|
||||||
|
|
||||||
|
def _adaptive_vb_steplength(self):
|
||||||
|
decr_factor = 0.1
|
||||||
|
g_t = self.vb_grad_natgrad()[0]
|
||||||
|
self.gbar_t = (1-1/self.tau_t)*self.gbar_t + 1/self.tau_t * g_t
|
||||||
|
self.hbar_t = (1-1/self.tau_t)*self.hbar_t + 1/self.tau_t * np.dot(g_t.T, g_t)
|
||||||
|
new_vb_steplength = np.dot(self.gbar_t.T, self.gbar_t) / self.hbar_t
|
||||||
|
#- hack
|
||||||
|
new_vb_steplength *= decr_factor
|
||||||
|
self.vb_steplength = (self.vb_steplength + new_vb_steplength)/2
|
||||||
|
#-
|
||||||
|
self.tau_t = self.tau_t*(1-self.vb_steplength) + 1
|
||||||
|
|
||||||
|
def _adaptive_vb_steplength_KL(self):
|
||||||
|
decr_factor = 1 #0.1
|
||||||
|
natgrad = self.vb_grad_natgrad()
|
||||||
|
g_t1 = natgrad[0]
|
||||||
|
g_t2 = natgrad[1]
|
||||||
|
self.gbar_t1 = (1-1/self.tau_t)*self.gbar_t1 + 1/self.tau_t * g_t1
|
||||||
|
self.gbar_t2 = (1-1/self.tau_t)*self.gbar_t2 + 1/self.tau_t * g_t2
|
||||||
|
self.hbar_t = (1-1/self.tau_t)*self.hbar_t + 1/self.tau_t * np.dot(g_t1.T, g_t2)
|
||||||
|
self.vb_steplength = np.dot(self.gbar_t1.T, self.gbar_t2) / self.hbar_t
|
||||||
|
self.vb_steplength *= decr_factor
|
||||||
|
self.tau_t = self.tau_t*(1-self.vb_steplength) + 1
|
||||||
|
|
||||||
|
def _raw_predict(self, X_new, X_variance_new=None, which_parts='all',full_cov=False):
|
||||||
|
"""Internal helper function for making predictions, does not account for normalization"""
|
||||||
|
|
||||||
|
#TODO: make this more efficient!
|
||||||
|
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
|
||||||
|
tmp = self.Kmmi- mdot(self.Kmmi,self.q_u_cov,self.Kmmi)
|
||||||
|
|
||||||
|
if X_variance_new is None:
|
||||||
|
Kx = self.kern.K(X_new,self.Z)
|
||||||
|
mu = np.dot(Kx,self.Kmmi_m)
|
||||||
|
if full_cov:
|
||||||
|
Kxx = self.kern.K(X_new)
|
||||||
|
var = Kxx - mdot(Kx,tmp,Kx.T)
|
||||||
|
else:
|
||||||
|
Kxx = self.kern.Kdiag(X_new)
|
||||||
|
var = (Kxx - np.sum(Kx*np.dot(Kx,tmp),1))[:,None]
|
||||||
|
return mu, var
|
||||||
|
else:
|
||||||
|
assert X_variance_new.shape == X_new.shape
|
||||||
|
Kx = self.kern.psi1(self.Z,X_new, X_variance_new)
|
||||||
|
mu = np.dot(Kx,self.Kmmi_m)
|
||||||
|
Kxx = self.kern.psi0(self.Z,X_new,X_variance_new)
|
||||||
|
psi2 = self.kern.psi2(self.Z,X_new,X_variance_new)
|
||||||
|
diag_var = Kxx - np.sum(np.sum(psi2*tmp[None,:,:],1),1)
|
||||||
|
if full_cov:
|
||||||
|
raise NotImplementedError
|
||||||
|
else:
|
||||||
|
return mu, diag_var[:,None]
|
||||||
|
|
||||||
|
def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
|
||||||
|
# normalize X values
|
||||||
|
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
|
||||||
|
if X_variance_new is not None:
|
||||||
|
X_variance_new = X_variance_new / self._Xscale ** 2
|
||||||
|
|
||||||
|
# here's the actual prediction by the GP model
|
||||||
|
mu, var = self._raw_predict(Xnew, X_variance_new, full_cov=full_cov, which_parts=which_parts)
|
||||||
|
|
||||||
|
# now push through likelihood
|
||||||
|
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
|
||||||
|
|
||||||
|
return mean, var, _025pm, _975pm
|
||||||
|
|
||||||
|
|
||||||
|
def set_vb_param(self,vb_param):
|
||||||
|
"""set the distribution q(u) from the canonical parameters"""
|
||||||
|
self.q_u_canonical_flat = vb_param.copy()
|
||||||
|
self.q_u_canonical = self.q_u_canonical_flat[:self.num_inducing*self.output_dim].reshape(self.num_inducing,self.output_dim),self.q_u_canonical_flat[self.num_inducing*self.output_dim:].reshape(self.num_inducing,self.num_inducing)
|
||||||
|
|
||||||
|
self.q_u_prec = -2.*self.q_u_canonical[1]
|
||||||
|
self.q_u_cov, q_u_Li, q_u_L, tmp = pdinv(self.q_u_prec)
|
||||||
|
self.q_u_Li = q_u_Li
|
||||||
|
self.q_u_logdet = -tmp
|
||||||
|
self.q_u_mean, _ = dpotrs(q_u_Li, np.asfortranarray(self.q_u_canonical[0]),lower=1)
|
||||||
|
|
||||||
|
self.q_u_expectation = (self.q_u_mean, np.dot(self.q_u_mean,self.q_u_mean.T)+self.q_u_cov*self.output_dim)
|
||||||
|
|
||||||
|
|
||||||
|
def get_vb_param(self):
|
||||||
|
"""
|
||||||
|
Return the canonical parameters of the distribution q(u)
|
||||||
|
"""
|
||||||
|
return self.q_u_canonical_flat
|
||||||
|
|
||||||
|
|
||||||
|
def plot(self, ax=None, fignum=None, Z_height=None, **kwargs):
|
||||||
|
|
||||||
|
if ax is None:
|
||||||
|
fig = pb.figure(num=fignum)
|
||||||
|
ax = fig.add_subplot(111)
|
||||||
|
|
||||||
|
#horrible hack here:
|
||||||
|
data = self.likelihood.data.copy()
|
||||||
|
self.likelihood.data = self.Y
|
||||||
|
GPBase.plot(self, ax=ax, **kwargs)
|
||||||
|
self.likelihood.data = data
|
||||||
|
|
||||||
|
Zu = self.Z * self._Xscale + self._Xoffset
|
||||||
|
if self.input_dim==1:
|
||||||
|
ax.plot(self.X_batch, self.likelihood.data, 'gx',mew=2)
|
||||||
|
if Z_height is None:
|
||||||
|
Z_height = ax.get_ylim()[0]
|
||||||
|
ax.plot(Zu, np.zeros_like(Zu) + Z_height, 'r|', mew=1.5, markersize=12)
|
||||||
|
|
||||||
|
if self.input_dim==2:
|
||||||
|
ax.scatter(self.X_all[:,0], self.X_all[:,1], 20., self.Y[:,0], linewidth=0, cmap=pb.cm.jet)
|
||||||
|
ax.plot(Zu[:,0], Zu[:,1], 'w^')
|
||||||
|
|
||||||
|
def plot_traces(self):
|
||||||
|
pb.figure()
|
||||||
|
t = np.array(self._param_trace)
|
||||||
|
pb.subplot(2,1,1)
|
||||||
|
for l,ti in zip(self._get_param_names(),t.T):
|
||||||
|
if not l[:3]=='iip':
|
||||||
|
pb.plot(ti,label=l)
|
||||||
|
pb.legend(loc=0)
|
||||||
|
|
||||||
|
pb.subplot(2,1,2)
|
||||||
|
pb.plot(np.asarray(self._ll_trace),label='stochastic likelihood')
|
||||||
|
pb.legend(loc=0)
|
||||||
|
|
@ -5,3 +5,4 @@ import classification
|
||||||
import regression
|
import regression
|
||||||
import dimensionality_reduction
|
import dimensionality_reduction
|
||||||
import tutorials
|
import tutorials
|
||||||
|
import stochastic
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,6 @@ def crescent_data(seed=default_seed): # FIXME
|
||||||
Y[Y.flatten()==-1] = 0
|
Y[Y.flatten()==-1] = 0
|
||||||
|
|
||||||
m = GPy.models.GPClassification(data['X'], Y)
|
m = GPy.models.GPClassification(data['X'], Y)
|
||||||
m.ensure_default_constraints()
|
|
||||||
#m.update_likelihood_approximation()
|
#m.update_likelihood_approximation()
|
||||||
#m.optimize()
|
#m.optimize()
|
||||||
m.pseudo_EM()
|
m.pseudo_EM()
|
||||||
|
|
@ -75,7 +74,6 @@ def toy_linear_1d_classification(seed=default_seed):
|
||||||
|
|
||||||
# Model definition
|
# Model definition
|
||||||
m = GPy.models.GPClassification(data['X'], Y)
|
m = GPy.models.GPClassification(data['X'], Y)
|
||||||
m.ensure_default_constraints()
|
|
||||||
|
|
||||||
# Optimize
|
# Optimize
|
||||||
#m.update_likelihood_approximation()
|
#m.update_likelihood_approximation()
|
||||||
|
|
@ -106,7 +104,6 @@ def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
|
||||||
m = GPy.models.SparseGPClassification(data['X'], Y,num_inducing=num_inducing)
|
m = GPy.models.SparseGPClassification(data['X'], Y,num_inducing=num_inducing)
|
||||||
m['.*len']= 4.
|
m['.*len']= 4.
|
||||||
|
|
||||||
m.ensure_default_constraints()
|
|
||||||
# Optimize
|
# Optimize
|
||||||
#m.update_likelihood_approximation()
|
#m.update_likelihood_approximation()
|
||||||
# Parameters optimization:
|
# Parameters optimization:
|
||||||
|
|
@ -137,7 +134,6 @@ def sparse_crescent_data(num_inducing=10, seed=default_seed):
|
||||||
Y[Y.flatten()==-1]=0
|
Y[Y.flatten()==-1]=0
|
||||||
|
|
||||||
m = GPy.models.SparseGPClassification(data['X'], Y,num_inducing=num_inducing)
|
m = GPy.models.SparseGPClassification(data['X'], Y,num_inducing=num_inducing)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m['.*len'] = 10.
|
m['.*len'] = 10.
|
||||||
#m.update_likelihood_approximation()
|
#m.update_likelihood_approximation()
|
||||||
#m.optimize()
|
#m.optimize()
|
||||||
|
|
@ -163,7 +159,6 @@ def FITC_crescent_data(num_inducing=10, seed=default_seed):
|
||||||
|
|
||||||
m = GPy.models.FITCClassification(data['X'], Y,num_inducing=num_inducing)
|
m = GPy.models.FITCClassification(data['X'], Y,num_inducing=num_inducing)
|
||||||
m.constrain_bounded('.*len',1.,1e3)
|
m.constrain_bounded('.*len',1.,1e3)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m['.*len'] = 3.
|
m['.*len'] = 3.
|
||||||
#m.update_likelihood_approximation()
|
#m.update_likelihood_approximation()
|
||||||
#m.optimize()
|
#m.optimize()
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,6 @@ def BGPLVM(seed=default_seed):
|
||||||
# m.optimize(messages = 1)
|
# m.optimize(messages = 1)
|
||||||
# m.plot()
|
# m.plot()
|
||||||
# pb.title('After optimisation')
|
# pb.title('After optimisation')
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.randomize()
|
m.randomize()
|
||||||
m.checkgrad(verbose=1)
|
m.checkgrad(verbose=1)
|
||||||
|
|
||||||
|
|
@ -53,7 +52,6 @@ def GPLVM_oil_100(optimize=True):
|
||||||
m.data_labels = data['Y'].argmax(axis=1)
|
m.data_labels = data['Y'].argmax(axis=1)
|
||||||
|
|
||||||
# optimize
|
# optimize
|
||||||
m.ensure_default_constraints()
|
|
||||||
if optimize:
|
if optimize:
|
||||||
m.optimize('scg', messages=1)
|
m.optimize('scg', messages=1)
|
||||||
|
|
||||||
|
|
@ -108,7 +106,6 @@ def swiss_roll(optimize=True, N=1000, num_inducing=15, Q=4, sigma=.2, plot=False
|
||||||
m.data_colors = c
|
m.data_colors = c
|
||||||
m.data_t = t
|
m.data_t = t
|
||||||
|
|
||||||
m.ensure_default_constraints()
|
|
||||||
m['rbf_lengthscale'] = 1. # X.var(0).max() / X.var(0)
|
m['rbf_lengthscale'] = 1. # X.var(0).max() / X.var(0)
|
||||||
m['noise_variance'] = Y.var() / 100.
|
m['noise_variance'] = Y.var() / 100.
|
||||||
m['bias_variance'] = 0.05
|
m['bias_variance'] = 0.05
|
||||||
|
|
@ -134,7 +131,6 @@ def BGPLVM_oil(optimize=True, N=200, Q=10, num_inducing=15, max_f_eval=50, plot=
|
||||||
m['.*lengt'] = 1. # m.X.var(0).max() / m.X.var(0)
|
m['.*lengt'] = 1. # m.X.var(0).max() / m.X.var(0)
|
||||||
m['noise'] = Yn.var() / 100.
|
m['noise'] = Yn.var() / 100.
|
||||||
|
|
||||||
m.ensure_default_constraints()
|
|
||||||
|
|
||||||
# optimize
|
# optimize
|
||||||
if optimize:
|
if optimize:
|
||||||
|
|
@ -159,7 +155,6 @@ def oil_100():
|
||||||
m = GPy.models.GPLVM(data['X'], 2)
|
m = GPy.models.GPLVM(data['X'], 2)
|
||||||
|
|
||||||
# optimize
|
# optimize
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.optimize(messages=1, max_iters=2)
|
m.optimize(messages=1, max_iters=2)
|
||||||
|
|
||||||
# plot
|
# plot
|
||||||
|
|
@ -239,7 +234,6 @@ def bgplvm_simulation_matlab_compare():
|
||||||
# X=mu,
|
# X=mu,
|
||||||
# X_variance=S,
|
# X_variance=S,
|
||||||
_debug=False)
|
_debug=False)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.auto_scale_factor = True
|
m.auto_scale_factor = True
|
||||||
m['noise'] = Y.var() / 100.
|
m['noise'] = Y.var() / 100.
|
||||||
m['linear_variance'] = .01
|
m['linear_variance'] = .01
|
||||||
|
|
@ -261,8 +255,8 @@ def bgplvm_simulation(optimize='scg',
|
||||||
|
|
||||||
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q)
|
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q)
|
||||||
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k, _debug=True)
|
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k, _debug=True)
|
||||||
|
|
||||||
# m.constrain('variance|noise', logexp_clipped())
|
# m.constrain('variance|noise', logexp_clipped())
|
||||||
m.ensure_default_constraints()
|
|
||||||
m['noise'] = Y.var() / 100.
|
m['noise'] = Y.var() / 100.
|
||||||
m['linear_variance'] = .01
|
m['linear_variance'] = .01
|
||||||
|
|
||||||
|
|
@ -291,7 +285,6 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw):
|
||||||
for i, Y in enumerate(Ylist):
|
for i, Y in enumerate(Ylist):
|
||||||
m['{}_noise'.format(i + 1)] = Y.var() / 100.
|
m['{}_noise'.format(i + 1)] = Y.var() / 100.
|
||||||
|
|
||||||
m.ensure_default_constraints()
|
|
||||||
|
|
||||||
# DEBUG
|
# DEBUG
|
||||||
# np.seterr("raise")
|
# np.seterr("raise")
|
||||||
|
|
@ -319,7 +312,6 @@ def brendan_faces():
|
||||||
# optimize
|
# optimize
|
||||||
m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
|
m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
|
||||||
|
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.optimize('scg', messages=1, max_f_eval=10000)
|
m.optimize('scg', messages=1, max_f_eval=10000)
|
||||||
|
|
||||||
ax = m.plot_latent(which_indices=(0, 1))
|
ax = m.plot_latent(which_indices=(0, 1))
|
||||||
|
|
@ -327,28 +319,55 @@ def brendan_faces():
|
||||||
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
|
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
|
||||||
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
||||||
raw_input('Press enter to finish')
|
raw_input('Press enter to finish')
|
||||||
lvm_visualizer.close()
|
|
||||||
|
|
||||||
return m
|
return m
|
||||||
|
def stick_play(range=None, frame_rate=15):
|
||||||
|
data = GPy.util.datasets.stick()
|
||||||
|
# optimize
|
||||||
|
if range==None:
|
||||||
|
Y = data['Y'].copy()
|
||||||
|
else:
|
||||||
|
Y = data['Y'][range[0]:range[1], :].copy()
|
||||||
|
y = Y[0, :]
|
||||||
|
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
|
||||||
|
GPy.util.visualize.data_play(Y, data_show, frame_rate)
|
||||||
|
return Y
|
||||||
|
|
||||||
def stick():
|
def stick():
|
||||||
data = GPy.util.datasets.stick()
|
data = GPy.util.datasets.stick()
|
||||||
m = GPy.models.GPLVM(data['Y'], 2)
|
|
||||||
|
|
||||||
# optimize
|
# optimize
|
||||||
m.ensure_default_constraints()
|
m = GPy.models.GPLVM(data['Y'], 2)
|
||||||
m.optimize(messages=1, max_f_eval=10000)
|
m.optimize(messages=1, max_f_eval=10000)
|
||||||
m._set_params(m._get_params())
|
m._set_params(m._get_params())
|
||||||
|
plt.clf
|
||||||
ax = m.plot_latent()
|
ax = m.plot_latent()
|
||||||
y = m.likelihood.Y[0, :]
|
y = m.likelihood.Y[0, :]
|
||||||
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
|
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
|
||||||
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
||||||
raw_input('Press enter to finish')
|
raw_input('Press enter to finish')
|
||||||
lvm_visualizer.close()
|
|
||||||
|
|
||||||
return m
|
return m
|
||||||
|
|
||||||
|
def stick_bgplvm(model=None):
|
||||||
|
data = GPy.util.datasets.stick()
|
||||||
|
Q = 6
|
||||||
|
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
|
||||||
|
m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20,kernel=kernel)
|
||||||
|
# optimize
|
||||||
|
m.ensure_default_constraints()
|
||||||
|
m.optimize(messages=1, max_f_eval=3000,xtol=1e-300,ftol=1e-300)
|
||||||
|
m._set_params(m._get_params())
|
||||||
|
plt.clf, (latent_axes, sense_axes) = plt.subplots(1, 2)
|
||||||
|
plt.sca(latent_axes)
|
||||||
|
m.plot_latent()
|
||||||
|
y = m.likelihood.Y[0, :].copy()
|
||||||
|
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
|
||||||
|
lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
|
||||||
|
raw_input('Press enter to finish')
|
||||||
|
|
||||||
|
return m
|
||||||
|
|
||||||
|
|
||||||
def cmu_mocap(subject='35', motion=['01'], in_place=True):
|
def cmu_mocap(subject='35', motion=['01'], in_place=True):
|
||||||
|
|
||||||
data = GPy.util.datasets.cmu_mocap(subject, motion)
|
data = GPy.util.datasets.cmu_mocap(subject, motion)
|
||||||
|
|
@ -359,7 +378,6 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True):
|
||||||
m = GPy.models.GPLVM(data['Y'], 2, normalize_Y=True)
|
m = GPy.models.GPLVM(data['Y'], 2, normalize_Y=True)
|
||||||
|
|
||||||
# optimize
|
# optimize
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.optimize(messages=1, max_f_eval=10000)
|
m.optimize(messages=1, max_f_eval=10000)
|
||||||
|
|
||||||
ax = m.plot_latent()
|
ax = m.plot_latent()
|
||||||
|
|
@ -391,7 +409,6 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True):
|
||||||
# m.set('iip', Z)
|
# m.set('iip', Z)
|
||||||
# m.set('bias', 1e-4)
|
# m.set('bias', 1e-4)
|
||||||
# # optimize
|
# # optimize
|
||||||
# # m.ensure_default_constraints()
|
|
||||||
#
|
#
|
||||||
# import pdb; pdb.set_trace()
|
# import pdb; pdb.set_trace()
|
||||||
# m.optimize('tnc', messages=1)
|
# m.optimize('tnc', messages=1)
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,6 @@ def toy_rbf_1d(optimizer='tnc', max_nb_eval_optim=100):
|
||||||
m = GPy.models.GPRegression(data['X'],data['Y'])
|
m = GPy.models.GPRegression(data['X'],data['Y'])
|
||||||
|
|
||||||
# optimize
|
# optimize
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.optimize(optimizer, max_f_eval=max_nb_eval_optim)
|
m.optimize(optimizer, max_f_eval=max_nb_eval_optim)
|
||||||
# plot
|
# plot
|
||||||
m.plot()
|
m.plot()
|
||||||
|
|
@ -36,7 +35,6 @@ def rogers_girolami_olympics(optim_iters=100):
|
||||||
m['rbf_lengthscale'] = 10
|
m['rbf_lengthscale'] = 10
|
||||||
|
|
||||||
# optimize
|
# optimize
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.optimize(max_f_eval=optim_iters)
|
m.optimize(max_f_eval=optim_iters)
|
||||||
|
|
||||||
# plot
|
# plot
|
||||||
|
|
@ -52,7 +50,6 @@ def toy_rbf_1d_50(optim_iters=100):
|
||||||
m = GPy.models.GPRegression(data['X'],data['Y'])
|
m = GPy.models.GPRegression(data['X'],data['Y'])
|
||||||
|
|
||||||
# optimize
|
# optimize
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.optimize(max_f_eval=optim_iters)
|
m.optimize(max_f_eval=optim_iters)
|
||||||
|
|
||||||
# plot
|
# plot
|
||||||
|
|
@ -68,7 +65,6 @@ def silhouette(optim_iters=100):
|
||||||
m = GPy.models.GPRegression(data['X'],data['Y'])
|
m = GPy.models.GPRegression(data['X'],data['Y'])
|
||||||
|
|
||||||
# optimize
|
# optimize
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.optimize(messages=True,max_f_eval=optim_iters)
|
m.optimize(messages=True,max_f_eval=optim_iters)
|
||||||
|
|
||||||
print(m)
|
print(m)
|
||||||
|
|
@ -92,7 +88,6 @@ def coregionalisation_toy2(optim_iters=100):
|
||||||
m = GPy.models.GPRegression(X,Y,kernel=k)
|
m = GPy.models.GPRegression(X,Y,kernel=k)
|
||||||
m.constrain_fixed('.*rbf_var',1.)
|
m.constrain_fixed('.*rbf_var',1.)
|
||||||
#m.constrain_positive('.*kappa')
|
#m.constrain_positive('.*kappa')
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.optimize('sim',messages=1,max_f_eval=optim_iters)
|
m.optimize('sim',messages=1,max_f_eval=optim_iters)
|
||||||
|
|
||||||
pb.figure()
|
pb.figure()
|
||||||
|
|
@ -124,7 +119,6 @@ def coregionalisation_toy(optim_iters=100):
|
||||||
m = GPy.models.GPRegression(X,Y,kernel=k)
|
m = GPy.models.GPRegression(X,Y,kernel=k)
|
||||||
m.constrain_fixed('.*rbf_var',1.)
|
m.constrain_fixed('.*rbf_var',1.)
|
||||||
#m.constrain_positive('kappa')
|
#m.constrain_positive('kappa')
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.optimize(max_f_eval=optim_iters)
|
m.optimize(max_f_eval=optim_iters)
|
||||||
|
|
||||||
pb.figure()
|
pb.figure()
|
||||||
|
|
@ -162,7 +156,6 @@ def coregionalisation_sparse(optim_iters=100):
|
||||||
m.constrain_fixed('.*rbf_var',1.)
|
m.constrain_fixed('.*rbf_var',1.)
|
||||||
m.constrain_fixed('iip')
|
m.constrain_fixed('iip')
|
||||||
m.constrain_bounded('noise_variance',1e-3,1e-1)
|
m.constrain_bounded('noise_variance',1e-3,1e-1)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.optimize_restarts(5, robust=True, messages=1, max_f_eval=optim_iters)
|
m.optimize_restarts(5, robust=True, messages=1, max_f_eval=optim_iters)
|
||||||
|
|
||||||
#plotting:
|
#plotting:
|
||||||
|
|
@ -189,11 +182,9 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
|
||||||
log_SNRs = np.linspace(-3., 4., resolution)
|
log_SNRs = np.linspace(-3., 4., resolution)
|
||||||
|
|
||||||
data = GPy.util.datasets.della_gatta_TRP63_gene_expression(gene_number)
|
data = GPy.util.datasets.della_gatta_TRP63_gene_expression(gene_number)
|
||||||
# Sub sample the data to ensure multiple optima
|
|
||||||
#data['Y'] = data['Y'][0::2, :]
|
#data['Y'] = data['Y'][0::2, :]
|
||||||
#data['X'] = data['X'][0::2, :]
|
#data['X'] = data['X'][0::2, :]
|
||||||
|
|
||||||
# Remove the mean (no bias kernel to ensure signal/noise is in RBF/white)
|
|
||||||
data['Y'] = data['Y'] - np.mean(data['Y'])
|
data['Y'] = data['Y'] - np.mean(data['Y'])
|
||||||
|
|
||||||
lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
|
lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
|
||||||
|
|
@ -220,7 +211,6 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
|
||||||
optim_point_y[0] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']);
|
optim_point_y[0] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']);
|
||||||
|
|
||||||
# optimize
|
# optimize
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.optimize('scg', xtol=1e-6, ftol=1e-6, max_f_eval=optim_iters)
|
m.optimize('scg', xtol=1e-6, ftol=1e-6, max_f_eval=optim_iters)
|
||||||
|
|
||||||
optim_point_x[1] = m['rbf_lengthscale']
|
optim_point_x[1] = m['rbf_lengthscale']
|
||||||
|
|
@ -273,7 +263,6 @@ def sparse_GP_regression_1D(N = 400, num_inducing = 5, optim_iters=100):
|
||||||
# create simple GP Model
|
# create simple GP Model
|
||||||
m = GPy.models.SparseGPRegression(X, Y, kernel, num_inducing=num_inducing)
|
m = GPy.models.SparseGPRegression(X, Y, kernel, num_inducing=num_inducing)
|
||||||
|
|
||||||
m.ensure_default_constraints()
|
|
||||||
|
|
||||||
m.checkgrad(verbose=1)
|
m.checkgrad(verbose=1)
|
||||||
m.optimize('tnc', messages = 1, max_f_eval=optim_iters)
|
m.optimize('tnc', messages = 1, max_f_eval=optim_iters)
|
||||||
|
|
@ -294,7 +283,6 @@ def sparse_GP_regression_2D(N = 400, num_inducing = 50, optim_iters=100):
|
||||||
m = GPy.models.SparseGPRegression(X,Y,kernel, num_inducing = num_inducing)
|
m = GPy.models.SparseGPRegression(X,Y,kernel, num_inducing = num_inducing)
|
||||||
|
|
||||||
# contrain all parameters to be positive (but not inducing inputs)
|
# contrain all parameters to be positive (but not inducing inputs)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.set('.*len',2.)
|
m.set('.*len',2.)
|
||||||
|
|
||||||
m.checkgrad()
|
m.checkgrad()
|
||||||
|
|
@ -320,7 +308,6 @@ def uncertain_inputs_sparse_regression(optim_iters=100):
|
||||||
|
|
||||||
# create simple GP Model - no input uncertainty on this one
|
# create simple GP Model - no input uncertainty on this one
|
||||||
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
|
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.optimize('scg', messages=1, max_f_eval=optim_iters)
|
m.optimize('scg', messages=1, max_f_eval=optim_iters)
|
||||||
m.plot(ax=axes[0])
|
m.plot(ax=axes[0])
|
||||||
axes[0].set_title('no input uncertainty')
|
axes[0].set_title('no input uncertainty')
|
||||||
|
|
@ -328,7 +315,6 @@ def uncertain_inputs_sparse_regression(optim_iters=100):
|
||||||
|
|
||||||
#the same Model with uncertainty
|
#the same Model with uncertainty
|
||||||
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z, X_variance=S)
|
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z, X_variance=S)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.optimize('scg', messages=1, max_f_eval=optim_iters)
|
m.optimize('scg', messages=1, max_f_eval=optim_iters)
|
||||||
m.plot(ax=axes[1])
|
m.plot(ax=axes[1])
|
||||||
axes[1].set_title('with input uncertainty')
|
axes[1].set_title('with input uncertainty')
|
||||||
|
|
|
||||||
40
GPy/examples/stochastic.py
Normal file
40
GPy/examples/stochastic.py
Normal file
|
|
@ -0,0 +1,40 @@
|
||||||
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import pylab as pb
|
||||||
|
import numpy as np
|
||||||
|
import GPy
|
||||||
|
|
||||||
|
def toy_1d():
|
||||||
|
N = 2000
|
||||||
|
M = 20
|
||||||
|
|
||||||
|
#create data
|
||||||
|
X = np.linspace(0,32,N)[:,None]
|
||||||
|
Z = np.linspace(0,32,M)[:,None]
|
||||||
|
Y = np.sin(X) + np.cos(0.3*X) + np.random.randn(*X.shape)/np.sqrt(50.)
|
||||||
|
|
||||||
|
m = GPy.models.SVIGPRegression(X,Y, batchsize=10, Z=Z)
|
||||||
|
m.constrain_bounded('noise_variance',1e-3,1e-1)
|
||||||
|
|
||||||
|
m.param_steplength = 1e-4
|
||||||
|
|
||||||
|
fig = pb.figure()
|
||||||
|
ax = fig.add_subplot(111)
|
||||||
|
def cb():
|
||||||
|
ax.cla()
|
||||||
|
m.plot(ax=ax,Z_height=-3)
|
||||||
|
ax.set_ylim(-3,3)
|
||||||
|
fig.canvas.draw()
|
||||||
|
|
||||||
|
m.optimize(500, callback=cb, callback_interval=1)
|
||||||
|
|
||||||
|
m.plot_traces()
|
||||||
|
return m
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -24,7 +24,6 @@ def tuto_GP_regression():
|
||||||
print m
|
print m
|
||||||
m.plot()
|
m.plot()
|
||||||
|
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.constrain_positive('')
|
m.constrain_positive('')
|
||||||
|
|
||||||
m.unconstrain('') # may be used to remove the previous constrains
|
m.unconstrain('') # may be used to remove the previous constrains
|
||||||
|
|
@ -135,7 +134,6 @@ def tuto_kernel_overview():
|
||||||
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
|
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
|
||||||
m.plot(ax=axs, which_parts=[False,False,False,True])
|
m.plot(ax=axs, which_parts=[False,False,False,True])
|
||||||
|
|
||||||
m.ensure_default_constraints()
|
|
||||||
return(m)
|
return(m)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -144,6 +142,5 @@ def model_interaction():
|
||||||
Y = np.sin(X) + np.random.randn(*X.shape)*0.01 + 5.
|
Y = np.sin(X) + np.random.randn(*X.shape)*0.01 + 5.
|
||||||
k = GPy.kern.rbf(1) + GPy.kern.bias(1)
|
k = GPy.kern.rbf(1) + GPy.kern.bias(1)
|
||||||
m = GPy.models.GPRegression(X, Y, kernel=k)
|
m = GPy.models.GPRegression(X, Y, kernel=k)
|
||||||
m.ensure_default_constraints()
|
|
||||||
return m
|
return m
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,7 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
from constructors import *
|
||||||
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, Coregionalise, rational_quadratic, Fixed, rbfcos, IndependentOutputs
|
|
||||||
try:
|
try:
|
||||||
from constructors import rbf_sympy, sympykern # these depend on sympy
|
from constructors import rbf_sympy, sympykern # these depend on sympy
|
||||||
except:
|
except:
|
||||||
|
|
|
||||||
|
|
@ -1,33 +1,9 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from kern import kern
|
from kern import kern
|
||||||
|
import parts
|
||||||
from rbf import rbf as rbfpart
|
|
||||||
from white import white as whitepart
|
|
||||||
from linear import linear as linearpart
|
|
||||||
from exponential import exponential as exponentialpart
|
|
||||||
from Matern32 import Matern32 as Matern32part
|
|
||||||
from Matern52 import Matern52 as Matern52part
|
|
||||||
from bias import bias as biaspart
|
|
||||||
from fixed import Fixed as fixedpart
|
|
||||||
from finite_dimensional import finite_dimensional as finite_dimensionalpart
|
|
||||||
from spline import spline as splinepart
|
|
||||||
from Brownian import Brownian as Brownianpart
|
|
||||||
from periodic_exponential import periodic_exponential as periodic_exponentialpart
|
|
||||||
from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part
|
|
||||||
from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part
|
|
||||||
from prod import prod as prodpart
|
|
||||||
from symmetric import symmetric as symmetric_part
|
|
||||||
from coregionalise import Coregionalise as coregionalise_part
|
|
||||||
from rational_quadratic import rational_quadratic as rational_quadraticpart
|
|
||||||
from rbfcos import rbfcos as rbfcospart
|
|
||||||
from independent_outputs import IndependentOutputs as independent_output_part
|
|
||||||
#TODO these s=constructors are not as clean as we'd like. Tidy the code up
|
|
||||||
#using meta-classes to make the objects construct properly wthout them.
|
|
||||||
|
|
||||||
|
|
||||||
def rbf(input_dim,variance=1., lengthscale=None,ARD=False):
|
def rbf(input_dim,variance=1., lengthscale=None,ARD=False):
|
||||||
"""
|
"""
|
||||||
|
|
@ -42,7 +18,7 @@ def rbf(input_dim,variance=1., lengthscale=None,ARD=False):
|
||||||
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
|
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
|
||||||
:type ARD: Boolean
|
:type ARD: Boolean
|
||||||
"""
|
"""
|
||||||
part = rbfpart(input_dim,variance,lengthscale,ARD)
|
part = parts.rbf.RBF(input_dim,variance,lengthscale,ARD)
|
||||||
return kern(input_dim, [part])
|
return kern(input_dim, [part])
|
||||||
|
|
||||||
def linear(input_dim,variances=None,ARD=False):
|
def linear(input_dim,variances=None,ARD=False):
|
||||||
|
|
@ -55,7 +31,7 @@ def linear(input_dim,variances=None,ARD=False):
|
||||||
variances (np.ndarray)
|
variances (np.ndarray)
|
||||||
ARD (boolean)
|
ARD (boolean)
|
||||||
"""
|
"""
|
||||||
part = linearpart(input_dim,variances,ARD)
|
part = parts.linear.Linear(input_dim,variances,ARD)
|
||||||
return kern(input_dim, [part])
|
return kern(input_dim, [part])
|
||||||
|
|
||||||
def white(input_dim,variance=1.):
|
def white(input_dim,variance=1.):
|
||||||
|
|
@ -67,7 +43,7 @@ def white(input_dim,variance=1.):
|
||||||
input_dimD (int), obligatory
|
input_dimD (int), obligatory
|
||||||
variance (float)
|
variance (float)
|
||||||
"""
|
"""
|
||||||
part = whitepart(input_dim,variance)
|
part = parts.white.White(input_dim,variance)
|
||||||
return kern(input_dim, [part])
|
return kern(input_dim, [part])
|
||||||
|
|
||||||
def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
|
def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
|
||||||
|
|
@ -83,7 +59,7 @@ def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
|
||||||
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
|
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
|
||||||
:type ARD: Boolean
|
:type ARD: Boolean
|
||||||
"""
|
"""
|
||||||
part = exponentialpart(input_dim,variance, lengthscale, ARD)
|
part = parts.exponential.Exponential(input_dim,variance, lengthscale, ARD)
|
||||||
return kern(input_dim, [part])
|
return kern(input_dim, [part])
|
||||||
|
|
||||||
def Matern32(input_dim,variance=1., lengthscale=None, ARD=False):
|
def Matern32(input_dim,variance=1., lengthscale=None, ARD=False):
|
||||||
|
|
@ -99,7 +75,7 @@ def Matern32(input_dim,variance=1., lengthscale=None, ARD=False):
|
||||||
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
|
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
|
||||||
:type ARD: Boolean
|
:type ARD: Boolean
|
||||||
"""
|
"""
|
||||||
part = Matern32part(input_dim,variance, lengthscale, ARD)
|
part = parts.Matern32.Matern32(input_dim,variance, lengthscale, ARD)
|
||||||
return kern(input_dim, [part])
|
return kern(input_dim, [part])
|
||||||
|
|
||||||
def Matern52(input_dim, variance=1., lengthscale=None, ARD=False):
|
def Matern52(input_dim, variance=1., lengthscale=None, ARD=False):
|
||||||
|
|
@ -115,7 +91,7 @@ def Matern52(input_dim, variance=1., lengthscale=None, ARD=False):
|
||||||
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
|
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
|
||||||
:type ARD: Boolean
|
:type ARD: Boolean
|
||||||
"""
|
"""
|
||||||
part = Matern52part(input_dim, variance, lengthscale, ARD)
|
part = parts.Matern52.Matern52(input_dim, variance, lengthscale, ARD)
|
||||||
return kern(input_dim, [part])
|
return kern(input_dim, [part])
|
||||||
|
|
||||||
def bias(input_dim, variance=1.):
|
def bias(input_dim, variance=1.):
|
||||||
|
|
@ -127,7 +103,7 @@ def bias(input_dim, variance=1.):
|
||||||
input_dim (int), obligatory
|
input_dim (int), obligatory
|
||||||
variance (float)
|
variance (float)
|
||||||
"""
|
"""
|
||||||
part = biaspart(input_dim, variance)
|
part = parts.bias.Bias(input_dim, variance)
|
||||||
return kern(input_dim, [part])
|
return kern(input_dim, [part])
|
||||||
|
|
||||||
def finite_dimensional(input_dim, F, G, variances=1., weights=None):
|
def finite_dimensional(input_dim, F, G, variances=1., weights=None):
|
||||||
|
|
@ -138,7 +114,7 @@ def finite_dimensional(input_dim, F, G, variances=1., weights=None):
|
||||||
G: np.array with shape (n,n) - the Gram matrix associated to F
|
G: np.array with shape (n,n) - the Gram matrix associated to F
|
||||||
variances : np.ndarray with shape (n,)
|
variances : np.ndarray with shape (n,)
|
||||||
"""
|
"""
|
||||||
part = finite_dimensionalpart(input_dim, F, G, variances, weights)
|
part = parts.finite_dimensional.FiniteDimensional(input_dim, F, G, variances, weights)
|
||||||
return kern(input_dim, [part])
|
return kern(input_dim, [part])
|
||||||
|
|
||||||
def spline(input_dim, variance=1.):
|
def spline(input_dim, variance=1.):
|
||||||
|
|
@ -150,7 +126,7 @@ def spline(input_dim, variance=1.):
|
||||||
:param variance: the variance of the kernel
|
:param variance: the variance of the kernel
|
||||||
:type variance: float
|
:type variance: float
|
||||||
"""
|
"""
|
||||||
part = splinepart(input_dim, variance)
|
part = parts.spline.Spline(input_dim, variance)
|
||||||
return kern(input_dim, [part])
|
return kern(input_dim, [part])
|
||||||
|
|
||||||
def Brownian(input_dim, variance=1.):
|
def Brownian(input_dim, variance=1.):
|
||||||
|
|
@ -162,7 +138,7 @@ def Brownian(input_dim, variance=1.):
|
||||||
:param variance: the variance of the kernel
|
:param variance: the variance of the kernel
|
||||||
:type variance: float
|
:type variance: float
|
||||||
"""
|
"""
|
||||||
part = Brownianpart(input_dim, variance)
|
part = parts.Brownian.Brownian(input_dim, variance)
|
||||||
return kern(input_dim, [part])
|
return kern(input_dim, [part])
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -215,7 +191,7 @@ def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 *
|
||||||
:param n_freq: the number of frequencies considered for the periodic subspace
|
:param n_freq: the number of frequencies considered for the periodic subspace
|
||||||
:type n_freq: int
|
:type n_freq: int
|
||||||
"""
|
"""
|
||||||
part = periodic_exponentialpart(input_dim, variance, lengthscale, period, n_freq, lower, upper)
|
part = parts.periodic_exponential.PeriodicExponential(input_dim, variance, lengthscale, period, n_freq, lower, upper)
|
||||||
return kern(input_dim, [part])
|
return kern(input_dim, [part])
|
||||||
|
|
||||||
def periodic_Matern32(input_dim, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
|
def periodic_Matern32(input_dim, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
|
||||||
|
|
@ -233,7 +209,7 @@ def periodic_Matern32(input_dim, variance=1., lengthscale=None, period=2 * np.pi
|
||||||
:param n_freq: the number of frequencies considered for the periodic subspace
|
:param n_freq: the number of frequencies considered for the periodic subspace
|
||||||
:type n_freq: int
|
:type n_freq: int
|
||||||
"""
|
"""
|
||||||
part = periodic_Matern32part(input_dim, variance, lengthscale, period, n_freq, lower, upper)
|
part = parts.periodic_Matern32.PeriodicMatern32(input_dim, variance, lengthscale, period, n_freq, lower, upper)
|
||||||
return kern(input_dim, [part])
|
return kern(input_dim, [part])
|
||||||
|
|
||||||
def periodic_Matern52(input_dim, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
|
def periodic_Matern52(input_dim, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
|
||||||
|
|
@ -251,7 +227,7 @@ def periodic_Matern52(input_dim, variance=1., lengthscale=None, period=2 * np.pi
|
||||||
:param n_freq: the number of frequencies considered for the periodic subspace
|
:param n_freq: the number of frequencies considered for the periodic subspace
|
||||||
:type n_freq: int
|
:type n_freq: int
|
||||||
"""
|
"""
|
||||||
part = periodic_Matern52part(input_dim, variance, lengthscale, period, n_freq, lower, upper)
|
part = parts.periodic_Matern52part(input_dim, variance, lengthscale, period, n_freq, lower, upper)
|
||||||
return kern(input_dim, [part])
|
return kern(input_dim, [part])
|
||||||
|
|
||||||
def prod(k1,k2,tensor=False):
|
def prod(k1,k2,tensor=False):
|
||||||
|
|
@ -262,7 +238,7 @@ def prod(k1,k2,tensor=False):
|
||||||
:type k1, k2: kernpart
|
:type k1, k2: kernpart
|
||||||
:rtype: kernel object
|
:rtype: kernel object
|
||||||
"""
|
"""
|
||||||
part = prodpart(k1,k2,tensor)
|
part = parts.prodpart(k1,k2,tensor)
|
||||||
return kern(part.input_dim, [part])
|
return kern(part.input_dim, [part])
|
||||||
|
|
||||||
def symmetric(k):
|
def symmetric(k):
|
||||||
|
|
@ -270,11 +246,11 @@ def symmetric(k):
|
||||||
Construct a symmetrical kernel from an existing kernel
|
Construct a symmetrical kernel from an existing kernel
|
||||||
"""
|
"""
|
||||||
k_ = k.copy()
|
k_ = k.copy()
|
||||||
k_.parts = [symmetric_part(p) for p in k.parts]
|
k_.parts = [symmetric.Symmetric(p) for p in k.parts]
|
||||||
return k_
|
return k_
|
||||||
|
|
||||||
def Coregionalise(Nout,R=1, W=None, kappa=None):
|
def coregionalise(Nout,R=1, W=None, kappa=None):
|
||||||
p = coregionalise_part(Nout,R,W,kappa)
|
p = parts.coregionalise.Coregionalise(Nout,R,W,kappa)
|
||||||
return kern(1,[p])
|
return kern(1,[p])
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -291,10 +267,10 @@ def rational_quadratic(input_dim, variance=1., lengthscale=1., power=1.):
|
||||||
:rtype: kern object
|
:rtype: kern object
|
||||||
|
|
||||||
"""
|
"""
|
||||||
part = rational_quadraticpart(input_dim, variance, lengthscale, power)
|
part = parts.rational_quadratic.RationalQuadratic(input_dim, variance, lengthscale, power)
|
||||||
return kern(input_dim, [part])
|
return kern(input_dim, [part])
|
||||||
|
|
||||||
def Fixed(input_dim, K, variance=1.):
|
def fixed(input_dim, K, variance=1.):
|
||||||
"""
|
"""
|
||||||
Construct a Fixed effect kernel.
|
Construct a Fixed effect kernel.
|
||||||
|
|
||||||
|
|
@ -304,23 +280,21 @@ def Fixed(input_dim, K, variance=1.):
|
||||||
K (np.array), obligatory
|
K (np.array), obligatory
|
||||||
variance (float)
|
variance (float)
|
||||||
"""
|
"""
|
||||||
part = fixedpart(input_dim, K, variance)
|
part = parts.fixed.Fixed(input_dim, K, variance)
|
||||||
return kern(input_dim, [part])
|
return kern(input_dim, [part])
|
||||||
|
|
||||||
def rbfcos(input_dim, variance=1., frequencies=None, bandwidths=None, ARD=False):
|
def rbfcos(input_dim, variance=1., frequencies=None, bandwidths=None, ARD=False):
|
||||||
"""
|
"""
|
||||||
construct a rbfcos kernel
|
construct a rbfcos kernel
|
||||||
"""
|
"""
|
||||||
part = rbfcospart(input_dim, variance, frequencies, bandwidths, ARD)
|
part = parts.rbfcos.RBFCos(input_dim, variance, frequencies, bandwidths, ARD)
|
||||||
return kern(input_dim, [part])
|
return kern(input_dim, [part])
|
||||||
|
|
||||||
def IndependentOutputs(k):
|
def independent_outputs(k):
|
||||||
"""
|
"""
|
||||||
Construct a kernel with independent outputs from an existing kernel
|
Construct a kernel with independent outputs from an existing kernel
|
||||||
"""
|
"""
|
||||||
for sl in k.input_slices:
|
for sl in k.input_slices:
|
||||||
assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
|
assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
|
||||||
parts = [independent_output_part(p) for p in k.parts]
|
parts = [independent_outputs.IndependentOutputs(p) for p in k.parts]
|
||||||
return kern(k.input_dim+1,parts)
|
return kern(k.input_dim+1,parts)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,12 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pylab as pb
|
import pylab as pb
|
||||||
from ..core.parameterised import Parameterised
|
from ..core.parameterised import Parameterised
|
||||||
from kernpart import Kernpart
|
from parts.kernpart import Kernpart
|
||||||
import itertools
|
import itertools
|
||||||
from prod import prod
|
from parts.prod import Prod as prod
|
||||||
|
|
||||||
class kern(Parameterised):
|
class kern(Parameterised):
|
||||||
def __init__(self, input_dim, parts=[], input_slices=None):
|
def __init__(self, input_dim, parts=[], input_slices=None):
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ class Brownian(Kernpart):
|
||||||
def __init__(self,input_dim,variance=1.):
|
def __init__(self,input_dim,variance=1.):
|
||||||
self.input_dim = input_dim
|
self.input_dim = input_dim
|
||||||
assert self.input_dim==1, "Brownian motion in 1D only"
|
assert self.input_dim==1, "Brownian motion in 1D only"
|
||||||
self.num_params = 1.
|
self.num_params = 1
|
||||||
self.name = 'Brownian'
|
self.name = 'Brownian'
|
||||||
self._set_params(np.array([variance]).flatten())
|
self._set_params(np.array([variance]).flatten())
|
||||||
|
|
||||||
21
GPy/kern/parts/__init__.py
Normal file
21
GPy/kern/parts/__init__.py
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
import bias
|
||||||
|
import Brownian
|
||||||
|
import coregionalise
|
||||||
|
import exponential
|
||||||
|
import finite_dimensional
|
||||||
|
import fixed
|
||||||
|
import independent_outputs
|
||||||
|
import linear
|
||||||
|
import Matern32
|
||||||
|
import Matern52
|
||||||
|
import periodic_exponential
|
||||||
|
import periodic_Matern32
|
||||||
|
import periodic_Matern52
|
||||||
|
import prod_orthogonal
|
||||||
|
import prod
|
||||||
|
import rational_quadratic
|
||||||
|
import rbfcos
|
||||||
|
import rbf
|
||||||
|
import spline
|
||||||
|
import symmetric
|
||||||
|
import white
|
||||||
|
|
@ -6,7 +6,7 @@ from kernpart import Kernpart
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
class bias(Kernpart):
|
class Bias(Kernpart):
|
||||||
def __init__(self,input_dim,variance=1.):
|
def __init__(self,input_dim,variance=1.):
|
||||||
"""
|
"""
|
||||||
:param input_dim: the number of input dimensions
|
:param input_dim: the number of input dimensions
|
||||||
|
|
@ -6,7 +6,7 @@ from kernpart import Kernpart
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy import integrate
|
from scipy import integrate
|
||||||
|
|
||||||
class exponential(Kernpart):
|
class Exponential(Kernpart):
|
||||||
"""
|
"""
|
||||||
Exponential kernel (aka Ornstein-Uhlenbeck or Matern 1/2)
|
Exponential kernel (aka Ornstein-Uhlenbeck or Matern 1/2)
|
||||||
|
|
||||||
|
|
@ -4,9 +4,9 @@
|
||||||
|
|
||||||
from kernpart import Kernpart
|
from kernpart import Kernpart
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from ..util.linalg import pdinv,mdot
|
from ...util.linalg import pdinv,mdot
|
||||||
|
|
||||||
class finite_dimensional(Kernpart):
|
class FiniteDimensional(Kernpart):
|
||||||
def __init__(self, input_dim, F, G, variance=1., weights=None):
|
def __init__(self, input_dim, F, G, variance=1., weights=None):
|
||||||
"""
|
"""
|
||||||
Argumnents
|
Argumnents
|
||||||
|
|
@ -15,7 +15,7 @@ class Fixed(Kernpart):
|
||||||
self.input_dim = input_dim
|
self.input_dim = input_dim
|
||||||
self.fixed_K = K
|
self.fixed_K = K
|
||||||
self.num_params = 1
|
self.num_params = 1
|
||||||
self.name = 'Fixed'
|
self.name = 'fixed'
|
||||||
self._set_params(np.array([variance]).flatten())
|
self._set_params(np.array([variance]).flatten())
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
|
|
@ -4,10 +4,10 @@
|
||||||
|
|
||||||
from kernpart import Kernpart
|
from kernpart import Kernpart
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from ..util.linalg import tdot
|
from ...util.linalg import tdot
|
||||||
from scipy import weave
|
from scipy import weave
|
||||||
|
|
||||||
class linear(Kernpart):
|
class Linear(Kernpart):
|
||||||
"""
|
"""
|
||||||
Linear kernel
|
Linear kernel
|
||||||
|
|
||||||
|
|
@ -7,7 +7,7 @@ import numpy as np
|
||||||
from GPy.util.linalg import mdot
|
from GPy.util.linalg import mdot
|
||||||
from GPy.util.decorators import silence_errors
|
from GPy.util.decorators import silence_errors
|
||||||
|
|
||||||
class periodic_Matern32(Kernpart):
|
class PeriodicMatern32(Kernpart):
|
||||||
"""
|
"""
|
||||||
Kernel of the periodic subspace (up to a given frequency) of a Matern 3/2 RKHS. Only defined for input_dim=1.
|
Kernel of the periodic subspace (up to a given frequency) of a Matern 3/2 RKHS. Only defined for input_dim=1.
|
||||||
|
|
||||||
|
|
@ -7,7 +7,7 @@ import numpy as np
|
||||||
from GPy.util.linalg import mdot
|
from GPy.util.linalg import mdot
|
||||||
from GPy.util.decorators import silence_errors
|
from GPy.util.decorators import silence_errors
|
||||||
|
|
||||||
class periodic_Matern52(Kernpart):
|
class PeriodicMatern52(Kernpart):
|
||||||
"""
|
"""
|
||||||
Kernel of the periodic subspace (up to a given frequency) of a Matern 5/2 RKHS. Only defined for input_dim=1.
|
Kernel of the periodic subspace (up to a given frequency) of a Matern 5/2 RKHS. Only defined for input_dim=1.
|
||||||
|
|
||||||
|
|
@ -7,7 +7,7 @@ import numpy as np
|
||||||
from GPy.util.linalg import mdot
|
from GPy.util.linalg import mdot
|
||||||
from GPy.util.decorators import silence_errors
|
from GPy.util.decorators import silence_errors
|
||||||
|
|
||||||
class periodic_exponential(Kernpart):
|
class PeriodicExponential(Kernpart):
|
||||||
"""
|
"""
|
||||||
Kernel of the periodic subspace (up to a given frequency) of a exponential (Matern 1/2) RKHS. Only defined for input_dim=1.
|
Kernel of the periodic subspace (up to a given frequency) of a exponential (Matern 1/2) RKHS. Only defined for input_dim=1.
|
||||||
|
|
||||||
|
|
@ -5,7 +5,7 @@ from kernpart import Kernpart
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
class prod(Kernpart):
|
class Prod(Kernpart):
|
||||||
"""
|
"""
|
||||||
Computes the product of 2 kernels
|
Computes the product of 2 kernels
|
||||||
|
|
||||||
|
|
@ -5,7 +5,7 @@
|
||||||
from kernpart import Kernpart
|
from kernpart import Kernpart
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
class rational_quadratic(Kernpart):
|
class RationalQuadratic(Kernpart):
|
||||||
"""
|
"""
|
||||||
rational quadratic kernel
|
rational quadratic kernel
|
||||||
|
|
||||||
|
|
@ -6,9 +6,9 @@ from kernpart import Kernpart
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import hashlib
|
import hashlib
|
||||||
from scipy import weave
|
from scipy import weave
|
||||||
from ..util.linalg import tdot
|
from ...util.linalg import tdot
|
||||||
|
|
||||||
class rbf(Kernpart):
|
class RBF(Kernpart):
|
||||||
"""
|
"""
|
||||||
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
|
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
|
||||||
|
|
||||||
|
|
@ -241,9 +241,9 @@ class rbf(Kernpart):
|
||||||
# here are the "statistics" for psi1 and psi2
|
# here are the "statistics" for psi1 and psi2
|
||||||
if not np.array_equal(Z, self._Z):
|
if not np.array_equal(Z, self._Z):
|
||||||
#Z has changed, compute Z specific stuff
|
#Z has changed, compute Z specific stuff
|
||||||
self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # num_inducing,num_inducing,input_dim
|
self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q
|
||||||
self._psi2_Zdist = 0.5*(Z[:,None,:]-Z[None,:,:]) # num_inducing,num_inducing,input_dim
|
self._psi2_Zdist = 0.5*(Z[:,None,:]-Z[None,:,:]) # M,M,Q
|
||||||
self._psi2_Zdist_sq = np.square(self._psi2_Zdist/self.lengthscale) # num_inducing,num_inducing,input_dim
|
self._psi2_Zdist_sq = np.square(self._psi2_Zdist/self.lengthscale) # M,M,Q
|
||||||
self._Z = Z
|
self._Z = Z
|
||||||
|
|
||||||
if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)):
|
if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)):
|
||||||
|
|
@ -257,12 +257,12 @@ class rbf(Kernpart):
|
||||||
self._psi1 = self.variance*np.exp(self._psi1_exponent)
|
self._psi1 = self.variance*np.exp(self._psi1_exponent)
|
||||||
|
|
||||||
#psi2
|
#psi2
|
||||||
self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscale2+1. # N,num_inducing,num_inducing,input_dim
|
self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscale2+1. # N,M,M,Q
|
||||||
self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu,self._psi2_Zhat)
|
self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu,self._psi2_Zhat)
|
||||||
#self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,num_inducing,num_inducing,input_dim
|
#self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
|
||||||
#self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
|
#self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
|
||||||
#self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,num_inducing,num_inducing
|
#self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
|
||||||
self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,num_inducing,num_inducing
|
self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M,Q
|
||||||
|
|
||||||
#store matrices for caching
|
#store matrices for caching
|
||||||
self._Z, self._mu, self._S = Z, mu,S
|
self._Z, self._mu, self._S = Z, mu,S
|
||||||
|
|
@ -6,7 +6,7 @@
|
||||||
from kernpart import Kernpart
|
from kernpart import Kernpart
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
class rbfcos(Kernpart):
|
class RBFCos(Kernpart):
|
||||||
def __init__(self,input_dim,variance=1.,frequencies=None,bandwidths=None,ARD=False):
|
def __init__(self,input_dim,variance=1.,frequencies=None,bandwidths=None,ARD=False):
|
||||||
self.input_dim = input_dim
|
self.input_dim = input_dim
|
||||||
self.name = 'rbfcos'
|
self.name = 'rbfcos'
|
||||||
|
|
@ -9,7 +9,7 @@ def theta(x):
|
||||||
"""Heaviside step function"""
|
"""Heaviside step function"""
|
||||||
return np.where(x>=0.,1.,0.)
|
return np.where(x>=0.,1.,0.)
|
||||||
|
|
||||||
class spline(Kernpart):
|
class Spline(Kernpart):
|
||||||
"""
|
"""
|
||||||
Spline kernel
|
Spline kernel
|
||||||
|
|
||||||
|
|
@ -4,7 +4,7 @@
|
||||||
from kernpart import Kernpart
|
from kernpart import Kernpart
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
class symmetric(Kernpart):
|
class Symmetric(Kernpart):
|
||||||
"""
|
"""
|
||||||
Symmetrical kernels
|
Symmetrical kernels
|
||||||
|
|
||||||
|
|
@ -1,10 +1,10 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
from kernpart import Kernpart
|
from kernpart import Kernpart
|
||||||
import numpy as np
|
import numpy as np
|
||||||
class white(Kernpart):
|
|
||||||
|
class White(Kernpart):
|
||||||
"""
|
"""
|
||||||
White noise kernel.
|
White noise kernel.
|
||||||
|
|
||||||
|
|
@ -34,6 +34,8 @@ class EP(likelihood):
|
||||||
self.Z = 0
|
self.Z = 0
|
||||||
self.YYT = None
|
self.YYT = None
|
||||||
self.V = self.precision * self.Y
|
self.V = self.precision * self.Y
|
||||||
|
self.VVT_factor = self.V
|
||||||
|
self.trYYT = 0.
|
||||||
|
|
||||||
def restart(self):
|
def restart(self):
|
||||||
self.tau_tilde = np.zeros(self.N)
|
self.tau_tilde = np.zeros(self.N)
|
||||||
|
|
@ -44,6 +46,8 @@ class EP(likelihood):
|
||||||
self.Z = 0
|
self.Z = 0
|
||||||
self.YYT = None
|
self.YYT = None
|
||||||
self.V = self.precision * self.Y
|
self.V = self.precision * self.Y
|
||||||
|
self.VVT_factor = self.V
|
||||||
|
self.trYYT = 0.
|
||||||
|
|
||||||
def predictive_values(self,mu,var,full_cov):
|
def predictive_values(self,mu,var,full_cov):
|
||||||
if full_cov:
|
if full_cov:
|
||||||
|
|
@ -71,6 +75,8 @@ class EP(likelihood):
|
||||||
self.covariance_matrix = np.diag(1./self.tau_tilde)
|
self.covariance_matrix = np.diag(1./self.tau_tilde)
|
||||||
self.precision = self.tau_tilde[:,None]
|
self.precision = self.tau_tilde[:,None]
|
||||||
self.V = self.precision * self.Y
|
self.V = self.precision * self.Y
|
||||||
|
self.VVT_factor = self.V
|
||||||
|
self.trYYT = np.trace(self.YYT)
|
||||||
|
|
||||||
def fit_full(self,K):
|
def fit_full(self,K):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from likelihood import likelihood
|
from likelihood import likelihood
|
||||||
|
from ..util.linalg import jitchol
|
||||||
|
|
||||||
|
|
||||||
class Gaussian(likelihood):
|
class Gaussian(likelihood):
|
||||||
"""
|
"""
|
||||||
|
|
@ -40,9 +42,11 @@ class Gaussian(likelihood):
|
||||||
if D > self.N:
|
if D > self.N:
|
||||||
self.YYT = np.dot(self.Y, self.Y.T)
|
self.YYT = np.dot(self.Y, self.Y.T)
|
||||||
self.trYYT = np.trace(self.YYT)
|
self.trYYT = np.trace(self.YYT)
|
||||||
|
self.YYT_factor = jitchol(self.YYT)
|
||||||
else:
|
else:
|
||||||
self.YYT = None
|
self.YYT = None
|
||||||
self.trYYT = np.sum(np.square(self.Y))
|
self.trYYT = np.sum(np.square(self.Y))
|
||||||
|
self.YYT_factor = self.Y
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
return np.asarray(self._variance)
|
return np.asarray(self._variance)
|
||||||
|
|
@ -53,12 +57,13 @@ class Gaussian(likelihood):
|
||||||
def _set_params(self, x):
|
def _set_params(self, x):
|
||||||
x = np.float64(x)
|
x = np.float64(x)
|
||||||
if np.all(self._variance != x):
|
if np.all(self._variance != x):
|
||||||
if x == 0.:
|
if x == 0.:#special case of zero noise
|
||||||
self.precision = np.inf
|
self.precision = np.inf
|
||||||
self.V = None
|
self.V = None
|
||||||
else:
|
else:
|
||||||
self.precision = 1. / x
|
self.precision = 1. / x
|
||||||
self.V = (self.precision) * self.Y
|
self.V = (self.precision) * self.Y
|
||||||
|
self.VVT_factor = self.precision * self.YYT_factor
|
||||||
self.covariance_matrix = np.eye(self.N) * x
|
self.covariance_matrix = np.eye(self.N) * x
|
||||||
self._variance = x
|
self._variance = x
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@
|
||||||
from gp_regression import GPRegression
|
from gp_regression import GPRegression
|
||||||
from gp_classification import GPClassification
|
from gp_classification import GPClassification
|
||||||
from sparse_gp_regression import SparseGPRegression
|
from sparse_gp_regression import SparseGPRegression
|
||||||
|
from svigp_regression import SVIGPRegression
|
||||||
from sparse_gp_classification import SparseGPClassification
|
from sparse_gp_classification import SparseGPClassification
|
||||||
from fitc_classification import FITCClassification
|
from fitc_classification import FITCClassification
|
||||||
from gplvm import GPLVM
|
from gplvm import GPLVM
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,7 @@ class BayesianGPLVM(SparseGP, GPLVM):
|
||||||
kernel = kern.rbf(input_dim) + kern.white(input_dim)
|
kernel = kern.rbf(input_dim) + kern.white(input_dim)
|
||||||
|
|
||||||
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs)
|
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs)
|
||||||
self._set_params(self._get_params())
|
self.ensure_default_constraints()
|
||||||
|
|
||||||
def __getstate__(self):
|
def __getstate__(self):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -44,4 +44,4 @@ class FITCClassification(FITC):
|
||||||
assert Z.shape[1]==X.shape[1]
|
assert Z.shape[1]==X.shape[1]
|
||||||
|
|
||||||
FITC.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X)
|
FITC.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X)
|
||||||
self._set_params(self._get_params())
|
self.ensure_default_constraints()
|
||||||
|
|
|
||||||
|
|
@ -38,4 +38,4 @@ class GPClassification(GP):
|
||||||
raise Warning, 'likelihood.data and Y are different.'
|
raise Warning, 'likelihood.data and Y are different.'
|
||||||
|
|
||||||
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
||||||
self._set_params(self._get_params())
|
self.ensure_default_constraints()
|
||||||
|
|
|
||||||
|
|
@ -32,4 +32,4 @@ class GPRegression(GP):
|
||||||
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
|
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
|
||||||
|
|
||||||
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
||||||
self._set_params(self._get_params())
|
self.ensure_default_constraints()
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ class GPLVM(GP):
|
||||||
kernel = kern.rbf(input_dim, ARD=input_dim > 1) + kern.bias(input_dim, np.exp(-2)) + kern.white(input_dim, np.exp(-2))
|
kernel = kern.rbf(input_dim, ARD=input_dim > 1) + kern.bias(input_dim, np.exp(-2)) + kern.white(input_dim, np.exp(-2))
|
||||||
likelihood = Gaussian(Y, normalize=normalize_Y)
|
likelihood = Gaussian(Y, normalize=normalize_Y)
|
||||||
GP.__init__(self, X, likelihood, kernel, normalize_X=False)
|
GP.__init__(self, X, likelihood, kernel, normalize_X=False)
|
||||||
self._set_params(self._get_params())
|
self.ensure_default_constraints()
|
||||||
|
|
||||||
def initialise_latent(self, init, input_dim, Y):
|
def initialise_latent(self, init, input_dim, Y):
|
||||||
if init == 'PCA':
|
if init == 'PCA':
|
||||||
|
|
|
||||||
|
|
@ -82,7 +82,7 @@ class MRD(Model):
|
||||||
self.MQ = self.num_inducing * self.input_dim
|
self.MQ = self.num_inducing * self.input_dim
|
||||||
|
|
||||||
Model.__init__(self)
|
Model.__init__(self)
|
||||||
self._set_params(self._get_params())
|
self.ensure_default_constraints()
|
||||||
|
|
||||||
def __getstate__(self):
|
def __getstate__(self):
|
||||||
return [self.names,
|
return [self.names,
|
||||||
|
|
|
||||||
|
|
@ -44,4 +44,4 @@ class SparseGPClassification(SparseGP):
|
||||||
assert Z.shape[1]==X.shape[1]
|
assert Z.shape[1]==X.shape[1]
|
||||||
|
|
||||||
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X)
|
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X)
|
||||||
self._set_params(self._get_params())
|
self.ensure_default_constraints()
|
||||||
|
|
|
||||||
|
|
@ -42,4 +42,4 @@ class SparseGPRegression(SparseGP):
|
||||||
likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)
|
likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)
|
||||||
|
|
||||||
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance)
|
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance)
|
||||||
self._set_params(self._get_params())
|
self.ensure_default_constraints()
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ class SparseGPLVM(SparseGPRegression, GPLVM):
|
||||||
def __init__(self, Y, input_dim, kernel=None, init='PCA', num_inducing=10):
|
def __init__(self, Y, input_dim, kernel=None, init='PCA', num_inducing=10):
|
||||||
X = self.initialise_latent(init, input_dim, Y)
|
X = self.initialise_latent(init, input_dim, Y)
|
||||||
SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing)
|
SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing)
|
||||||
|
self.ensure_default_constraints()
|
||||||
|
|
||||||
def _get_param_names(self):
|
def _get_param_names(self):
|
||||||
return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
|
return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
|
||||||
|
|
|
||||||
44
GPy/models/svigp_regression.py
Normal file
44
GPy/models/svigp_regression.py
Normal file
|
|
@ -0,0 +1,44 @@
|
||||||
|
# Copyright (c) 2012, James Hensman
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from ..core import SVIGP
|
||||||
|
from .. import likelihoods
|
||||||
|
from .. import kern
|
||||||
|
|
||||||
|
class SVIGPRegression(SVIGP):
|
||||||
|
"""
|
||||||
|
Gaussian Process model for regression
|
||||||
|
|
||||||
|
This is a thin wrapper around the SVIGP class, with a set of sensible defalts
|
||||||
|
|
||||||
|
:param X: input observations
|
||||||
|
:param Y: observed values
|
||||||
|
:param kernel: a GPy kernel, defaults to rbf+white
|
||||||
|
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
|
||||||
|
:type normalize_X: False|True
|
||||||
|
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
|
||||||
|
:type normalize_Y: False|True
|
||||||
|
:rtype: model object
|
||||||
|
|
||||||
|
.. Note:: Multiple independent outputs are allowed using columns of Y
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, q_u=None, batchsize=10):
|
||||||
|
# kern defaults to rbf (plus white for stability)
|
||||||
|
if kernel is None:
|
||||||
|
kernel = kern.rbf(X.shape[1], variance=1., lengthscale=4.) + kern.white(X.shape[1], 1e-3)
|
||||||
|
|
||||||
|
# Z defaults to a subset of the data
|
||||||
|
if Z is None:
|
||||||
|
i = np.random.permutation(X.shape[0])[:num_inducing]
|
||||||
|
Z = X[i].copy()
|
||||||
|
else:
|
||||||
|
assert Z.shape[1] == X.shape[1]
|
||||||
|
|
||||||
|
# likelihood defaults to Gaussian
|
||||||
|
likelihood = likelihoods.Gaussian(Y, normalize=False)
|
||||||
|
|
||||||
|
SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize)
|
||||||
|
self.load_batch()
|
||||||
|
|
@ -16,7 +16,6 @@ class BGPLVMTests(unittest.TestCase):
|
||||||
Y -= Y.mean(axis=0)
|
Y -= Y.mean(axis=0)
|
||||||
k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
||||||
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.randomize()
|
m.randomize()
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
|
|
@ -29,7 +28,6 @@ class BGPLVMTests(unittest.TestCase):
|
||||||
Y -= Y.mean(axis=0)
|
Y -= Y.mean(axis=0)
|
||||||
k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
||||||
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.randomize()
|
m.randomize()
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
|
|
@ -42,7 +40,6 @@ class BGPLVMTests(unittest.TestCase):
|
||||||
Y -= Y.mean(axis=0)
|
Y -= Y.mean(axis=0)
|
||||||
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
||||||
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.randomize()
|
m.randomize()
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
|
|
@ -55,7 +52,6 @@ class BGPLVMTests(unittest.TestCase):
|
||||||
Y -= Y.mean(axis=0)
|
Y -= Y.mean(axis=0)
|
||||||
k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
||||||
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.randomize()
|
m.randomize()
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
|
|
@ -69,7 +65,6 @@ class BGPLVMTests(unittest.TestCase):
|
||||||
Y -= Y.mean(axis=0)
|
Y -= Y.mean(axis=0)
|
||||||
k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
||||||
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.randomize()
|
m.randomize()
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,6 @@ class GPLVMTests(unittest.TestCase):
|
||||||
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
|
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
|
||||||
k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
||||||
m = GPy.models.GPLVM(Y, input_dim, kernel = k)
|
m = GPy.models.GPLVM(Y, input_dim, kernel = k)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.randomize()
|
m.randomize()
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
|
|
@ -26,7 +25,6 @@ class GPLVMTests(unittest.TestCase):
|
||||||
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
|
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
|
||||||
k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
||||||
m = GPy.models.GPLVM(Y, input_dim, kernel = k)
|
m = GPy.models.GPLVM(Y, input_dim, kernel = k)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.randomize()
|
m.randomize()
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
|
|
@ -38,7 +36,6 @@ class GPLVMTests(unittest.TestCase):
|
||||||
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
|
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
|
||||||
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
||||||
m = GPy.models.GPLVM(Y, input_dim, kernel = k)
|
m = GPy.models.GPLVM(Y, input_dim, kernel = k)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.randomize()
|
m.randomize()
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ class KernelTests(unittest.TestCase):
|
||||||
"""
|
"""
|
||||||
X = np.random.rand(30, 4)
|
X = np.random.rand(30, 4)
|
||||||
K = np.dot(X, X.T)
|
K = np.dot(X, X.T)
|
||||||
kernel = GPy.kern.Fixed(4, K)
|
kernel = GPy.kern.fixed(4, K)
|
||||||
Y = np.ones((30,1))
|
Y = np.ones((30,1))
|
||||||
m = GPy.models.GPRegression(X,Y,kernel=kernel)
|
m = GPy.models.GPRegression(X,Y,kernel=kernel)
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
@ -36,7 +36,7 @@ class KernelTests(unittest.TestCase):
|
||||||
Y = np.vstack((Y1,Y2))
|
Y = np.vstack((Y1,Y2))
|
||||||
|
|
||||||
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
|
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
|
||||||
k2 = GPy.kern.Coregionalise(2,1)
|
k2 = GPy.kern.coregionalise(2,1)
|
||||||
k = k1.prod(k2,tensor=True)
|
k = k1.prod(k2,tensor=True)
|
||||||
m = GPy.models.GPRegression(X,Y,kernel=k)
|
m = GPy.models.GPRegression(X,Y,kernel=k)
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,6 @@ class MRDTests(unittest.TestCase):
|
||||||
likelihood_list = [GPy.likelihoods.Gaussian(Y) for Y in Ylist]
|
likelihood_list = [GPy.likelihoods.Gaussian(Y) for Y in Ylist]
|
||||||
|
|
||||||
m = GPy.models.MRD(likelihood_list, input_dim=input_dim, kernels=k, num_inducing=num_inducing)
|
m = GPy.models.MRD(likelihood_list, input_dim=input_dim, kernels=k, num_inducing=num_inducing)
|
||||||
m.ensure_default_constraints()
|
|
||||||
|
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,6 @@ class PriorTests(unittest.TestCase):
|
||||||
y += 0.05*np.random.randn(len(X))
|
y += 0.05*np.random.randn(len(X))
|
||||||
X, y = X[:, None], y[:, None]
|
X, y = X[:, None], y[:, None]
|
||||||
m = GPy.models.GPRegression(X, y)
|
m = GPy.models.GPRegression(X, y)
|
||||||
m.ensure_default_constraints()
|
|
||||||
lognormal = GPy.priors.LogGaussian(1, 2)
|
lognormal = GPy.priors.LogGaussian(1, 2)
|
||||||
m.set_prior('rbf', lognormal)
|
m.set_prior('rbf', lognormal)
|
||||||
m.randomize()
|
m.randomize()
|
||||||
|
|
@ -28,7 +27,6 @@ class PriorTests(unittest.TestCase):
|
||||||
y += 0.05*np.random.randn(len(X))
|
y += 0.05*np.random.randn(len(X))
|
||||||
X, y = X[:, None], y[:, None]
|
X, y = X[:, None], y[:, None]
|
||||||
m = GPy.models.GPRegression(X, y)
|
m = GPy.models.GPRegression(X, y)
|
||||||
m.ensure_default_constraints()
|
|
||||||
Gamma = GPy.priors.Gamma(1, 1)
|
Gamma = GPy.priors.Gamma(1, 1)
|
||||||
m.set_prior('rbf', Gamma)
|
m.set_prior('rbf', Gamma)
|
||||||
m.randomize()
|
m.randomize()
|
||||||
|
|
@ -42,7 +40,6 @@ class PriorTests(unittest.TestCase):
|
||||||
y += 0.05*np.random.randn(len(X))
|
y += 0.05*np.random.randn(len(X))
|
||||||
X, y = X[:, None], y[:, None]
|
X, y = X[:, None], y[:, None]
|
||||||
m = GPy.models.GPRegression(X, y)
|
m = GPy.models.GPRegression(X, y)
|
||||||
m.ensure_default_constraints()
|
|
||||||
gaussian = GPy.priors.Gaussian(1, 1)
|
gaussian = GPy.priors.Gaussian(1, 1)
|
||||||
success = False
|
success = False
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -113,7 +113,6 @@ if __name__ == "__main__":
|
||||||
# Y -= Y.mean(axis=0)
|
# Y -= Y.mean(axis=0)
|
||||||
# k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
# k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
||||||
# m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
# m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
||||||
# m.ensure_default_constraints()
|
|
||||||
# m.randomize()
|
# m.randomize()
|
||||||
# # self.assertTrue(m.checkgrad())
|
# # self.assertTrue(m.checkgrad())
|
||||||
numpy.random.seed(0)
|
numpy.random.seed(0)
|
||||||
|
|
@ -146,7 +145,6 @@ if __name__ == "__main__":
|
||||||
# num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim))
|
# num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim))
|
||||||
m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
|
m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
|
||||||
num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)))
|
num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)))
|
||||||
m3.ensure_default_constraints()
|
|
||||||
# + GPy.kern.bias(input_dim))
|
# + GPy.kern.bias(input_dim))
|
||||||
# m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
|
# m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
|
||||||
# num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim))
|
# num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim))
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,6 @@ class sparse_GPLVMTests(unittest.TestCase):
|
||||||
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
|
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
|
||||||
k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
||||||
m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.randomize()
|
m.randomize()
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
|
|
@ -27,7 +26,6 @@ class sparse_GPLVMTests(unittest.TestCase):
|
||||||
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
|
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
|
||||||
k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
||||||
m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.randomize()
|
m.randomize()
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
|
|
@ -39,7 +37,6 @@ class sparse_GPLVMTests(unittest.TestCase):
|
||||||
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
|
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
|
||||||
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
||||||
m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.randomize()
|
m.randomize()
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,6 @@ class GradientTests(unittest.TestCase):
|
||||||
noise = GPy.kern.white(dimension)
|
noise = GPy.kern.white(dimension)
|
||||||
kern = kern + noise
|
kern = kern + noise
|
||||||
m = model_fit(X, Y, kernel=kern)
|
m = model_fit(X, Y, kernel=kern)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.randomize()
|
m.randomize()
|
||||||
# contrain all parameters to be positive
|
# contrain all parameters to be positive
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
@ -150,7 +149,6 @@ class GradientTests(unittest.TestCase):
|
||||||
K = k.K(X)
|
K = k.K(X)
|
||||||
Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
|
Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
|
||||||
m = GPy.models.GPLVM(Y, input_dim, kernel=k)
|
m = GPy.models.GPLVM(Y, input_dim, kernel=k)
|
||||||
m.ensure_default_constraints()
|
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
def test_GPLVM_rbf_linear_white_kern_2D(self):
|
def test_GPLVM_rbf_linear_white_kern_2D(self):
|
||||||
|
|
@ -161,7 +159,6 @@ class GradientTests(unittest.TestCase):
|
||||||
K = k.K(X)
|
K = k.K(X)
|
||||||
Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
|
Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
|
||||||
m = GPy.models.GPLVM(Y, input_dim, init='PCA', kernel=k)
|
m = GPy.models.GPLVM(Y, input_dim, init='PCA', kernel=k)
|
||||||
m.ensure_default_constraints()
|
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
def test_GP_EP_probit(self):
|
def test_GP_EP_probit(self):
|
||||||
|
|
@ -195,7 +192,6 @@ class GradientTests(unittest.TestCase):
|
||||||
k = GPy.kern.rbf(1) + GPy.kern.white(1)
|
k = GPy.kern.rbf(1) + GPy.kern.white(1)
|
||||||
Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
|
Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
|
||||||
m = GPy.models.FITCClassification(X, Y=Y)
|
m = GPy.models.FITCClassification(X, Y=Y)
|
||||||
m.ensure_default_constraints()
|
|
||||||
m.update_likelihood_approximation()
|
m.update_likelihood_approximation()
|
||||||
self.assertTrue(m.checkgrad())
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,29 +2,30 @@ import numpy as np
|
||||||
|
|
||||||
def conf_matrix(p,labels,names=['1','0'],threshold=.5,show=True):
|
def conf_matrix(p,labels,names=['1','0'],threshold=.5,show=True):
|
||||||
"""
|
"""
|
||||||
Returns true and false positives in a binary classification problem
|
Returns error rate and true/false positives in a binary classification problem
|
||||||
- Column names: true class of the examples
|
- Actual classes are displayed by column.
|
||||||
- Row names: classification assigned by the model
|
- Predicted classes are displayed by row.
|
||||||
|
|
||||||
p: probabilities estimated for observation of belonging to class '1'
|
:param p: array of class '1' probabilities.
|
||||||
labels: observations' class
|
:param labels: array of actual classes.
|
||||||
names: classes' names
|
:param names: list of class names, defaults to ['1','0'].
|
||||||
threshold: probability value at which the model allocate an element to each class
|
:param threshold: probability value used to decide the class.
|
||||||
show: whether the matrix should be shown or not
|
:param show: whether the matrix should be shown or not
|
||||||
|
:type show: False|True
|
||||||
"""
|
"""
|
||||||
p = p.flatten()
|
assert p.size == labels.size, "Arrays p and labels have different dimensions."
|
||||||
labels = labels.flatten()
|
decision = np.ones((labels.size,1))
|
||||||
N = p.size
|
decision[p<threshold] = 0
|
||||||
C = np.ones(N)
|
diff = decision - labels
|
||||||
C[p<threshold] = 0
|
false_0 = diff[diff == -1].size
|
||||||
True_1 = float((labels - C)[labels-C==0].shape[0] )
|
false_1 = diff[diff == 1].size
|
||||||
False_1 = float((labels - C)[labels-C==-2].shape[0] )
|
true_1 = np.sum(decision[diff ==0])
|
||||||
True_0 = float((labels - C)[labels-C==-1].shape[0] )
|
true_0 = labels.size - true_1 - false_0 - false_1
|
||||||
False_0 = float((labels - C)[labels-C==1].shape[0] )
|
error = (false_1 + false_0)/np.float(labels.size)
|
||||||
if show:
|
if show:
|
||||||
print (True_1 + True_0 + 0.)/N * 100,'% instances correctly classified'
|
print 100. - error * 100,'% instances correctly classified'
|
||||||
print '%-10s| %-10s| %-10s| ' % ('',names[0],names[1])
|
print '%-10s| %-10s| %-10s| ' % ('',names[0],names[1])
|
||||||
print '----------|------------|------------|'
|
print '----------|------------|------------|'
|
||||||
print '%-10s| %-10s| %-10s| ' % (names[0],True_1,False_0)
|
print '%-10s| %-10s| %-10s| ' % (names[0],true_1,false_0)
|
||||||
print '%-10s| %-10s| %-10s| ' % (names[1],False_1,True_0)
|
print '%-10s| %-10s| %-10s| ' % (names[1],false_1,true_0)
|
||||||
return True_1, False_1, True_0, False_0
|
return error,true_1, false_1, true_0, false_0
|
||||||
|
|
|
||||||
|
|
@ -70,6 +70,36 @@ def align_subplots(N,M,xlim=None, ylim=None):
|
||||||
else:
|
else:
|
||||||
removeUpperTicks()
|
removeUpperTicks()
|
||||||
|
|
||||||
|
def align_subplot_array(axes,xlim=None, ylim=None):
|
||||||
|
"""make all of the axes in the array hae the same limits, turn off unnecessary ticks
|
||||||
|
|
||||||
|
use pb.subplots() to get an array of axes
|
||||||
|
"""
|
||||||
|
#find sensible xlim,ylim
|
||||||
|
if xlim is None:
|
||||||
|
xlim = [np.inf,-np.inf]
|
||||||
|
for ax in axes.flatten():
|
||||||
|
xlim[0] = min(xlim[0],ax.get_xlim()[0])
|
||||||
|
xlim[1] = max(xlim[1],ax.get_xlim()[1])
|
||||||
|
if ylim is None:
|
||||||
|
ylim = [np.inf,-np.inf]
|
||||||
|
for ax in axes.flatten():
|
||||||
|
ylim[0] = min(ylim[0],ax.get_ylim()[0])
|
||||||
|
ylim[1] = max(ylim[1],ax.get_ylim()[1])
|
||||||
|
|
||||||
|
N,M = axes.shape
|
||||||
|
for i,ax in enumerate(axes.flatten()):
|
||||||
|
ax.set_xlim(xlim)
|
||||||
|
ax.set_ylim(ylim)
|
||||||
|
if (i)%M:
|
||||||
|
ax.set_yticks([])
|
||||||
|
else:
|
||||||
|
removeRightTicks(ax)
|
||||||
|
if i<(M*(N-1)):
|
||||||
|
ax.set_xticks([])
|
||||||
|
else:
|
||||||
|
removeUpperTicks(ax)
|
||||||
|
|
||||||
def x_frame1D(X,plot_limits=None,resolution=None):
|
def x_frame1D(X,plot_limits=None,resolution=None):
|
||||||
"""
|
"""
|
||||||
Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits
|
Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits
|
||||||
|
|
|
||||||
|
|
@ -203,6 +203,7 @@ class lvm_dimselect(lvm):
|
||||||
self.sense_axes = sense_axes
|
self.sense_axes = sense_axes
|
||||||
self.labels = labels
|
self.labels = labels
|
||||||
lvm.__init__(self,vals,Model,data_visualize,latent_axes,sense_axes,latent_index)
|
lvm.__init__(self,vals,Model,data_visualize,latent_axes,sense_axes,latent_index)
|
||||||
|
self.show_sensitivities()
|
||||||
print "use left and right mouse butons to select dimensions"
|
print "use left and right mouse butons to select dimensions"
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -506,5 +507,5 @@ def data_play(Y, visualizer, frame_rate=30):
|
||||||
|
|
||||||
|
|
||||||
for y in Y:
|
for y in Y:
|
||||||
visualizer.modify(y)
|
visualizer.modify(y[None, :])
|
||||||
time.sleep(1./float(frame_rate))
|
time.sleep(1./float(frame_rate))
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
include *.txt
|
include *.txt
|
||||||
recursive-include docs *.txt
|
recursive-include doc *.txt
|
||||||
include *.md
|
include *.md
|
||||||
recursive-include docs *.md
|
recursive-include doc *.md
|
||||||
|
|
|
||||||
2
setup.py
2
setup.py
|
|
@ -5,7 +5,7 @@ import os
|
||||||
from setuptools import setup
|
from setuptools import setup
|
||||||
|
|
||||||
# Version number
|
# Version number
|
||||||
version = '0.4.5'
|
version = '0.4.6'
|
||||||
|
|
||||||
def read(fname):
|
def read(fname):
|
||||||
return open(os.path.join(os.path.dirname(__file__), fname)).read()
|
return open(os.path.join(os.path.dirname(__file__), fname)).read()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue