Merge branch 'devel' of github.com:SheffieldML/GPy into devel

James Hensman 2013-05-17 10:28:21 +01:00
commit dbfcebe2a0
19 changed files with 410 additions and 330 deletions

.gitignore
View file

@@ -39,3 +39,11 @@ nosetests.xml
 #bfgs optimiser leaves this lying around
 iterate.dat
+
+# Nosetests #
+#############
+*.noseids
+
+# git merge files #
+###################
+*.orig

View file

@@ -39,23 +39,29 @@ class logexp(transformation):
         return '(+ve)'

 class logexp_clipped(transformation):
-    def __init__(self):
+    max_bound = 1e300
+    min_bound = 1e-10
+    log_max_bound = np.log(max_bound)
+    log_min_bound = np.log(min_bound)
+
+    def __init__(self, lower=1e-6):
         self.domain = 'positive'
+        self.lower = lower
     def f(self, x):
-        f = np.log(1. + np.exp(x))
+        exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound))
+        f = np.log(1. + exp)
         return f
     def finv(self, f):
-        return np.log(np.exp(f) - 1.)
+        return np.log(np.exp(np.clip(f, self.min_bound, self.max_bound)) - 1.)
     def gradfactor(self, f):
         ef = np.exp(f)
         gf = (ef - 1.) / ef
-        return np.where(f < 1e-6, 0, gf)
+        return np.where(f < self.lower, 0, gf)
     def initialize(self, f):
         if np.any(f < 0.):
             print "Warning: changing parameters to satisfy constraints"
             return np.abs(f)
     def __str__(self):
-        return '(+ve)'
+        return '(+ve_c)'

 class exponent(transformation):
     def __init__(self):
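Aside: the clipped softplus above can be exercised on its own. A minimal numpy sketch follows (module-level functions rather than the GPy transformation class; the constant names mirror the class attributes, but this is not repository code):

import numpy as np

max_bound, min_bound = 1e300, 1e-10
log_max_bound, log_min_bound = np.log(max_bound), np.log(min_bound)

def f(x):
    # softplus, with the exponent clipped so np.exp cannot overflow
    return np.log(1. + np.exp(np.clip(x, log_min_bound, log_max_bound)))

def finv(fx):
    # inverse softplus, clipping f into the representable range first
    return np.log(np.exp(np.clip(fx, min_bound, max_bound)) - 1.)

x = np.array([-5., 0., 5.])
print f(x)        # strictly positive outputs
print finv(f(x))  # recovers x where no clipping occurred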

View file

@@ -79,7 +79,6 @@ def toy_linear_1d_classification(seed=default_seed):
     data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
     Y = data['Y'][:, 0:1]
-    Y[Y == -1] = 0

     # Kernel object
     kernel = GPy.kern.rbf(1)
@@ -96,7 +95,7 @@ def toy_linear_1d_classification(seed=default_seed):
     m.update_likelihood_approximation()
     # Parameters optimization:
     m.optimize()
-    #m.EPEM() #FIXME
+    #m.pseudo_EM() #FIXME

     # Plot
     pb.subplot(211)
@@ -109,14 +108,13 @@ def toy_linear_1d_classification(seed=default_seed):
 def sparse_toy_linear_1d_classification(seed=default_seed):
     """
-    Simple 1D classification example
+    Sparse 1D classification example

     :param seed : seed value for data generation (default is 4).
     :type seed: int
     """
     data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
     Y = data['Y'][:, 0:1]
-    Y[Y == -1] = 0

     # Kernel object
     kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
@@ -168,7 +166,6 @@ def sparse_crescent_data(inducing=10, seed=default_seed):
     sample = np.random.randint(0,data['X'].shape[0],inducing)
     Z = data['X'][sample,:]
-    #Z = (np.random.random_sample(2*inducing)*(data['X'].max()-data['X'].min())+data['X'].min()).reshape(inducing,-1)

     # create sparse GP EP model
     m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)

View file

@@ -2,13 +2,11 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np
-import pylab as pb
-from matplotlib import pyplot as plt, pyplot
+from matplotlib import pyplot as plt
 import GPy
 from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
-from GPy.util.datasets import simulation_BGPLVM
-from GPy.core.transformations import square, logexp_clipped
+from GPy.util.datasets import swiss_roll_generated

 default_seed = np.random.seed(123344)
@@ -47,10 +45,11 @@ def BGPLVM(seed=default_seed):
 def GPLVM_oil_100(optimize=True):
     data = GPy.util.datasets.oil_100()
+    Y = data['X']

     # create simple GP model
     kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6)
-    m = GPy.models.GPLVM(data['X'], 6, kernel=kernel)
+    m = GPy.models.GPLVM(Y, 6, kernel=kernel)
     m.data_labels = data['Y'].argmax(axis=1)

     # optimize
@@ -63,27 +62,88 @@ def GPLVM_oil_100(optimize=True):
     m.plot_latent(labels=m.data_labels)
     return m

-def BGPLVM_oil(optimize=True, N=100, Q=10, M=20, max_f_eval=300, plot=False):
+def swiss_roll(optimize=True, N=1000, M=15, Q=4, sigma=.2, plot=False):
+    from GPy.util.datasets import swiss_roll
+    from GPy.core.transformations import logexp_clipped
+    data = swiss_roll_generated(N=N, sigma=sigma)
+    Y = data['Y']
+    Y -= Y.mean()
+    Y /= Y.std()
+    t = data['t']
+    c = data['colors']
+    try:
+        from sklearn.manifold.isomap import Isomap
+        iso = Isomap().fit(Y)
+        X = iso.embedding_
+        if Q > 2:
+            X = np.hstack((X, np.random.randn(N, Q - 2)))
+    except ImportError:
+        X = np.random.randn(N, Q)
+    if plot:
+        from mpl_toolkits import mplot3d
+        import pylab
+        fig = pylab.figure("Swiss Roll Data")
+        ax = fig.add_subplot(121, projection='3d')
+        ax.scatter(*Y.T, c=c)
+        ax.set_title("Swiss Roll")
+        ax = fig.add_subplot(122)
+        ax.scatter(*X.T[:2], c=c)
+        ax.set_title("Initialization")
+    var = .5
+    S = (var * np.ones_like(X) + np.clip(np.random.randn(N, Q) * var ** 2,
+                                         - (1 - var),
+                                         (1 - var))) + .001
+    Z = np.random.permutation(X)[:M]
+    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
+    m = Bayesian_GPLVM(Y, Q, X=X, X_variance=S, M=M, Z=Z, kernel=kernel)
+    m.data_colors = c
+    m.data_t = t
+    m.constrain('variance|length', logexp_clipped())
+    m['lengthscale'] = 1. # X.var(0).max() / X.var(0)
+    m['noise'] = Y.var() / 100.
+    m.ensure_default_constraints()
+    if optimize:
+        m.optimize('scg', messages=1)
+    return m

+def BGPLVM_oil(optimize=True, N=100, Q=5, M=25, max_f_eval=4e3, plot=False, **k):
     data = GPy.util.datasets.oil()
+    from GPy.core.transformations import logexp_clipped
+    np.random.seed(0)

     # create simple GP model
     kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
     Y = data['X'][:N]
-    m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=kernel, M=M)
+    Yn = Y - Y.mean(0)
+    Yn /= Yn.std(0)
+    m = GPy.models.Bayesian_GPLVM(Yn, Q, kernel=kernel, M=M, **k)
     m.data_labels = data['Y'][:N].argmax(axis=1)
-    m.constrain('variance', logexp_clipped())
-    m.constrain('length', logexp_clipped())
-    m['lengt'] = 100.
+    # m.constrain('variance', logexp_clipped())
+    # m.constrain('length', logexp_clipped())
+    m['lengt'] = m.X.var(0).max() / m.X.var(0)
+    m['noise'] = Yn.var() / 100.
     m.ensure_default_constraints()

     # optimize
     if optimize:
-        m.unconstrain('noise'); m.constrain_fixed('noise', Y.var() / 100.)
-        m.optimize('scg', messages=1, max_f_eval=150)
-        m.unconstrain('noise')
-        m.constrain('noise', logexp_clipped())
+        # m.unconstrain('noise'); m.constrain_fixed('noise')
+        # m.optimize('scg', messages=1, max_f_eval=200)
+        # m.unconstrain('noise')
+        # m.constrain('noise', logexp_clipped())
         m.optimize('scg', messages=1, max_f_eval=max_f_eval)

     if plot:
@@ -115,6 +175,8 @@ def oil_100():
     # m.plot_latent(labels=data['Y'].argmax(axis=1))
     return m

 def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
     x = np.linspace(0, 4 * np.pi, N)[:, None]
     s1 = np.vectorize(lambda x: np.sin(x))
@@ -178,6 +240,7 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
     return slist, [S1, S2, S3], Ylist

 def bgplvm_simulation_matlab_compare():
+    from GPy.util.datasets import simulation_BGPLVM
     sim_data = simulation_BGPLVM()
     Y = sim_data['Y']
     S = sim_data['S']
@@ -213,6 +276,8 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
                       max_burnin=100, true_X=False,
                       do_opt=True,
                       max_f_eval=1000):
+    from GPy.core.transformations import logexp_clipped
     D1, D2, D3, N, M, Q = 15, 8, 8, 350, 3, 6
     slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
@@ -317,6 +382,8 @@ def mrd_simulation(plot_sim=False):
     from GPy.models import mrd
     from GPy import kern
+    from GPy.core.transformations import logexp_clipped
     reload(mrd); reload(kern)

     # k = kern.rbf(2, ARD=True) + kern.bias(2) + kern.white(2)
@@ -365,13 +432,23 @@ def mrd_silhouette():
     pass

 def brendan_faces():
+    from GPy import kern
     data = GPy.util.datasets.brendan_faces()
-    Y = data['Y'][0:-1:10, :]
-    m = GPy.models.GPLVM(data['Y'], 2)
+    Q = 2
+    # Y = data['Y'][0:-1:2, :]
+    Y = data['Y']
+    Yn = Y - Y.mean()
+    Yn /= Yn.std()
+    m = GPy.models.GPLVM(Yn, Q)#, M=Y.shape[0]/4)

     # optimize
+    # m.constrain_fixed('white', 1e-2)
+    # m.constrain_bounded('noise', 1e-6, 10)
+    m.constrain('rbf', GPy.core.transformations.logexp_clipped())
     m.ensure_default_constraints()
-    m.optimize(messages=1, max_f_eval=10000)
+    m.optimize('scg', messages=1, max_f_eval=10000)

     ax = m.plot_latent()
     y = m.likelihood.Y[0, :]

View file

@@ -111,7 +111,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
         iteration += 1
         if display:
             print '\r',
-            print 'i: {0:>5g} f:{1:> 12e} b:{2:> 12e} |g|:{3:> 12e}'.format(iteration, fnow, beta, current_grad),
+            print 'Iter: {0:>0{mi}g} Obj:{1:> 12e} Scale:{2:> 12e} |g|:{3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len(str(maxiters))),
             # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
             sys.stdout.flush()
@@ -130,7 +130,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
             # If the gradient is zero then we are done.
             if current_grad <= gtol:
                 status = 'converged'
-                return x, flog, function_eval, status
+                break
+                # return x, flog, function_eval, status

         # Adjust beta according to comparison ratio.
         if Delta < 0.25:
@@ -147,9 +148,10 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
         elif success:
             gamma = np.dot(gradold - gradnew, gradnew) / (mu)
             d = gamma * d - gradnew
-    # If we get here, then we haven't terminated in the given number of
-    # iterations.
-    status = "maxiter exceeded"
+    else:
+        # If we get here, then we haven't terminated in the given number of
+        # iterations.
+        status = "maxiter exceeded"
+    print ""
     return x, flog, function_eval, status
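Note: the new break/while-else flow changes how SCG exits, not its interface. A quick smoke test of the function as defined above, on a toy quadratic (the objective here is illustrative, not from the repository; SCG is assumed to be in scope):

import numpy as np

def f(x):
    return 0.5 * np.dot(x, x)

def gradf(x):
    return x

x0 = np.random.randn(5)
x_opt, flog, n_evals, status = SCG(f, gradf, x0, display=False)
print status, f(x_opt)  # expect 'converged' with an objective near zero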

View file

@@ -1,146 +0,0 @@
#Copyright I. Nabney, N. Lawrence and James Hensman (1996 - 2012)
#Scaled Conjugate Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to Python by N. Lawrence and given a pythonic interface by James Hensman
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import numpy as np
import sys
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=1e-6, ftol=1e-6):
"""
Optimisation through Scaled Conjugate Gradients (SCG)
f: the objective function
gradf : the gradient function (should return a 1D np.ndarray)
x : the initial condition
Returns
x : the optimal value for x
flog : a list of all the objective values
"""
sigma0 = 1.0e-4
fold = f(x, *optargs) # Initial function value.
function_eval = 1
fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient.
gradold = gradnew.copy()
d = -gradnew # Initial search direction.
success = True # Force calculation of directional derivs.
nsuccess = 0 # nsuccess counts number of successes.
beta = 1.0 # Initial scale parameter.
betamin = 1.0e-15 # Lower bound on scale.
betamax = 1.0e100 # Upper bound on scale.
status = "Not converged"
flog = [fold]
iteration = 0
# Main optimization loop.
while iteration < maxiters:
# Calculate first and second directional derivatives.
if success:
mu = np.dot(d, gradnew)
if mu >= 0:
d = -gradnew
mu = np.dot(d, gradnew)
kappa = np.dot(d, d)
sigma = sigma0/np.sqrt(kappa)
xplus = x + sigma*d
gplus = gradf(xplus, *optargs)
theta = np.dot(d, (gplus - gradnew))/sigma
# Increase effective curvature and evaluate step size alpha.
delta = theta + beta*kappa
if delta <= 0:
delta = beta*kappa
beta = beta - theta/kappa
alpha = - mu/delta
# Calculate the comparison ratio.
xnew = x + alpha*d
fnew = f(xnew, *optargs)
function_eval += 1
if function_eval >= max_f_eval:
status = "Maximum number of function evaluations exceeded"
return x, flog, function_eval, status
Delta = 2.*(fnew - fold)/(alpha*mu)
if Delta >= 0.:
success = True
nsuccess += 1
x = xnew
fnow = fnew
else:
success = False
fnow = fold
# Store relevant variables
flog.append(fnow) # Current function value
iteration += 1
if display:
print '\r',
print 'Iteration: {0:>5g} Objective:{1:> 12e} Scale:{2:> 12e}'.format(iteration, fnow, beta),
# print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush()
if success:
# Test for termination
if (np.max(np.abs(alpha*d)) < xtol) or (np.abs(fnew-fold) < ftol):
status='converged'
return x, flog, function_eval, status
else:
# Update variables for new position
fold = fnew
gradold = gradnew
gradnew = gradf(x, *optargs)
# If the gradient is zero then we are done.
if np.dot(gradnew,gradnew) == 0:
return x, flog, function_eval, status
# Adjust beta according to comparison ratio.
if Delta < 0.25:
beta = min(4.0*beta, betamax)
if Delta > 0.75:
beta = max(0.5*beta, betamin)
# Update search direction using Polak-Ribiere formula, or re-start
# in direction of negative gradient after nparams steps.
if nsuccess == x.size:
d = -gradnew
nsuccess = 0
elif success:
gamma = np.dot(gradold - gradnew,gradnew)/(mu)
d = gamma*d - gradnew
# If we get here, then we haven't terminated in the given number of
# iterations.
status = "maxiter exceeded"
return x, flog, function_eval, status

View file

@@ -61,7 +61,7 @@ class kern(parameterised):
         ax.bar(np.arange(len(ard_params)) - 0.4, ard_params)
         ax.set_xticks(np.arange(len(ard_params)))
-        ax.set_xticklabels([r"${}$".format(i + 1) for i in range(len(ard_params))])
+        ax.set_xticklabels([r"${}$".format(i) for i in range(len(ard_params))])
         return ax

     def _transform_gradients(self, g):
@@ -176,8 +176,8 @@ class kern(parameterised):
         prev_constr_ind = [K1.constrained_indices] + [K1.Nparam + i for i in K2.constrained_indices]
         prev_constr = K1.constraints + K2.constraints
-        prev_constr_fix = K1.fixed_indices + [arr + K1.Nparam for arr in K2.fixed_indices]
-        prev_constr_fix_values = K1.fixed_values + K2.fixed_values
+        # prev_constr_fix = K1.fixed_indices + [arr + K1.Nparam for arr in K2.fixed_indices]
+        # prev_constr_fix_values = K1.fixed_values + K2.fixed_values

         # follow the previous ties
         for arr in prev_ties:

View file

@@ -1,6 +1,6 @@
 import numpy as np
 from scipy import stats, linalg
-from ..util.linalg import pdinv,mdot,jitchol,DSYR
+from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot
 from likelihood import likelihood

 class EP(likelihood):
@@ -117,8 +117,6 @@ class EP(likelihood):
                 self.v_tilde[i] += Delta_v
                 #Posterior distribution parameters update
                 DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
-                #si=Sigma[:,i:i+1]
-                #Sigma -= Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T)#DSYR
                 mu = np.dot(Sigma,self.v_tilde)
             self.iterations += 1
             #Sigma recomputation with Cholesky decomposition
@@ -135,12 +133,12 @@ class EP(likelihood):
         return self._compute_GP_variables()

-    #def fit_DTC(self, Knn_diag, Kmn, Kmm):
     def fit_DTC(self, Kmm, Kmn):
         """
         The expectation-propagation algorithm with sparse pseudo-input.
         For nomenclature see ... 2013.
         """
+        M = Kmm.shape[0]

         #TODO: this doesn't work with uncertain inputs!
@@ -149,12 +147,20 @@ class EP(likelihood):
         q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
         Sigma0 = Qnn = Knm*Kmmi*Kmn
         """
-        Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
         KmnKnm = np.dot(Kmn,Kmn.T)
+        Lm = jitchol(Kmm)
+        Lmi = chol_inv(Lm)
+        Kmmi = np.dot(Lmi.T,Lmi)
         KmmiKmn = np.dot(Kmmi,Kmn)
         Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
         LLT0 = Kmm.copy()
+        #Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
+        #KmnKnm = np.dot(Kmn, Kmn.T)
+        #KmmiKmn = np.dot(Kmmi,Kmn)
+        #Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
+        #LLT0 = Kmm.copy()

         """
         Posterior approximation: q(f|y) = N(f| mu, Sigma)
         Sigma = Diag + P*R.T*R*P.T + K
@@ -197,19 +203,19 @@ class EP(likelihood):
                 #Site parameters update
                 Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
                 Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
-                self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
-                self.v_tilde[i] = self.v_tilde[i] + Delta_v
+                self.tau_tilde[i] += Delta_tau
+                self.v_tilde[i] += Delta_v
                 #Posterior distribution parameters update
-                LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
+                DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
                 L = jitchol(LLT)
                 #cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
                 V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1)
                 Sigma_diag = np.sum(V*V,-2)
                 si = np.sum(V.T*V[:,i],-1)
-                mu = mu + (Delta_v-Delta_tau*mu[i])*si
+                mu += (Delta_v-Delta_tau*mu[i])*si
                 self.iterations += 1
             #Sigma recomputation with Cholesky decomposition
-            LLT0 = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
+            LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
             L = jitchol(LLT)
             V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1)
             V2,info = linalg.lapack.flapack.dtrtrs(L.T,V,lower=0)
@@ -235,7 +241,9 @@ class EP(likelihood):
         q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
         Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
         """
-        Kmmi, self.Lm, self.Lmi, Kmm_logdet = pdinv(Kmm)
+        Lm = jitchol(Kmm)
+        Lmi = chol_inv(Lm)
+        Kmmi = np.dot(Lmi.T,Lmi)
         P0 = Kmn.T
         KmnKnm = np.dot(P0.T, P0)
         KmmiKmn = np.dot(Kmmi,P0.T)
@@ -290,8 +298,8 @@ class EP(likelihood):
                 #Site parameters update
                 Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
                 Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
-                self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
-                self.v_tilde[i] = self.v_tilde[i] + Delta_v
+                self.tau_tilde[i] += Delta_tau
+                self.v_tilde[i] += Delta_v
                 #Posterior distribution parameters update
                 dtd1 = Delta_tau*Diag[i] + 1.
                 dii = Diag[i]
@@ -301,8 +309,8 @@ class EP(likelihood):
                 Rp_i = np.dot(R,pi_.T)
                 RTR = np.dot(R.T,np.dot(np.eye(M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
                 R = jitchol(RTR).T
-                self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
-                self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
+                self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
+                self.gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
                 RPT = np.dot(R,P.T)
                 Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
                 mu = self.w + np.dot(P,self.gamma)
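For reference, the quantity both sparse fits start from is the diagonal of Qnn = Knm Kmm^{-1} Kmn, now computed through a Cholesky factor instead of pdinv. A self-contained numpy/scipy sketch with random stand-ins for Kmm and Kmn (not the GPy helpers themselves):

import numpy as np
from scipy import linalg

M, N = 5, 20
A = np.random.randn(M, 2 * M)
Kmm = np.dot(A, A.T) + 1e-6 * np.eye(M)  # positive definite M x M
Kmn = np.random.randn(M, N)

Lm = linalg.cholesky(Kmm, lower=True)              # Kmm = Lm Lm.T
V = linalg.solve_triangular(Lm, Kmn, lower=True)   # V = Lm^{-1} Kmn
Qnn_diag = np.sum(V * V, 0)                        # diag(Knm Kmm^{-1} Kmn)

# agrees with the naive computation via an explicit inverse
assert np.allclose(Qnn_diag, np.diag(np.dot(Kmn.T, np.dot(np.linalg.inv(Kmm), Kmn))))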

View file

@@ -53,10 +53,10 @@ class Gaussian(likelihood):
     def _set_params(self, x):
         x = float(x)
         if self._variance != x:
-            self._variance = x
-            self.covariance_matrix = np.eye(self.N) * self._variance
-            self.precision = 1. / self._variance
+            self.precision = 1. / x
+            self.covariance_matrix = np.eye(self.N) * x
             self.V = (self.precision) * self.Y
+            self._variance = x

     def predictive_values(self, mu, var, full_cov):
         """
@@ -69,6 +69,7 @@ class Gaussian(likelihood):
         # Note. for D>1, we need to re-normalise all the outputs independently.
         # This will mess up computations of diag(true_var), below.
         # note that the upper, lower quantiles should be the same shape as mean
+        # Augment the output variance with the likelihood variance and rescale.
         true_var = (var + np.eye(var.shape[0]) * self._variance) * self._scale ** 2
         _5pc = mean - 2.*np.sqrt(np.diag(true_var))
         _95pc = mean + 2.*np.sqrt(np.diag(true_var))

View file

@@ -58,7 +58,7 @@ class probit(likelihood_function):
         norm_975 = [stats.norm.ppf(.975,m,v) for m,v in zip(mu,var)]
         p_025 = stats.norm.cdf(norm_025/np.sqrt(1+var))
         p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var))
-        return mean, np.nan*var, p_025, p_975 # TODO: var
+        return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var

 class Poisson(likelihood_function):
     """

View file

@@ -27,7 +27,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
     """
     def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10,
-                 Z=None, kernel=None, oldpsave=5, _debug=False,
+                 Z=None, kernel=None, oldpsave=10, _debug=False,
                  **kwargs):
         if X == None:
             X = self.initialise_latent(init, Q, Y)
@@ -87,19 +87,19 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         return x

     def _set_params(self, x, save_old=True, save_count=0):
-        try:
-            N, Q = self.N, self.Q
-            self.X = x[:self.X.size].reshape(N, Q).copy()
-            self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
-            sparse_GP._set_params(self, x[(2 * N * Q):])
-            self.oldps = x
-        except (LinAlgError, FloatingPointError, ZeroDivisionError):
-            print "\rWARNING: Caught LinAlgError, continueing without setting "
-            if self._debug:
-                self._savederrors.append(self.f_call)
-            if save_count > 10:
-                raise
-            self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)
+        # try:
+        N, Q = self.N, self.Q
+        self.X = x[:self.X.size].reshape(N, Q).copy()
+        self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
+        sparse_GP._set_params(self, x[(2 * N * Q):])
+        # self.oldps = x
+        # except (LinAlgError, FloatingPointError, ZeroDivisionError):
+        #     print "\rWARNING: Caught LinAlgError, continueing without setting "
+        #     if self._debug:
+        #         self._savederrors.append(self.f_call)
+        #     if save_count > 10:
+        #         raise
+        #     self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)

     def dKL_dmuS(self):
         dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
@@ -167,8 +167,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         # d_dmu = (dL_dmu).flatten()
         # d_dS = (dL_dS).flatten()
         # ========================
-        dbound_dmuS = np.hstack((d_dmu, d_dS))
-        return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self)))
+        self.dbound_dmuS = np.hstack((d_dmu, d_dS))
+        self.dbound_dZtheta = sparse_GP._log_likelihood_gradients(self)
+        return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))
+
+    def _log_likelihood_normal_gradients(self):
+        Si, _, _, _ = pdinv(self.X_variance)

     def plot_latent(self, which_indices=None, *args, **kwargs):
@@ -263,7 +267,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         param_dict = dict(self._savedparams)
         gradient_dict = dict(self._savedgradients)
-        kmm_dict = dict(self._savedpsiKmm)
+        # kmm_dict = dict(self._savedpsiKmm)
         iters = np.array(param_dict.keys())
         ABCD_dict = np.array(self._savedABCD)
         self.showing = 0

View file

@@ -3,8 +3,7 @@
 import numpy as np
 import pylab as pb
-from ..util.linalg import mdot, jitchol, tdot, symmetrify,pdinv
-#from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
+from ..util.linalg import mdot, jitchol, chol_inv, tdot, symmetrify,pdinv
 from ..util.plot import gpplot
 from .. import kern
 from scipy import stats, linalg
@@ -33,7 +32,6 @@ class FITC(sparse_GP):
         self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
         self._set_params(self._get_params()) # update the GP
-    #@profile
     def _computations(self):
         #factor Kmm
@@ -58,18 +56,15 @@ class FITC(sparse_GP):
         # factor B
         self.B = np.eye(self.M) + self.A
         self.LB = jitchol(self.B)
-        self.LBi,info = linalg.lapack.flapack.dtrtrs(self.LB,np.eye(self.M),lower=1)
+        self.LBi = chol_inv(self.LB)
         self.psi1V = np.dot(self.psi1, self.V_star)
-        # back substutue C into psi1V
-        Lmi_psi1V, info1 = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
+        Lmi_psi1V, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
         self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0)
         Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
         b_psi1_Ki = self.beta_star * Kmmipsi1.T
         Ki_pbp_Ki = np.dot(Kmmipsi1,b_psi1_Ki)
         Kmmi = np.dot(self.Lmi.T,self.Lmi)
         LBiLmi = np.dot(self.LBi,self.Lmi)
         LBL_inv = np.dot(LBiLmi.T,LBiLmi)
@@ -78,13 +73,15 @@ class FITC(sparse_GP):
         Ki_pVVp_Ki = np.dot(Kmmipsi1,VV_p_Ki)
         psi1beta = self.psi1*self.beta_star.T
         H = self.Kmm + mdot(self.psi1,psi1beta.T)
-        Hi, LH, LHi, logdetH = pdinv(H)
+        LH = jitchol(H)
+        LHi = chol_inv(LH)
+        Hi = np.dot(LHi.T,LHi)
         betapsi1TLmiLBi = np.dot(psi1beta.T,LBiLmi.T)
         alpha = np.array([np.dot(a.T,a) for a in betapsi1TLmiLBi])[:,None]
         gamma_1 = mdot(VVT,self.psi1.T,Hi)
         pHip = mdot(self.psi1.T,Hi,self.psi1)
         gamma_2 = mdot(self.beta_star*pHip,self.V_star)
-        #gamma_3 = self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T
         gamma_3 = self.V_star * gamma_2

         self._dL_dpsi0 = -0.5 * self.beta_star#dA_dpsi0: logdet(self.beta_star)
@@ -97,31 +94,31 @@ class FITC(sparse_GP):
         self._dL_dpsi1 += gamma_1 - mdot(psi1beta.T,Hi,self.psi1,gamma_1) #dD_dpsi1
         self._dL_dKmm = -0.5 * np.dot(Kmmipsi1,b_psi1_Ki) #dA_dKmm: logdet(self.beta_star)
-        self._dL_dKmm += -.5*Kmmi + .5*LBL_inv + mdot(LBL_inv,psi1beta,Kmmipsi1.T) #dC_dKmm
+        self._dL_dKmm += .5*(LBL_inv - Kmmi) + mdot(LBL_inv,psi1beta,Kmmipsi1.T) #dC_dKmm
         self._dL_dKmm += -.5 * mdot(Hi,self.psi1,gamma_1) #dD_dKmm

         self._dpsi1_dtheta = 0
         self._dpsi1_dX = 0
         self._dKmm_dtheta = 0
         self._dKmm_dX = 0
-        for psi1_n,V_n,X_n,alpha_n,gamma_n,gamma_k in zip(self.psi1.T,self.V_star,self.X,alpha,gamma_2,gamma_3):
-            psin_K = np.dot(psi1_n[None,:],Kmmi)
-            _dpsi1 = -V_n**2 * psin_K #dA_dpsi1: yT*beta_star*y
-            _dpsi1 += - alpha_n * psin_K #Diag_dC_dpsi1
-            _dpsi1 += - gamma_n**2 * psin_K + 2. * gamma_k * psin_K #Diag_dD_dpsi1
-            _dKmm = .5*V_n**2 * np.dot(psin_K.T,psin_K) #dA_dKmm: yT*beta_star*y
-            _dKmm += .5 * alpha_n * np.dot(psin_K.T,psin_K) #Diag_dC_dKmm
-            _dKmm += .5*gamma_n**2 * np.dot(psin_K.T,psin_K) - gamma_k * np.dot(psin_K.T,psin_K) #Diag_dD_dKmm
-            self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,X_n[None,:],self.Z)
+        self._dpsi1_dX_jkj = 0
+        self._dpsi1_dtheta_jkj = 0
+        for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.N),self.V_star,alpha,gamma_2,gamma_3):
+            K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
+            #Diag_dpsi1 = Diag_dA_dpsi1: yT*beta_star*y + Diag_dC_dpsi1 + Diag_dD_dpsi1
+            _dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
+            #Diag_dKmm = Diag_dA_dKmm: yT*beta_star*y + Diag_dC_dKmm + Diag_dD_dKmm
+            _dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K
+            self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
             self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
             self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm ,self.Z)
-            self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,X_n[None,:])
+            self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])

         # the partial derivative vector for the likelihood
         if self.likelihood.Nparams == 0:
@@ -235,8 +232,6 @@ class FITC(sparse_GP):
             var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
         else:
             Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
-            Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed?
-            var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) # TODO: RA, is this line needed?
             var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
         return mu_star[:,None],var
     else:
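A pattern this commit standardises on across EP and FITC is inverting a positive-definite matrix through its Cholesky factor (GPy's jitchol/chol_inv). A pure numpy/scipy equivalent of the three-line idiom, on a toy matrix:

import numpy as np
from scipy import linalg

A = np.random.randn(4, 8)
K = np.dot(A, A.T) + 1e-6 * np.eye(4)  # positive definite toy matrix

L = linalg.cholesky(K, lower=True)                      # K = L L.T
Li = linalg.solve_triangular(L, np.eye(4), lower=True)  # L^{-1}
Ki = np.dot(Li.T, Li)                                   # K^{-1} = L^{-T} L^{-1}

assert np.allclose(np.dot(Ki, K), np.eye(4), atol=1e-8)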

View file

@@ -28,7 +28,7 @@ class GPLVM(GP):
         if X is None:
             X = self.initialise_latent(init, Q, Y)
         if kernel is None:
-            kernel = kern.rbf(Q) + kern.bias(Q)
+            kernel = kern.rbf(Q, ARD=Q>1) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
         likelihood = Gaussian(Y, normalize=normalize_Y)
         GP.__init__(self, X, likelihood, kernel, **kwargs)

View file

@@ -76,7 +76,7 @@ class sparse_GP(GP):
         # psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1) / sf2)).sum(0)
         psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1))).sum(0)
         evals, evecs = linalg.eigh(psi2_beta_scaled)
-        clipped_evals = np.clip(evals, 0., 1e15) # TODO: make clipping configurable
+        clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
         if not np.allclose(evals, clipped_evals):
             print "Warning: clipping posterior eigenvalues"
         tmp = evecs * np.sqrt(clipped_evals)
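What the tightened clip does, in isolation: project the spectrum of a symmetric matrix onto [0, 1e6] before reassembling it. The 1e6 cap mirrors the new value in the diff; the matrix below is a toy stand-in, not GPy's psi2 statistic:

import numpy as np

S = np.random.randn(4, 4)
S = 0.5 * (S + S.T)                       # symmetric, possibly indefinite
evals, evecs = np.linalg.eigh(S)
clipped_evals = np.clip(evals, 0., 1e6)
S_psd = np.dot(evecs * clipped_evals, evecs.T)
assert np.all(np.linalg.eigvalsh(S_psd) >= -1e-10)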

View file

@@ -4,6 +4,7 @@ import numpy as np
 import GPy
 import scipy.sparse
 import scipy.io
+import cPickle as pickle

 data_path = os.path.join(os.path.dirname(__file__), 'datasets')
 default_seed = 10000
@@ -96,16 +97,29 @@ def stick():
     lbls = 'connect'
     return {'Y': Y, 'connect' : connect, 'info': "Stick man data from Ohio."}

+def swiss_roll_generated(N=1000, sigma=0.0):
+    with open(os.path.join(data_path, 'swiss_roll.pickle')) as f:
+        data = pickle.load(f)
+    Na = data['Y'].shape[0]
+    perm = np.random.permutation(np.r_[:Na])[:N]
+    Y = data['Y'][perm, :]
+    t = data['t'][perm]
+    c = data['colors'][perm, :]
+    so = np.argsort(t)
+    Y = Y[so, :]
+    t = t[so]
+    c = c[so, :]
+    return {'Y':Y, 't':t, 'colors':c}

 def swiss_roll_1000():
     mat_data = scipy.io.loadmat(os.path.join(data_path, 'swiss_roll_data'))
     Y = mat_data['X_data'][:, 0:1000].transpose()
     return {'Y': Y, 'info': "Subsample of the swiss roll data extracting only the first 1000 values."}

-def swiss_roll():
+def swiss_roll(N=3000):
     mat_data = scipy.io.loadmat(os.path.join(data_path, 'swiss_roll_data.mat'))
-    Y = mat_data['X_data'][:, 0:3000].transpose()
-    return {'Y': Y, 'info': "The first 3,000 points from the swiss roll data of Tennenbaum, de Silva and Langford (2001)."}
+    Y = mat_data['X_data'][:, 0:N].transpose()
+    return {'Y': Y, 'X': mat_data['X_data'], 'info': "The first 3,000 points from the swiss roll data of Tennenbaum, de Silva and Langford (2001)."}

 def toy_rbf_1d(seed=default_seed):
     np.random.seed(seed=seed)
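If the swiss_roll.pickle file is unavailable, a comparable dataset can be generated directly; sklearn's make_swiss_roll is one option. This is an alternative sketch, not what swiss_roll_generated above actually does:

import numpy as np
from sklearn.datasets import make_swiss_roll

Y, t = make_swiss_roll(n_samples=1000, noise=0.0)
so = np.argsort(t)   # sort by the roll parameter, as swiss_roll_generated does
Y, t = Y[so, :], t[so]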

File diff suppressed because one or more lines are too long

View file

@@ -237,6 +237,16 @@ def tdot(*args, **kwargs):
     return tdot_numpy(*args,**kwargs)

 def DSYR(A,x,alpha=1.):
+    """
+    Performs a symmetric rank-1 update operation:
+    A <- A + alpha * np.dot(x,x.T)
+
+    Arguments
+    ---------
+    :param A: Symmetric NxN np.array
+    :param x: Nx1 np.array
+    :param alpha: scalar
+    """
     N = c_int(A.shape[0])
     LDA = c_int(A.shape[0])
     UPLO = c_char('l')
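A small check of the rank-1 update just documented. Note the wrapper passes UPLO='l' to BLAS, so only the lower triangle of A is assumed to be updated in place:

import numpy as np
from GPy.util.linalg import DSYR

A = np.zeros((3, 3))
x = np.arange(3.)
DSYR(A, x, alpha=2.)
assert np.allclose(np.tril(A), np.tril(2. * np.outer(x, x)))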

View file

@@ -44,7 +44,7 @@ class vector_show(data_show):

 class lvm(data_show):
-    def __init__(self, vals, model, data_visualize, latent_axes=None, latent_index=[0,1]):
+    def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1]):
         """Visualize a latent variable model

         :param model: the latent variable model to visualize.
@@ -71,7 +71,7 @@ class lvm(data_show):
         self.data_visualize = data_visualize
         self.model = model
         self.latent_axes = latent_axes
+        self.sense_axes = sense_axes
         self.called = False
         self.move_on = False
         self.latent_index = latent_index
@@ -81,10 +81,12 @@ class lvm(data_show):
         self.latent_values = vals
         self.latent_handle = self.latent_axes.plot([0],[0],'rx',mew=2)[0]
         self.modify(vals)
+        self.show_sensitivities()

     def modify(self, vals):
         """When latent values are modified update the latent representation and also update the output visualization."""
         y = self.model.predict(vals)[0]
+        print y
         self.data_visualize.modify(y)
         self.latent_handle.set_data(vals[self.latent_index[0]], vals[self.latent_index[1]])
         self.axes.figure.canvas.draw()
@@ -99,6 +101,7 @@ class lvm(data_show):
         if event.inaxes!=self.latent_axes: return
         self.move_on = not self.move_on
         self.called = True
+
     def on_move(self, event):
         if event.inaxes!=self.latent_axes: return
         if self.called and self.move_on:
@@ -107,38 +110,9 @@ class lvm(data_show):
             self.latent_values[self.latent_index[1]]=event.ydata
             self.modify(self.latent_values)

-class lvm_subplots(lvm):
-    """
-    latent_axes is a np array of dimension np.ceil(Q/2) + 1,
-    one for each pair of the axes, and the last one for the sensitivity bar chart
-    """
-    def __init__(self, vals, model, data_visualize, latent_axes=None, latent_index=[0,1]):
-        lvm.__init__(self, vals, model,data_visualize,latent_axes,[0,1])
-        self.nplots = int(np.ceil(model.Q/2.))+1
-        lvm.__init__(self,model,data_visualize,latent_axes,latent_index)
-        self.latent_values = np.zeros(2*np.ceil(self.model.Q/2.)) # possibly an extra dimension on this
-        assert latent_axes.size == self.nplots
-
-class lvm_dimselect(lvm):
-    """
-    A visualizer for latent variable models which allows selection of the latent dimensions to use by clicking on a bar chart of their length scales.
-    """
-    def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0, 1]):
-        if latent_axes==None and sense_axes==None:
-            self.fig,(latent_axes,self.sense_axes) = plt.subplots(1,2)
-        elif sense_axes==None:
-            fig=plt.figure()
-            self.sense_axes = fig.add_subplot(111)
-        else:
-            self.sense_axes = sense_axes
-        lvm.__init__(self,vals,model,data_visualize,latent_axes,latent_index)
-        self.show_sensitivities()
-        print "use left and right mouse butons to select dimensions"
-
     def show_sensitivities(self):
         # A click in the bar chart axis for selecting a dimension.
-        if self.sense_axes != None:
         self.sense_axes.cla()
         self.sense_axes.bar(np.arange(self.model.Q),1./self.model.input_sensitivity(),color='b')
@@ -152,6 +126,52 @@ class lvm_dimselect(lvm):
         self.sense_axes.figure.canvas.draw()

+class lvm_subplots(lvm):
+    """
+    latent_axes is a np array of dimension np.ceil(Q/2),
+    one for each pair of the latent dimensions.
+    """
+    def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None):
+        self.nplots = int(np.ceil(model.Q/2.))+1
+        assert len(latent_axes)==self.nplots
+        if vals==None:
+            vals = model.X[0, :]
+        self.latent_values = vals
+
+        for i, axis in enumerate(latent_axes):
+            if i == self.nplots-1:
+                if self.nplots*2!=model.Q:
+                    latent_index = [i*2, i*2]
+                    lvm.__init__(self, self.latent_vals, model, data_visualize, axis, sense_axes, latent_index=latent_index)
+            else:
+                latent_index = [i*2, i*2+1]
+                lvm.__init__(self, self.latent_vals, model, data_visualize, axis, latent_index=latent_index)
+
+class lvm_dimselect(lvm):
+    """
+    A visualizer for latent variable models which allows selection of the latent dimensions to use by clicking on a bar chart of their length scales.
+
+    For an example of the visualizer's use try:
+
+    GPy.examples.dimensionality_reduction.BGPLVM_oil()
+    """
+    def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0, 1]):
+        if latent_axes==None and sense_axes==None:
+            self.fig,(latent_axes,self.sense_axes) = plt.subplots(1,2)
+        elif sense_axes==None:
+            fig=plt.figure()
+            self.sense_axes = fig.add_subplot(111)
+        else:
+            self.sense_axes = sense_axes
+        lvm.__init__(self,vals,model,data_visualize,latent_axes,sense_axes,latent_index)
+        print "use left and right mouse buttons to select dimensions"
+
     def on_click(self, event):
         if event.inaxes==self.sense_axes:
@@ -177,12 +197,6 @@ class lvm_dimselect(lvm):
         self.called = True

-    def on_move(self, event):
-        if event.inaxes!=self.latent_axes: return
-        if self.called and self.move_on:
-            self.latent_values[self.latent_index[0]]=event.xdata
-            self.latent_values[self.latent_index[1]]=event.ydata
-            self.modify(self.latent_values)

     def on_leave(self,event):
         latent_values = self.latent_values.copy()
@@ -214,15 +228,30 @@ class image_show(data_show):
     def modify(self, vals):
         self.set_image(vals)
         self.handle.set_array(self.vals)
-        self.axes.figure.canvas.draw() # Teo - original line: plt.show()
+        self.axes.figure.canvas.draw()

     def set_image(self, vals):
         dim = self.dimensions[0] * self.dimensions[1]
+        nImg = np.sqrt(vals[0,].size/dim)
+        if nImg > 1 and nImg.is_integer(): # Show a mosaic of images
+            nImg = np.int(nImg)
+            self.vals = np.zeros((self.dimensions[0]*nImg, self.dimensions[1]*nImg))
+            for iR in range(nImg):
+                for iC in range(nImg):
+                    currImgId = iR*nImg + iC
+                    currImg = np.reshape(vals[0,dim*currImgId+np.array(range(dim))], self.dimensions, order='F')
+                    firstRow = iR*self.dimensions[0]
+                    lastRow = (iR+1)*self.dimensions[0]
+                    firstCol = iC*self.dimensions[1]
+                    lastCol = (iC+1)*self.dimensions[1]
+                    self.vals[firstRow:lastRow, firstCol:lastCol] = currImg
+        else:
             self.vals = np.reshape(vals[0,dim*self.selectImage+np.array(range(dim))], self.dimensions, order='F')
         if self.transpose:
             self.vals = self.vals.T.copy()
-        if not self.scale:
-            self.vals = self.vals
+        # if not self.scale:
+        #     self.vals = self.vals
         if self.invert:
             self.vals = -self.vals
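The mosaic branch in set_image, reduced to its index arithmetic: pack n*n column-major ('F') flattened images of shape (h, w) into one (h*n, w*n) canvas. Toy sizes, numpy only:

import numpy as np

h, w, n = 2, 3, 2
dim = h * w
vals = np.arange(n * n * dim, dtype=float)[None, :]  # four flattened images

canvas = np.zeros((h * n, w * n))
for iR in range(n):
    for iC in range(n):
        img_id = iR * n + iC
        img = np.reshape(vals[0, dim * img_id + np.arange(dim)], (h, w), order='F')
        canvas[iR * h:(iR + 1) * h, iC * w:(iC + 1) * w] = img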