Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
James Hensman 2013-05-17 10:28:21 +01:00
commit dbfcebe2a0
19 changed files with 410 additions and 330 deletions

8
.gitignore vendored
View file

@ -39,3 +39,11 @@ nosetests.xml
#bfgs optimiser leaves this lying around
iterate.dat
# Nosetests #
#############
*.noseids
# git merge files #
###################
*.orig

View file

@ -39,23 +39,29 @@ class logexp(transformation):
return '(+ve)'
class logexp_clipped(transformation):
def __init__(self):
max_bound = 1e300
min_bound = 1e-10
log_max_bound = np.log(max_bound)
log_min_bound = np.log(min_bound)
def __init__(self, lower=1e-6):
self.domain = 'positive'
self.lower = lower
def f(self, x):
f = np.log(1. + np.exp(x))
exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound))
f = np.log(1. + exp)
return f
def finv(self, f):
return np.log(np.exp(f) - 1.)
return np.log(np.exp(np.clip(f, self.min_bound, self.max_bound)) - 1.)
def gradfactor(self, f):
ef = np.exp(f)
gf = (ef - 1.) / ef
return np.where(f < 1e-6, 0, gf)
def initialize(self,f):
if np.any(f<0.):
return np.where(f < self.lower, 0, gf)
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
return '(+ve)'
return '(+ve_c)'
class exponent(transformation):
def __init__(self):
@ -67,7 +73,7 @@ class exponent(transformation):
def gradfactor(self, f):
return f
def initialize(self, f):
if np.any(f<0.):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
@ -83,7 +89,7 @@ class negative_exponent(transformation):
def gradfactor(self, f):
return f
def initialize(self, f):
if np.any(f>0.):
if np.any(f > 0.):
print "Warning: changing parameters to satisfy constraints"
return -np.abs(f)
def __str__(self):
@ -114,11 +120,11 @@ class logistic(transformation):
def finv(self, f):
return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf))
def gradfactor(self, f):
return (f-self.lower)*(self.upper-f)/self.difference
def initialize(self,f):
if np.any(np.logical_or(f<self.lower,f>self.upper)):
return (f - self.lower) * (self.upper - f) / self.difference
def initialize(self, f):
if np.any(np.logical_or(f < self.lower, f > self.upper)):
print "Warning: changing parameters to satisfy constraints"
return np.where(np.logical_or(f<self.lower,f>self.upper),self.f(f*0.),f)
return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
def __str__(self):
return '({},{})'.format(self.lower, self.upper)

View file

@ -79,7 +79,6 @@ def toy_linear_1d_classification(seed=default_seed):
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y == -1] = 0
# Kernel object
kernel = GPy.kern.rbf(1)
@ -96,7 +95,7 @@ def toy_linear_1d_classification(seed=default_seed):
m.update_likelihood_approximation()
# Parameters optimization:
m.optimize()
#m.EPEM() #FIXME
#m.pseudo_EM() #FIXME
# Plot
pb.subplot(211)
@ -109,14 +108,13 @@ def toy_linear_1d_classification(seed=default_seed):
def sparse_toy_linear_1d_classification(seed=default_seed):
"""
Simple 1D classification example
Sparse 1D classification example
:param seed : seed value for data generation (default is 4).
:type seed: int
"""
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y == -1] = 0
# Kernel object
kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
@ -168,7 +166,6 @@ def sparse_crescent_data(inducing=10, seed=default_seed):
sample = np.random.randint(0,data['X'].shape[0],inducing)
Z = data['X'][sample,:]
#Z = (np.random.random_sample(2*inducing)*(data['X'].max()-data['X'].min())+data['X'].min()).reshape(inducing,-1)
# create sparse GP EP model
m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)

View file

@ -2,13 +2,11 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from matplotlib import pyplot as plt, pyplot
from matplotlib import pyplot as plt
import GPy
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
from GPy.util.datasets import simulation_BGPLVM
from GPy.core.transformations import square, logexp_clipped
from GPy.util.datasets import swiss_roll_generated
default_seed = np.random.seed(123344)
@ -47,10 +45,11 @@ def BGPLVM(seed=default_seed):
def GPLVM_oil_100(optimize=True):
data = GPy.util.datasets.oil_100()
Y = data['X']
# create simple GP model
kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6)
m = GPy.models.GPLVM(data['X'], 6, kernel=kernel)
m = GPy.models.GPLVM(Y, 6, kernel=kernel)
m.data_labels = data['Y'].argmax(axis=1)
# optimize
@ -63,27 +62,88 @@ def GPLVM_oil_100(optimize=True):
m.plot_latent(labels=m.data_labels)
return m
def BGPLVM_oil(optimize=True, N=100, Q=10, M=20, max_f_eval=300, plot=False):
def swiss_roll(optimize=True, N=1000, M=15, Q=4, sigma=.2, plot=False):
from GPy.util.datasets import swiss_roll
from GPy.core.transformations import logexp_clipped
data = swiss_roll_generated(N=N, sigma=sigma)
Y = data['Y']
Y -= Y.mean()
Y /= Y.std()
t = data['t']
c = data['colors']
try:
from sklearn.manifold.isomap import Isomap
iso = Isomap().fit(Y)
X = iso.embedding_
if Q > 2:
X = np.hstack((X, np.random.randn(N, Q - 2)))
except ImportError:
X = np.random.randn(N, Q)
if plot:
from mpl_toolkits import mplot3d
import pylab
fig = pylab.figure("Swiss Roll Data")
ax = fig.add_subplot(121, projection='3d')
ax.scatter(*Y.T, c=c)
ax.set_title("Swiss Roll")
ax = fig.add_subplot(122)
ax.scatter(*X.T[:2], c=c)
ax.set_title("Initialization")
var = .5
S = (var * np.ones_like(X) + np.clip(np.random.randn(N, Q) * var ** 2,
- (1 - var),
(1 - var))) + .001
Z = np.random.permutation(X)[:M]
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
m = Bayesian_GPLVM(Y, Q, X=X, X_variance=S, M=M, Z=Z, kernel=kernel)
m.data_colors = c
m.data_t = t
m.constrain('variance|length', logexp_clipped())
m['lengthscale'] = 1. # X.var(0).max() / X.var(0)
m['noise'] = Y.var() / 100.
m.ensure_default_constraints()
if optimize:
m.optimize('scg', messages=1)
return m
def BGPLVM_oil(optimize=True, N=100, Q=5, M=25, max_f_eval=4e3, plot=False, **k):
data = GPy.util.datasets.oil()
from GPy.core.transformations import logexp_clipped
np.random.seed(0)
# create simple GP model
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
Y = data['X'][:N]
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=kernel, M=M)
Yn = Y - Y.mean(0)
Yn /= Yn.std(0)
m = GPy.models.Bayesian_GPLVM(Yn, Q, kernel=kernel, M=M, **k)
m.data_labels = data['Y'][:N].argmax(axis=1)
m.constrain('variance', logexp_clipped())
m.constrain('length', logexp_clipped())
m['lengt'] = 100.
# m.constrain('variance', logexp_clipped())
# m.constrain('length', logexp_clipped())
m['lengt'] = m.X.var(0).max() / m.X.var(0)
m['noise'] = Yn.var() / 100.
m.ensure_default_constraints()
# optimize
if optimize:
m.unconstrain('noise'); m.constrain_fixed('noise', Y.var() / 100.)
m.optimize('scg', messages=1, max_f_eval=150)
m.unconstrain('noise')
m.constrain('noise', logexp_clipped())
# m.unconstrain('noise'); m.constrain_fixed('noise')
# m.optimize('scg', messages=1, max_f_eval=200)
# m.unconstrain('noise')
# m.constrain('noise', logexp_clipped())
m.optimize('scg', messages=1, max_f_eval=max_f_eval)
if plot:
@ -115,6 +175,8 @@ def oil_100():
# m.plot_latent(labels=data['Y'].argmax(axis=1))
return m
def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
x = np.linspace(0, 4 * np.pi, N)[:, None]
s1 = np.vectorize(lambda x: np.sin(x))
@ -178,6 +240,7 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
return slist, [S1, S2, S3], Ylist
def bgplvm_simulation_matlab_compare():
from GPy.util.datasets import simulation_BGPLVM
sim_data = simulation_BGPLVM()
Y = sim_data['Y']
S = sim_data['S']
@ -213,6 +276,8 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
max_burnin=100, true_X=False,
do_opt=True,
max_f_eval=1000):
from GPy.core.transformations import logexp_clipped
D1, D2, D3, N, M, Q = 15, 8, 8, 350, 3, 6
slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
@ -317,6 +382,8 @@ def mrd_simulation(plot_sim=False):
from GPy.models import mrd
from GPy import kern
from GPy.core.transformations import logexp_clipped
reload(mrd); reload(kern)
# k = kern.rbf(2, ARD=True) + kern.bias(2) + kern.white(2)
@ -365,13 +432,23 @@ def mrd_silhouette():
pass
def brendan_faces():
from GPy import kern
data = GPy.util.datasets.brendan_faces()
Y = data['Y'][0:-1:10, :]
m = GPy.models.GPLVM(data['Y'], 2)
Q = 2
# Y = data['Y'][0:-1:2, :]
Y = data['Y']
Yn = Y - Y.mean()
Yn /= Yn.std()
m = GPy.models.GPLVM(Yn, Q)#, M=Y.shape[0]/4)
# optimize
# m.constrain_fixed('white', 1e-2)
# m.constrain_bounded('noise', 1e-6, 10)
m.constrain('rbf', GPy.core.transformations.logexp_clipped())
m.ensure_default_constraints()
m.optimize(messages=1, max_f_eval=10000)
m.optimize('scg', messages=1, max_f_eval=10000)
ax = m.plot_latent()
y = m.likelihood.Y[0, :]

View file

@ -111,7 +111,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
iteration += 1
if display:
print '\r',
print 'i: {0:>5g} f:{1:> 12e} b:{2:> 12e} |g|:{3:> 12e}'.format(iteration, fnow, beta, current_grad),
print 'Iter: {0:>0{mi}g} Obj:{1:> 12e} Scale:{2:> 12e} |g|:{3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len(str(maxiters))),
# print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush()
@ -130,7 +130,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
# If the gradient is zero then we are done.
if current_grad <= gtol:
status = 'converged'
return x, flog, function_eval, status
break
# return x, flog, function_eval, status
# Adjust beta according to comparison ratio.
if Delta < 0.25:
@ -147,9 +148,10 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
elif success:
gamma = np.dot(gradold - gradnew, gradnew) / (mu)
d = gamma * d - gradnew
else:
# If we get here, then we haven't terminated in the given number of
# iterations.
status = "maxiter exceeded"
# If we get here, then we haven't terminated in the given number of
# iterations.
status = "maxiter exceeded"
print ""
return x, flog, function_eval, status

View file

@ -1,146 +0,0 @@
#Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
#Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import numpy as np
import sys
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=1e-6, ftol=1e-6):
"""
Optimisation through Scaled Conjugate Gradients (SCG)
f: the objective function
gradf : the gradient function (should return a 1D np.ndarray)
x : the initial condition
Returns
x the optimal value for x
flog : a list of all the objective values
"""
sigma0 = 1.0e-4
fold = f(x, *optargs) # Initial function value.
function_eval = 1
fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient.
gradold = gradnew.copy()
d = -gradnew # Initial search direction.
success = True # Force calculation of directional derivs.
nsuccess = 0 # nsuccess counts number of successes.
beta = 1.0 # Initial scale parameter.
betamin = 1.0e-15 # Lower bound on scale.
betamax = 1.0e100 # Upper bound on scale.
status = "Not converged"
flog = [fold]
iteration = 0
# Main optimization loop.
while iteration < maxiters:
# Calculate first and second directional derivatives.
if success:
mu = np.dot(d, gradnew)
if mu >= 0:
d = -gradnew
mu = np.dot(d, gradnew)
kappa = np.dot(d, d)
sigma = sigma0/np.sqrt(kappa)
xplus = x + sigma*d
gplus = gradf(xplus, *optargs)
theta = np.dot(d, (gplus - gradnew))/sigma
# Increase effective curvature and evaluate step size alpha.
delta = theta + beta*kappa
if delta <= 0:
delta = beta*kappa
beta = beta - theta/kappa
alpha = - mu/delta
# Calculate the comparison ratio.
xnew = x + alpha*d
fnew = f(xnew, *optargs)
function_eval += 1
if function_eval >= max_f_eval:
status = "Maximum number of function evaluations exceeded"
return x, flog, function_eval, status
Delta = 2.*(fnew - fold)/(alpha*mu)
if Delta >= 0.:
success = True
nsuccess += 1
x = xnew
fnow = fnew
else:
success = False
fnow = fold
# Store relevant variables
flog.append(fnow) # Current function value
iteration += 1
if display:
print '\r',
print 'Iteration: {0:>5g} Objective:{1:> 12e} Scale:{2:> 12e}'.format(iteration, fnow, beta),
# print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush()
if success:
# Test for termination
if (np.max(np.abs(alpha*d)) < xtol) or (np.abs(fnew-fold) < ftol):
status='converged'
return x, flog, function_eval, status
else:
# Update variables for new position
fold = fnew
gradold = gradnew
gradnew = gradf(x, *optargs)
# If the gradient is zero then we are done.
if np.dot(gradnew,gradnew) == 0:
return x, flog, function_eval, status
# Adjust beta according to comparison ratio.
if Delta < 0.25:
beta = min(4.0*beta, betamax)
if Delta > 0.75:
beta = max(0.5*beta, betamin)
# Update search direction using Polak-Ribiere formula, or re-start
# in direction of negative gradient after nparams steps.
if nsuccess == x.size:
d = -gradnew
nsuccess = 0
elif success:
gamma = np.dot(gradold - gradnew,gradnew)/(mu)
d = gamma*d - gradnew
# If we get here, then we haven't terminated in the given number of
# iterations.
status = "maxiter exceeded"
return x, flog, function_eval, status

View file

@ -61,12 +61,12 @@ class kern(parameterised):
ax.bar(np.arange(len(ard_params)) - 0.4, ard_params)
ax.set_xticks(np.arange(len(ard_params)))
ax.set_xticklabels([r"${}$".format(i + 1) for i in range(len(ard_params))])
ax.set_xticklabels([r"${}$".format(i) for i in range(len(ard_params))])
return ax
def _transform_gradients(self, g):
x = self._get_params()
[np.put(x,i,x*t.gradfactor(x[i])) for i,t in zip(self.constrained_indices, self.constraints)]
[np.put(x, i, x * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)]
[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
if len(self.tied_indices) or len(self.fixed_indices):
to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices]))
@ -88,7 +88,7 @@ class kern(parameterised):
"""
return self.add(other)
def add(self, other,tensor=False):
def add(self, other, tensor=False):
"""
Add another kernel to this one. Both kernels are defined on the same _space_
:param other: the other kernel to be added
@ -103,7 +103,7 @@ class kern(parameterised):
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices]
newkern.constrained_indices = self.constrained_indices + [x + self.Nparam for x in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
@ -113,7 +113,7 @@ class kern(parameterised):
assert self.D == other.D
newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [i+self.Nparam for i in other.constrained_indices]
newkern.constrained_indices = self.constrained_indices + [i + self.Nparam for i in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
@ -126,7 +126,7 @@ class kern(parameterised):
"""
return self.prod(other)
def prod(self, other,tensor=False):
def prod(self, other, tensor=False):
"""
multiply two kernels (either on the same space, or on the tensor product of the input space)
:param other: the other kernel to be added
@ -136,12 +136,12 @@ class kern(parameterised):
K2 = other.copy()
slices = []
for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
s1, s2 = [False]*K1.D, [False]*K2.D
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
s1, s2 = [False] * K1.D, [False] * K2.D
s1[sl1], s2[sl2] = [True], [True]
slices += [s1+s2]
slices += [s1 + s2]
newkernparts = [prod(k1, k2,tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)]
newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)]
if tensor:
newkern = kern(K1.D + K2.D, newkernparts, slices)
@ -176,8 +176,8 @@ class kern(parameterised):
prev_constr_ind = [K1.constrained_indices] + [K1.Nparam + i for i in K2.constrained_indices]
prev_constr = K1.constraints + K2.constraints
prev_constr_fix = K1.fixed_indices + [arr + K1.Nparam for arr in K2.fixed_indices]
prev_constr_fix_values = K1.fixed_values + K2.fixed_values
# prev_constr_fix = K1.fixed_indices + [arr + K1.Nparam for arr in K2.fixed_indices]
# prev_constr_fix_values = K1.fixed_values + K2.fixed_values
# follow the previous ties
for arr in prev_ties:
@ -189,8 +189,8 @@ class kern(parameterised):
index = np.where(index_param == i)[0]
if index.size > 1:
self.tie_params(index)
for i,t in zip(prev_constr_ind,prev_constr):
self.constrain(np.where(index_param == i)[0],t)
for i, t in zip(prev_constr_ind, prev_constr):
self.constrain(np.where(index_param == i)[0], t)
def _get_params(self):
return np.hstack([p._get_params() for p in self.parts])
@ -208,15 +208,15 @@ class kern(parameterised):
return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self.parts)], [])
def K(self, X, X2=None, which_parts='all'):
if which_parts=='all':
which_parts = [True]*self.Nparts
if which_parts == 'all':
which_parts = [True] * self.Nparts
assert X.shape[1] == self.D
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0]))
[p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
else:
target = np.zeros((X.shape[0], X2.shape[0]))
[p.K(X[:, i_s], X2[:,i_s], target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
[p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
return target
def dK_dtheta(self, dL_dK, X, X2=None):
@ -248,8 +248,8 @@ class kern(parameterised):
return target
def Kdiag(self, X, which_parts='all'):
if which_parts=='all':
which_parts = [True]*self.Nparts
if which_parts == 'all':
which_parts = [True] * self.Nparts
assert X.shape[1] == self.D
target = np.zeros(X.shape[0])
[p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on]
@ -270,22 +270,22 @@ class kern(parameterised):
def psi0(self, Z, mu, S):
target = np.zeros(mu.shape[0])
[p.psi0(Z[:,i_s], mu[:,i_s], S[:,i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
[p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
return target
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
target = np.zeros(self.Nparam)
[p.dpsi0_dtheta(dL_dpsi0, Z[:,i_s], mu[:,i_s], S[:,i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
[p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
return self._transform_gradients(target)
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
[p.dpsi0_dmuS(dL_dpsi0, Z[:,i_s], mu[:,i_s], S[:,i_s], target_mu[:,i_s], target_S[:,i_s]) for p, i_s in zip(self.parts, self.input_slices)]
[p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target_mu, target_S
def psi1(self, Z, mu, S):
target = np.zeros((mu.shape[0], Z.shape[0]))
[p.psi1(Z[:,i_s], mu[:,i_s], S[:,i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
[p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
return target
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
@ -314,7 +314,7 @@ class kern(parameterised):
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
# compute the "cross" terms
#TODO: input_slices needed
# TODO: input_slices needed
for p1, p2 in itertools.combinations(self.parts, 2):
# white doesn;t combine with anything
if p1.name == 'white' or p2.name == 'white':
@ -335,9 +335,9 @@ class kern(parameterised):
target += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
# rbf X linear
elif p1.name == 'linear' and p2.name == 'rbf':
raise NotImplementedError # TODO
raise NotImplementedError # TODO
elif p2.name == 'linear' and p1.name == 'rbf':
raise NotImplementedError # TODO
raise NotImplementedError # TODO
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return target
@ -365,7 +365,7 @@ class kern(parameterised):
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1._psi1 * 2., Z, mu, S, target[ps2])
# linear X bias
elif p1.name == 'bias' and p2.name == 'linear':
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2]) # [ps1])
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2]) # [ps1])
psi1 = np.zeros((mu.shape[0], Z.shape[0]))
p2.psi1(Z, mu, S, psi1)
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps1])
@ -376,9 +376,9 @@ class kern(parameterised):
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps2])
# rbf X linear
elif p1.name == 'linear' and p2.name == 'rbf':
raise NotImplementedError # TODO
raise NotImplementedError # TODO
elif p2.name == 'linear' and p1.name == 'rbf':
raise NotImplementedError # TODO
raise NotImplementedError # TODO
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
@ -389,7 +389,7 @@ class kern(parameterised):
[p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
# compute the "cross" terms
#TODO: we need input_slices here.
# TODO: we need input_slices here.
for p1, p2 in itertools.combinations(self.parts, 2):
# white doesn;t combine with anything
if p1.name == 'white' or p2.name == 'white':
@ -406,9 +406,9 @@ class kern(parameterised):
p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target)
# rbf X linear
elif p1.name == 'linear' and p2.name == 'rbf':
raise NotImplementedError # TODO
raise NotImplementedError # TODO
elif p2.name == 'linear' and p1.name == 'rbf':
raise NotImplementedError # TODO
raise NotImplementedError # TODO
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
@ -419,7 +419,7 @@ class kern(parameterised):
[p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
# compute the "cross" terms
#TODO: we need input_slices here.
# TODO: we need input_slices here.
for p1, p2 in itertools.combinations(self.parts, 2):
# white doesn;t combine with anything
if p1.name == 'white' or p2.name == 'white':
@ -436,9 +436,9 @@ class kern(parameterised):
p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S)
# rbf X linear
elif p1.name == 'linear' and p2.name == 'rbf':
raise NotImplementedError # TODO
raise NotImplementedError # TODO
elif p2.name == 'linear' and p1.name == 'rbf':
raise NotImplementedError # TODO
raise NotImplementedError # TODO
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"

View file

@ -1,6 +1,6 @@
import numpy as np
from scipy import stats, linalg
from ..util.linalg import pdinv,mdot,jitchol,DSYR
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot
from likelihood import likelihood
class EP(likelihood):
@ -117,8 +117,6 @@ class EP(likelihood):
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
#si=Sigma[:,i:i+1]
#Sigma -= Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T)#DSYR
mu = np.dot(Sigma,self.v_tilde)
self.iterations += 1
#Sigma recomptutation with Cholesky decompositon
@ -135,12 +133,12 @@ class EP(likelihood):
return self._compute_GP_variables()
#def fit_DTC(self, Knn_diag, Kmn, Kmm):
def fit_DTC(self, Kmm, Kmn):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see ... 2013.
"""
M = Kmm.shape[0]
#TODO: this doesn't work with uncertain inputs!
@ -149,12 +147,20 @@ class EP(likelihood):
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
Sigma0 = Qnn = Knm*Kmmi*Kmn
"""
Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
KmnKnm = np.dot(Kmn, Kmn.T)
KmnKnm = np.dot(Kmn,Kmn.T)
Lm = jitchol(Kmm)
Lmi = chol_inv(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
KmmiKmn = np.dot(Kmmi,Kmn)
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
LLT0 = Kmm.copy()
#Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
#KmnKnm = np.dot(Kmn, Kmn.T)
#KmmiKmn = np.dot(Kmmi,Kmn)
#Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
#LLT0 = Kmm.copy()
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
@ -197,19 +203,19 @@ class EP(likelihood):
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
L = jitchol(LLT)
#cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.sum(V*V,-2)
si = np.sum(V.T*V[:,i],-1)
mu = mu + (Delta_v-Delta_tau*mu[i])*si
mu += (Delta_v-Delta_tau*mu[i])*si
self.iterations += 1
#Sigma recomputation with Cholesky decompositon
LLT0 = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
L = jitchol(LLT)
V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1)
V2,info = linalg.lapack.flapack.dtrtrs(L.T,V,lower=0)
@ -235,7 +241,9 @@ class EP(likelihood):
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
"""
Kmmi, self.Lm, self.Lmi, Kmm_logdet = pdinv(Kmm)
Lm = jitchol(Kmm)
Lmi = chol_inv(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
P0 = Kmn.T
KmnKnm = np.dot(P0.T, P0)
KmmiKmn = np.dot(Kmmi,P0.T)
@ -290,8 +298,8 @@ class EP(likelihood):
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
dtd1 = Delta_tau*Diag[i] + 1.
dii = Diag[i]
@ -301,8 +309,8 @@ class EP(likelihood):
Rp_i = np.dot(R,pi_.T)
RTR = np.dot(R.T,np.dot(np.eye(M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
R = jitchol(RTR).T
self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
mu = self.w + np.dot(P,self.gamma)

View file

@ -14,7 +14,7 @@ class Gaussian(likelihood):
def __init__(self, data, variance=1., normalize=False):
self.is_heteroscedastic = False
self.Nparams = 1
self.Z = 0. # a correction factor which accounts for the approximation made
self.Z = 0. # a correction factor which accounts for the approximation made
N, self.D = data.shape
# normalization
@ -53,10 +53,10 @@ class Gaussian(likelihood):
def _set_params(self, x):
x = float(x)
if self._variance != x:
self._variance = x
self.covariance_matrix = np.eye(self.N) * self._variance
self.precision = 1. / self._variance
self.precision = 1. / x
self.covariance_matrix = np.eye(self.N) * x
self.V = (self.precision) * self.Y
self._variance = x
def predictive_values(self, mu, var, full_cov):
"""
@ -69,6 +69,7 @@ class Gaussian(likelihood):
# Note. for D>1, we need to re-normalise all the outputs independently.
# This will mess up computations of diag(true_var), below.
# note that the upper, lower quantiles should be the same shape as mean
# Augment the output variance with the likelihood variance and rescale.
true_var = (var + np.eye(var.shape[0]) * self._variance) * self._scale ** 2
_5pc = mean - 2.*np.sqrt(np.diag(true_var))
_95pc = mean + 2.*np.sqrt(np.diag(true_var))

View file

@ -58,7 +58,7 @@ class probit(likelihood_function):
norm_975 = [stats.norm.ppf(.975,m,v) for m,v in zip(mu,var)]
p_025 = stats.norm.cdf(norm_025/np.sqrt(1+var))
p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var))
return mean, np.nan*var, p_025, p_975 # TODO: var
return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var
class Poisson(likelihood_function):
"""

View file

@ -27,7 +27,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
"""
def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10,
Z=None, kernel=None, oldpsave=5, _debug=False,
Z=None, kernel=None, oldpsave=10, _debug=False,
**kwargs):
if X == None:
X = self.initialise_latent(init, Q, Y)
@ -87,19 +87,19 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
return x
def _set_params(self, x, save_old=True, save_count=0):
try:
# try:
N, Q = self.N, self.Q
self.X = x[:self.X.size].reshape(N, Q).copy()
self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
sparse_GP._set_params(self, x[(2 * N * Q):])
self.oldps = x
except (LinAlgError, FloatingPointError, ZeroDivisionError):
print "\rWARNING: Caught LinAlgError, continueing without setting "
if self._debug:
self._savederrors.append(self.f_call)
if save_count > 10:
raise
self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)
# self.oldps = x
# except (LinAlgError, FloatingPointError, ZeroDivisionError):
# print "\rWARNING: Caught LinAlgError, continueing without setting "
# if self._debug:
# self._savederrors.append(self.f_call)
# if save_count > 10:
# raise
# self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)
def dKL_dmuS(self):
dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
@ -167,8 +167,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
# d_dmu = (dL_dmu).flatten()
# d_dS = (dL_dS).flatten()
# ========================
dbound_dmuS = np.hstack((d_dmu, d_dS))
return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self)))
self.dbound_dmuS = np.hstack((d_dmu, d_dS))
self.dbound_dZtheta = sparse_GP._log_likelihood_gradients(self)
return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))
def _log_likelihood_normal_gradients(self):
Si, _, _, _ = pdinv(self.X_variance)
def plot_latent(self, which_indices=None, *args, **kwargs):
@ -263,7 +267,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
param_dict = dict(self._savedparams)
gradient_dict = dict(self._savedgradients)
kmm_dict = dict(self._savedpsiKmm)
# kmm_dict = dict(self._savedpsiKmm)
iters = np.array(param_dict.keys())
ABCD_dict = np.array(self._savedABCD)
self.showing = 0

View file

@ -3,8 +3,7 @@
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, tdot, symmetrify,pdinv
#from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
from ..util.linalg import mdot, jitchol, chol_inv, tdot, symmetrify,pdinv
from ..util.plot import gpplot
from .. import kern
from scipy import stats, linalg
@ -33,7 +32,6 @@ class FITC(sparse_GP):
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
#@profile
def _computations(self):
#factor Kmm
@ -58,18 +56,15 @@ class FITC(sparse_GP):
# factor B
self.B = np.eye(self.M) + self.A
self.LB = jitchol(self.B)
self.LBi,info = linalg.lapack.flapack.dtrtrs(self.LB,np.eye(self.M),lower=1)
self.LBi = chol_inv(self.LB)
self.psi1V = np.dot(self.psi1, self.V_star)
# back substutue C into psi1V
Lmi_psi1V, info1 = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
Lmi_psi1V, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0)
Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
b_psi1_Ki = self.beta_star * Kmmipsi1.T
Ki_pbp_Ki = np.dot(Kmmipsi1,b_psi1_Ki)
Kmmi = np.dot(self.Lmi.T,self.Lmi)
LBiLmi = np.dot(self.LBi,self.Lmi)
LBL_inv = np.dot(LBiLmi.T,LBiLmi)
@ -78,13 +73,15 @@ class FITC(sparse_GP):
Ki_pVVp_Ki = np.dot(Kmmipsi1,VV_p_Ki)
psi1beta = self.psi1*self.beta_star.T
H = self.Kmm + mdot(self.psi1,psi1beta.T)
Hi, LH, LHi, logdetH = pdinv(H)
LH = jitchol(H)
LHi = chol_inv(LH)
Hi = np.dot(LHi.T,LHi)
betapsi1TLmiLBi = np.dot(psi1beta.T,LBiLmi.T)
alpha = np.array([np.dot(a.T,a) for a in betapsi1TLmiLBi])[:,None]
gamma_1 = mdot(VVT,self.psi1.T,Hi)
pHip = mdot(self.psi1.T,Hi,self.psi1)
gamma_2 = mdot(self.beta_star*pHip,self.V_star)
#gamma_3 = self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T
gamma_3 = self.V_star * gamma_2
self._dL_dpsi0 = -0.5 * self.beta_star#dA_dpsi0: logdet(self.beta_star)
@ -97,31 +94,31 @@ class FITC(sparse_GP):
self._dL_dpsi1 += gamma_1 - mdot(psi1beta.T,Hi,self.psi1,gamma_1) #dD_dpsi1
self._dL_dKmm = -0.5 * np.dot(Kmmipsi1,b_psi1_Ki) #dA_dKmm: logdet(self.beta_star)
self._dL_dKmm += -.5*Kmmi + .5*LBL_inv + mdot(LBL_inv,psi1beta,Kmmipsi1.T) #dC_dKmm
self._dL_dKmm += .5*(LBL_inv - Kmmi) + mdot(LBL_inv,psi1beta,Kmmipsi1.T) #dC_dKmm
self._dL_dKmm += -.5 * mdot(Hi,self.psi1,gamma_1) #dD_dKmm
self._dpsi1_dtheta = 0
self._dpsi1_dX = 0
self._dKmm_dtheta = 0
self._dKmm_dX = 0
for psi1_n,V_n,X_n,alpha_n,gamma_n,gamma_k in zip(self.psi1.T,self.V_star,self.X,alpha,gamma_2,gamma_3):
psin_K = np.dot(psi1_n[None,:],Kmmi)
self._dpsi1_dX_jkj = 0
self._dpsi1_dtheta_jkj = 0
_dpsi1 = -V_n**2 * psin_K #dA_dpsi1: yT*beta_star*y
_dpsi1 += - alpha_n * psin_K #Diag_dC_dpsi1
_dpsi1 += - gamma_n**2 * psin_K + 2. * gamma_k * psin_K #Diag_dD_dpsi1
for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.N),self.V_star,alpha,gamma_2,gamma_3):
K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
_dKmm = .5*V_n**2 * np.dot(psin_K.T,psin_K) #dA_dKmm: yT*beta_star*y
_dKmm += .5 * alpha_n * np.dot(psin_K.T,psin_K) #Diag_dC_dKmm
_dKmm += .5*gamma_n**2 * np.dot(psin_K.T,psin_K) - gamma_k * np.dot(psin_K.T,psin_K) #Diag_dD_dKmm
#Diag_dpsi1 = Diag_dA_dpsi1: yT*beta_star*y + Diag_dC_dpsi1 +Diag_dD_dpsi1
_dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,X_n[None,:],self.Z)
#Diag_dKmm = Diag_dA_dKmm: yT*beta_star*y +Diag_dC_dKmm +Diag_dD_dKmm
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm ,self.Z)
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,X_n[None,:])
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
# the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
@ -235,8 +232,6 @@ class FITC(sparse_GP):
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed?
var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) # TODO: RA, is this line needed?
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
return mu_star[:,None],var
else:

View file

@ -28,7 +28,7 @@ class GPLVM(GP):
if X is None:
X = self.initialise_latent(init, Q, Y)
if kernel is None:
kernel = kern.rbf(Q) + kern.bias(Q)
kernel = kern.rbf(Q, ARD=Q>1) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
likelihood = Gaussian(Y, normalize=normalize_Y)
GP.__init__(self, X, likelihood, kernel, **kwargs)

View file

@ -93,7 +93,7 @@ class MRD(model):
self.NQ = self.N * self.Q
self.MQ = self.M * self.Q
model.__init__(self) # @UndefinedVariable
model.__init__(self) # @UndefinedVariable
@property
def X(self):
@ -255,7 +255,7 @@ class MRD(model):
X[:, qs] = PCA(Y, len(qs))[0]
elif init in "PCA_concat":
X = PCA(numpy.hstack(Ylist), self.Q)[0]
else: # init == 'random':
else: # init == 'random':
X = numpy.random.randn(Ylist[0].shape[0], self.Q)
self.X = X
return X

View file

@ -76,7 +76,7 @@ class sparse_GP(GP):
# psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1) / sf2)).sum(0)
psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1))).sum(0)
evals, evecs = linalg.eigh(psi2_beta_scaled)
clipped_evals = np.clip(evals, 0., 1e15) # TODO: make clipping configurable
clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
if not np.allclose(evals, clipped_evals):
print "Warning: clipping posterior eigenvalues"
tmp = evecs * np.sqrt(clipped_evals)

View file

@ -4,6 +4,7 @@ import numpy as np
import GPy
import scipy.sparse
import scipy.io
import cPickle as pickle
data_path = os.path.join(os.path.dirname(__file__), 'datasets')
default_seed = 10000
@ -96,16 +97,29 @@ def stick():
lbls = 'connect'
return {'Y': Y, 'connect' : connect, 'info': "Stick man data from Ohio."}
def swiss_roll_generated(N=1000, sigma=0.0):
with open(os.path.join(data_path, 'swiss_roll.pickle')) as f:
data = pickle.load(f)
Na = data['Y'].shape[0]
perm = np.random.permutation(np.r_[:Na])[:N]
Y = data['Y'][perm, :]
t = data['t'][perm]
c = data['colors'][perm, :]
so = np.argsort(t)
Y = Y[so, :]
t = t[so]
c = c[so, :]
return {'Y':Y, 't':t, 'colors':c}
def swiss_roll_1000():
mat_data = scipy.io.loadmat(os.path.join(data_path, 'swiss_roll_data'))
Y = mat_data['X_data'][:, 0:1000].transpose()
return {'Y': Y, 'info': "Subsample of the swiss roll data extracting only the first 1000 values."}
def swiss_roll():
def swiss_roll(N=3000):
mat_data = scipy.io.loadmat(os.path.join(data_path, 'swiss_roll_data.mat'))
Y = mat_data['X_data'][:, 0:3000].transpose()
return {'Y': Y, 'info': "The first 3,000 points from the swiss roll data of Tennenbaum, de Silva and Langford (2001)."}
Y = mat_data['X_data'][:, 0:N].transpose()
return {'Y': Y, 'X': mat_data['X_data'], 'info': "The first 3,000 points from the swiss roll data of Tennenbaum, de Silva and Langford (2001)."}
def toy_rbf_1d(seed=default_seed):
np.random.seed(seed=seed)
@ -270,13 +284,13 @@ def cmu_mocap(subject, train_motions, test_motions=[], sample_every=4):
end_ind = 0
for i in range(len(temp_Y)):
start_ind = end_ind
start_ind = end_ind
end_ind += temp_Y[i].shape[0]
Y[start_ind:end_ind, :] = temp_Y[i]
lbls[start_ind:end_ind, :] = temp_lbls[i]
if len(test_motions)>0:
if len(test_motions) > 0:
temp_Ytest = []
temp_lblstest = []
temp_lblstest = []
testexlbls = np.eye(len(test_motions))
tot_test_length = 0
@ -292,7 +306,7 @@ def cmu_mocap(subject, train_motions, test_motions=[], sample_every=4):
end_ind = 0
for i in range(len(temp_Ytest)):
start_ind = end_ind
start_ind = end_ind
end_ind += temp_Ytest[i].shape[0]
Ytest[start_ind:end_ind, :] = temp_Ytest[i]
lblstest[start_ind:end_ind, :] = temp_lblstest[i]
@ -304,7 +318,7 @@ def cmu_mocap(subject, train_motions, test_motions=[], sample_every=4):
for motion in train_motions:
info += motion + ', '
info = info[:-2]
if len(test_motions)>0:
if len(test_motions) > 0:
info += '. Test motions: '
for motion in test_motions:
info += motion + ', '

File diff suppressed because one or more lines are too long

View file

@ -237,6 +237,16 @@ def tdot(*args, **kwargs):
return tdot_numpy(*args,**kwargs)
def DSYR(A,x,alpha=1.):
"""
Performs a symmetric rank-1 update operation:
A <- A + alpha * np.dot(x,x.T)
Arguments
---------
:param A: Symmetric NxN np.array
:param x: Nx1 np.array
:param alpha: scalar
"""
N = c_int(A.shape[0])
LDA = c_int(A.shape[0])
UPLO = c_char('l')

View file

@ -44,7 +44,7 @@ class vector_show(data_show):
class lvm(data_show):
def __init__(self, vals, model, data_visualize, latent_axes=None, latent_index=[0,1]):
def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1]):
"""Visualize a latent variable model
:param model: the latent variable model to visualize.
@ -71,7 +71,7 @@ class lvm(data_show):
self.data_visualize = data_visualize
self.model = model
self.latent_axes = latent_axes
self.sense_axes = sense_axes
self.called = False
self.move_on = False
self.latent_index = latent_index
@ -81,10 +81,12 @@ class lvm(data_show):
self.latent_values = vals
self.latent_handle = self.latent_axes.plot([0],[0],'rx',mew=2)[0]
self.modify(vals)
self.show_sensitivities()
def modify(self, vals):
"""When latent values are modified update the latent representation and ulso update the output visualization."""
y = self.model.predict(vals)[0]
print y
self.data_visualize.modify(y)
self.latent_handle.set_data(vals[self.latent_index[0]], vals[self.latent_index[1]])
self.axes.figure.canvas.draw()
@ -99,6 +101,7 @@ class lvm(data_show):
if event.inaxes!=self.latent_axes: return
self.move_on = not self.move_on
self.called = True
def on_move(self, event):
if event.inaxes!=self.latent_axes: return
if self.called and self.move_on:
@ -107,22 +110,54 @@ class lvm(data_show):
self.latent_values[self.latent_index[1]]=event.ydata
self.modify(self.latent_values)
def show_sensitivities(self):
# A click in the bar chart axis for selection a dimension.
if self.sense_axes != None:
self.sense_axes.cla()
self.sense_axes.bar(np.arange(self.model.Q),1./self.model.input_sensitivity(),color='b')
if self.latent_index[1] == self.latent_index[0]:
self.sense_axes.bar(np.array(self.latent_index[0]),1./self.model.input_sensitivity()[self.latent_index[0]],color='y')
self.sense_axes.bar(np.array(self.latent_index[1]),1./self.model.input_sensitivity()[self.latent_index[1]],color='y')
else:
self.sense_axes.bar(np.array(self.latent_index[0]),1./self.model.input_sensitivity()[self.latent_index[0]],color='g')
self.sense_axes.bar(np.array(self.latent_index[1]),1./self.model.input_sensitivity()[self.latent_index[1]],color='r')
self.sense_axes.figure.canvas.draw()
class lvm_subplots(lvm):
"""
latent_axes is a np array of dimension np.ceil(Q/2) + 1,
one for each pair of the axes, and the last one for the sensitiity bar chart
latent_axes is a np array of dimension np.ceil(Q/2),
one for each pair of the latent dimensions.
"""
def __init__(self, vals, model, data_visualize, latent_axes=None, latent_index=[0,1]):
lvm.__init__(self, vals, model,data_visualize,latent_axes,[0,1])
def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None):
self.nplots = int(np.ceil(model.Q/2.))+1
lvm.__init__(self,model,data_visualize,latent_axes,latent_index)
self.latent_values = np.zeros(2*np.ceil(self.model.Q/2.)) # possibly an extra dimension on this
assert latent_axes.size == self.nplots
assert len(latent_axes)==self.nplots
if vals==None:
vals = model.X[0, :]
self.latent_values = vals
for i, axis in enumerate(latent_axes):
if i == self.nplots-1:
if self.nplots*2!=model.Q:
latent_index = [i*2, i*2]
lvm.__init__(self, self.latent_vals, model, data_visualize, axis, sense_axes, latent_index=latent_index)
else:
latent_index = [i*2, i*2+1]
lvm.__init__(self, self.latent_vals, model, data_visualize, axis, latent_index=latent_index)
class lvm_dimselect(lvm):
"""
A visualizer for latent variable models which allows selection of the latent dimensions to use by clicking on a bar chart of their length scales.
For an example of the visualizer's use try:
GPy.examples.dimensionality_reduction.BGPVLM_oil()
"""
def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0, 1]):
if latent_axes==None and sense_axes==None:
@ -133,24 +168,9 @@ class lvm_dimselect(lvm):
else:
self.sense_axes = sense_axes
lvm.__init__(self,vals,model,data_visualize,latent_axes,latent_index)
self.show_sensitivities()
lvm.__init__(self,vals,model,data_visualize,latent_axes,sense_axes,latent_index)
print "use left and right mouse butons to select dimensions"
def show_sensitivities(self):
# A click in the bar chart axis for selection a dimension.
self.sense_axes.cla()
self.sense_axes.bar(np.arange(self.model.Q),1./self.model.input_sensitivity(),color='b')
if self.latent_index[1] == self.latent_index[0]:
self.sense_axes.bar(np.array(self.latent_index[0]),1./self.model.input_sensitivity()[self.latent_index[0]],color='y')
self.sense_axes.bar(np.array(self.latent_index[1]),1./self.model.input_sensitivity()[self.latent_index[1]],color='y')
else:
self.sense_axes.bar(np.array(self.latent_index[0]),1./self.model.input_sensitivity()[self.latent_index[0]],color='g')
self.sense_axes.bar(np.array(self.latent_index[1]),1./self.model.input_sensitivity()[self.latent_index[1]],color='r')
self.sense_axes.figure.canvas.draw()
def on_click(self, event):
@ -177,12 +197,6 @@ class lvm_dimselect(lvm):
self.called = True
def on_move(self, event):
if event.inaxes!=self.latent_axes: return
if self.called and self.move_on:
self.latent_values[self.latent_index[0]]=event.xdata
self.latent_values[self.latent_index[1]]=event.ydata
self.modify(self.latent_values)
def on_leave(self,event):
latent_values = self.latent_values.copy()
@ -193,7 +207,7 @@ class lvm_dimselect(lvm):
class image_show(data_show):
"""Show a data vector as an image."""
def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, invert=False, scale=False, palette=[], presetMean = 0., presetSTD = -1., selectImage = 0):
def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, invert=False, scale=False, palette=[], presetMean = 0., presetSTD = -1., selectImage=0):
data_show.__init__(self, vals, axes)
self.dimensions = dimensions
self.transpose = transpose
@ -214,15 +228,30 @@ class image_show(data_show):
def modify(self, vals):
self.set_image(vals)
self.handle.set_array(self.vals)
self.axes.figure.canvas.draw() # Teo - original line: plt.show()
self.axes.figure.canvas.draw()
def set_image(self, vals):
dim = self.dimensions[0] * self.dimensions[1]
self.vals = np.reshape(vals[0,dim*self.selectImage+np.array(range(dim))], self.dimensions, order='F')
nImg = np.sqrt(vals[0,].size/dim)
if nImg > 1 and nImg.is_integer(): # Show a mosaic of images
nImg = np.int(nImg)
self.vals = np.zeros((self.dimensions[0]*nImg, self.dimensions[1]*nImg))
for iR in range(nImg):
for iC in range(nImg):
currImgId = iR*nImg + iC
currImg = np.reshape(vals[0,dim*currImgId+np.array(range(dim))], self.dimensions, order='F')
firstRow = iR*self.dimensions[0]
lastRow = (iR+1)*self.dimensions[0]
firstCol = iC*self.dimensions[1]
lastCol = (iC+1)*self.dimensions[1]
self.vals[firstRow:lastRow, firstCol:lastCol] = currImg
else:
self.vals = np.reshape(vals[0,dim*self.selectImage+np.array(range(dim))], self.dimensions, order='F')
if self.transpose:
self.vals = self.vals.T.copy()
if not self.scale:
self.vals = self.vals
# if not self.scale:
# self.vals = self.vals
if self.invert:
self.vals = -self.vals
@ -318,7 +347,7 @@ class stick_show(mocap_data_show):
def process_values(self, vals):
self.vals = vals.reshape((3, vals.shape[1]/3)).T.copy()
class skeleton_show(mocap_data_show):
"""data_show class for visualizing motion capture data encoded as a skeleton with angles."""
def __init__(self, vals, skel, padding=0, axes=None):