Merge branch 'devel' of https://github.com/SheffieldML/GPy into devel

This commit is contained in:
Neil Lawrence 2014-07-24 20:55:23 +01:00
commit 1c9eb270bf
21 changed files with 693 additions and 279 deletions

View file

@ -12,6 +12,10 @@ from .. import likelihoods
from ..likelihoods.gaussian import Gaussian
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation, LatentFunctionInference
from parameterization.variational import VariationalPosterior
from scipy.sparse.base import issparse
import logging
logger = logging.getLogger("GP")
class GP(Model):
"""
@ -34,12 +38,14 @@ class GP(Model):
assert X.ndim == 2
if isinstance(X, (ObsAr, VariationalPosterior)):
self.X = X.copy()
else: self.X = ObsAr(X.copy())
else: self.X = ObsAr(X)
self.num_data, self.input_dim = self.X.shape
assert Y.ndim == 2
self.Y = ObsAr(Y.copy())
logger.info("initializing Y")
if issparse(Y): self.Y = Y
else: self.Y = ObsAr(Y)
assert Y.shape[0] == self.num_data
_, self.output_dim = self.Y.shape
@ -54,6 +60,7 @@ class GP(Model):
self.likelihood = likelihood
#find a sensible inference method
logger.info("initializing inference method")
if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian) or isinstance(likelihood, likelihoods.MixedNoise):
inference_method = exact_gaussian_inference.ExactGaussianInference()
@ -62,6 +69,7 @@ class GP(Model):
print "defaulting to ", inference_method, "for latent function inference"
self.inference_method = inference_method
logger.info("adding kernel and likelihood as parameters")
self.add_parameter(self.kern)
self.add_parameter(self.likelihood)

View file

@ -61,7 +61,7 @@ class Model(Parameterized):
on the current machine.
"""
initial_parameters = self.optimizer_array
initial_parameters = self.optimizer_array.copy()
if parallel:
try:
@ -97,9 +97,9 @@ class Model(Parameterized):
if len(self.optimization_runs):
i = np.argmin([o.f_opt for o in self.optimization_runs])
self._set_params_transformed(self.optimization_runs[i].x_opt)
self.optimizer_array = self.optimization_runs[i].x_opt
else:
self._set_params_transformed(initial_parameters)
self.optimizer_array = initial_parameters
def ensure_default_constraints(self, warning=True):
"""
@ -225,12 +225,16 @@ class Model(Parameterized):
if self.size == 0:
raise RuntimeError, "Model without parameters cannot be optimized"
if optimizer is None:
optimizer = self.preferred_optimizer
if start == None:
start = self.optimizer_array
if optimizer is None:
optimizer = self.preferred_optimizer
if isinstance(optimizer, optimization.Optimizer):
opt = optimizer
opt.model = self
else:
optimizer = optimization.get_optimizer(optimizer)
opt = optimizer(start, model=self, **kwargs)
@ -249,7 +253,7 @@ class Model(Parameterized):
def _checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
"""
Check the gradient of the model by comparing to a numerical
estimate. If the verbose flag is passed, invividual
estimate. If the verbose flag is passed, individual
components are tested (and printed)
:param verbose: If True, print a "full" checking of each parameter

View file

@ -751,8 +751,6 @@ class OptimizationHandlable(Indexable):
Transform the gradients by multiplying the gradient factor for each
constraint to it.
"""
if self.has_parent():
return g
[np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
if self._has_fixes(): return g[self._fixes_]
return g
@ -793,7 +791,7 @@ class OptimizationHandlable(Indexable):
#===========================================================================
# Randomizable
#===========================================================================
def randomize(self, rand_gen=np.random.normal, loc=0, scale=1, *args, **kwargs):
def randomize(self, rand_gen=np.random.normal, *args, **kwargs):
"""
Randomize the model.
Make this draw from the prior if one exists, else draw from given random generator
@ -804,7 +802,7 @@ class OptimizationHandlable(Indexable):
:param args, kwargs: will be passed through to random number generator
"""
# first take care of all parameters (from N(0,1))
x = rand_gen(loc=loc, scale=scale, size=self._size_transformed(), *args, **kwargs)
x = rand_gen(size=self._size_transformed(), *args, **kwargs)
# now draw from prior where possible
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...)
@ -835,6 +833,11 @@ class OptimizationHandlable(Indexable):
1.) connect param_array of children to self.param_array
2.) tell all children to propagate further
"""
if self.param_array.size != self.size:
self._param_array_ = np.empty(self.size, dtype=np.float64)
if self.gradient.size != self.size:
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
pi_old_size = 0
for pi in self.parameters:
pislice = slice(pi_old_size, pi_old_size + pi.size)
@ -848,6 +851,9 @@ class OptimizationHandlable(Indexable):
pi._propagate_param_grad(parray[pislice], garray[pislice])
pi_old_size += pi.size
def _connect_parameters(self):
pass
class Parameterizable(OptimizationHandlable):
"""
A parameterisable class.
@ -874,6 +880,9 @@ class Parameterizable(OptimizationHandlable):
"""
Array representing the parameters of this class.
There is only one copy of all parameters in memory, two during optimization.
!WARNING!: setting the parameter array MUST always be done in memory:
m.param_array[:] = m_copy.param_array
"""
if self.__dict__.get('_param_array_', None) is None:
self._param_array_ = np.empty(self.size, dtype=np.float64)
@ -986,6 +995,11 @@ class Parameterizable(OptimizationHandlable):
# notification system
#===========================================================================
def _parameters_changed_notification(self, me, which=None):
"""
In parameterizable we just need to make sure, that the next call to optimizer_array
will update the optimizer_array to the latest parameters
"""
self._optimizer_copy_transformed = False # tells the optimizer array to update on next request
self.parameters_changed()
def _pass_through_notify_observers(self, me, which=None):
self.notify_observers(which=which)
@ -1017,4 +1031,3 @@ class Parameterizable(OptimizationHandlable):
updates get passed through. See :py:func:`GPy.core.param.Observable.add_observer`
"""
pass

View file

@ -8,11 +8,23 @@ from re import compile, _pattern_type
from param import ParamConcatenation
from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
import logging
logger = logging.getLogger("parameters changed meta")
class ParametersChangedMeta(type):
def __call__(self, *args, **kw):
instance = super(ParametersChangedMeta, self).__call__(*args, **kw)
instance.parameters_changed()
return instance
self._in_init_ = True
#import ipdb;ipdb.set_trace()
self = super(ParametersChangedMeta, self).__call__(*args, **kw)
logger.debug("finished init")
self._in_init_ = False
logger.debug("connecting parameters")
self._highest_parent_._connect_parameters()
self._highest_parent_._notify_parent_change()
self._highest_parent_._connect_fixes()
logger.debug("calling parameters changed")
self.parameters_changed()
return self
class Parameterized(Parameterizable):
"""
@ -64,14 +76,12 @@ class Parameterized(Parameterizable):
#===========================================================================
def __init__(self, name=None, parameters=[], *a, **kw):
super(Parameterized, self).__init__(name=name, *a, **kw)
self._in_init_ = True
self.size = sum(p.size for p in self.parameters)
self.add_observer(self, self._parameters_changed_notification, -100)
if not self._has_fixes():
self._fixes_ = None
self._param_slices_ = []
self._connect_parameters()
del self._in_init_
#self._connect_parameters()
self.add_parameters(*parameters)
def build_pydot(self, G=None):
@ -125,6 +135,9 @@ class Parameterized(Parameterizable):
param._parent_.remove_parameter(param)
# make sure the size is set
if index is None:
start = sum(p.size for p in self.parameters)
self.constraints.shift_right(start, param.size)
self.priors.shift_right(start, param.size)
self.constraints.update(param.constraints, self.size)
self.priors.update(param.priors, self.size)
self.parameters.append(param)
@ -143,14 +156,16 @@ class Parameterized(Parameterizable):
parent.size += param.size
parent = parent._parent_
if not self._in_init_:
self._connect_parameters()
self._notify_parent_change()
self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names)
self._highest_parent_._notify_parent_change()
self._highest_parent_._connect_fixes()
else:
raise HierarchyError, """Parameter exists already and no copy made"""
raise HierarchyError, """Parameter exists already, try making a copy"""
def add_parameters(self, *parameters):
@ -198,26 +213,28 @@ class Parameterized(Parameterizable):
# no parameters for this class
return
if self.param_array.size != self.size:
self.param_array = np.empty(self.size, dtype=np.float64)
self._param_array_ = np.empty(self.size, dtype=np.float64)
if self.gradient.size != self.size:
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
old_size = 0
self._param_slices_ = []
for i, p in enumerate(self.parameters):
if not p.param_array.flags['C_CONTIGUOUS']:
raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"
p._parent_ = self
p._parent_index_ = i
pslice = slice(old_size, old_size + p.size)
# first connect all children
p._propagate_param_grad(self.param_array[pslice], self.gradient_full[pslice])
# then connect children to self
self.param_array[pslice] = p.param_array.flat # , requirements=['C', 'W']).ravel(order='C')
self.gradient_full[pslice] = p.gradient_full.flat # , requirements=['C', 'W']).ravel(order='C')
if not p.param_array.flags['C_CONTIGUOUS']:
raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"
p.param_array.data = self.param_array[pslice].data
p.gradient_full.data = self.gradient_full[pslice].data

View file

@ -8,6 +8,9 @@ from ..inference.latent_function_inference import var_dtc
from .. import likelihoods
from parameterization.variational import VariationalPosterior
import logging
logger = logging.getLogger("sparse gp")
class SparseGP(GP):
"""
A general purpose Sparse GP model
@ -46,7 +49,7 @@ class SparseGP(GP):
self.num_inducing = Z.shape[0]
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata)
logger.info("Adding Z as parameter")
self.add_parameter(self.Z, index=0)
def has_uncertain_inputs(self):
@ -57,10 +60,14 @@ class SparseGP(GP):
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
if isinstance(self.X, VariationalPosterior):
#gradients wrt kernel
dL_dKmm = self.grad_dict.pop('dL_dKmm')
dL_dKmm = self.grad_dict['dL_dKmm']
self.kern.update_gradients_full(dL_dKmm, self.Z, None)
target = self.kern.gradient.copy()
self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
self.kern.update_gradients_expectations(variational_posterior=self.X,
Z=self.Z,
dL_dpsi0=self.grad_dict['dL_dpsi0'],
dL_dpsi1=self.grad_dict['dL_dpsi1'],
dL_dpsi2=self.grad_dict['dL_dpsi2'])
self.kern.gradient += target
#gradients wrt Z

View file

@ -296,15 +296,16 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
from GPy.models import BayesianGPLVM
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 7, 9
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool)
Y[inan] = _np.nan
inan = _np.random.binomial(1, .8, size=Y.shape).astype(bool) # 80% missing data
Ymissing = Y.copy()
Ymissing[inan] = _np.nan
m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing,
m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
inference_method=VarDTCMissingData(inan=inan), kernel=k)
m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
@ -414,7 +415,7 @@ def olivetti_faces(optimize=True, verbose=True, plot=True):
if optimize: m.optimize('bfgs', messages=verbose, max_iters=1000)
if plot:
ax = m.plot_latent(which_indices=(0, 1))
y = m.likelihood.Y[0, :]
y = m.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False)
lvm = GPy.plotting.matplot_dep.visualize.lvm(m.X.mean[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')

View file

@ -9,6 +9,8 @@ import numpy as np
from ...util.misc import param_to_array
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
import logging, itertools
logger = logging.getLogger('vardtc')
class VarDTC(LatentFunctionInference):
"""
@ -192,11 +194,12 @@ class VarDTC(LatentFunctionInference):
return post, log_marginal, grad_dict
class VarDTCMissingData(LatentFunctionInference):
const_jitter = 1e-6
const_jitter = 1e-10
def __init__(self, limit=1, inan=None):
from ...util.caching import Cacher
self._Y = Cacher(self._subarray_computations, limit)
self._inan = inan
if inan is not None: self._inan = ~inan
else: self._inan = None
pass
def set_limit(self, limit):
@ -217,21 +220,35 @@ class VarDTCMissingData(LatentFunctionInference):
if self._inan is None:
inan = np.isnan(Y)
has_none = inan.any()
self._inan = ~inan
else:
inan = self._inan
has_none = True
if has_none:
from ...util.subarray_and_sorting import common_subarrays
self._subarray_indices = []
for v,ind in common_subarrays(inan, 1).iteritems():
if not np.all(v):
v = ~np.array(v, dtype=bool)
ind = np.array(ind, dtype=int)
if ind.size == Y.shape[1]:
ind = slice(None)
self._subarray_indices.append([v,ind])
Ys = [Y[v, :][:, ind] for v, ind in self._subarray_indices]
traces = [(y**2).sum() for y in Ys]
#print "caching missing data slices, this can take several minutes depending on the number of unique dimensions of the data..."
#csa = common_subarrays(inan, 1)
size = Y.shape[1]
#logger.info('preparing subarrays {:3.3%}'.format((i+1.)/size))
Ys = []
next_ten = [0.]
count = itertools.count()
for v, y in itertools.izip(inan.T, Y.T[:,:,None]):
i = count.next()
if ((i+1.)/size) >= next_ten[0]:
logger.info('preparing subarrays {:>6.1%}'.format((i+1.)/size))
next_ten[0] += .1
Ys.append(y[v,:])
next_ten = [0.]
count = itertools.count()
def trace(y):
i = count.next()
if ((i+1.)/size) >= next_ten[0]:
logger.info('preparing traces {:>6.1%}'.format((i+1.)/size))
next_ten[0] += .1
y = y[inan[:,i],i:i+1]
return np.einsum('ij,ij->', y,y)
traces = [trace(Y) for _ in xrange(size)]
return Ys, traces
else:
self._subarray_indices = [[slice(None),slice(None)]]
@ -253,7 +270,6 @@ class VarDTCMissingData(LatentFunctionInference):
beta_all = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
het_noise = beta_all.size != 1
import itertools
num_inducing = Z.shape[0]
dL_dpsi0_all = np.zeros(Y.shape[0])
@ -273,22 +289,17 @@ class VarDTCMissingData(LatentFunctionInference):
Lm = jitchol(Kmm)
if uncertain_inputs: LmInv = dtrtri(Lm)
VVT_factor_all = np.empty(Y.shape)
full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1]
if not full_VVT_factor:
psi1V = np.dot(Y.T*beta_all, psi1_all).T
for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices):
if het_noise: beta = beta_all[ind]
size = Y.shape[1]
next_ten = 0
for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)):
if ((i+1.)/size) >= next_ten:
logger.info('inference {:> 6.1%}'.format((i+1.)/size))
next_ten += .1
if het_noise: beta = beta_all[i]
else: beta = beta_all
VVT_factor = (beta*y)
try:
VVT_factor_all[v, ind].flat = VVT_factor.flat
except ValueError:
mult = np.ravel_multi_index((v.nonzero()[0][:,None],ind[None,:]), VVT_factor_all.shape)
VVT_factor_all.flat[mult] = VVT_factor
output_dim = y.shape[1]
VVT_factor = (y*beta)
output_dim = 1#len(ind)
psi0 = psi0_all[v]
psi1 = psi1_all[v, :]
@ -330,7 +341,6 @@ class VarDTCMissingData(LatentFunctionInference):
VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
psi1, het_noise, uncertain_inputs)
#import ipdb;ipdb.set_trace()
dL_dpsi0_all[v] += dL_dpsi0
dL_dpsi1_all[v, :] += dL_dpsi1
if uncertain_inputs:
@ -347,19 +357,20 @@ class VarDTCMissingData(LatentFunctionInference):
psi0, psi1, beta,
data_fit, num_data, output_dim, trYYT, Y)
if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf
else:
print 'foobar'
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
tmp, _ = dpotrs(LB, tmp, lower=1)
woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0]
#if full_VVT_factor:
woodbury_vector[:, i:i+1] = Cpsi1Vf
#else:
# print 'foobar'
# tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
# tmp, _ = dpotrs(LB, tmp, lower=1)
# woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0]
#import ipdb;ipdb.set_trace()
Bi, _ = dpotri(LB, lower=1)
symmetrify(Bi)
Bi = -dpotri(LB, lower=1)[0]
diag.add(Bi, 1)
woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None]
woodbury_inv_all[:, :, i:i+1] = backsub_both_sides(Lm, Bi)[:,:,None]
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
@ -376,23 +387,6 @@ class VarDTCMissingData(LatentFunctionInference):
'dL_dKnm':dL_dpsi1_all,
'dL_dthetaL':dL_dthetaL}
#get sufficient things for posterior prediction
#TODO: do we really want to do this in the loop?
#if not full_VVT_factor:
# print 'foobar'
# psi1V = np.dot(Y.T*beta_all, psi1_all).T
# tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
# tmp, _ = dpotrs(LB_all, tmp, lower=1)
# woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
#import ipdb;ipdb.set_trace()
#Bi, _ = dpotri(LB_all, lower=1)
#symmetrify(Bi)
#Bi = -dpotri(LB_all, lower=1)[0]
#from ...util import diag
#diag.add(Bi, 1)
#woodbury_inv = backsub_both_sides(Lm, Bi)
post = Posterior(woodbury_inv=woodbury_inv_all, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
return post, log_marginal, grad_dict

View file

@ -112,12 +112,12 @@ class VarDTC_minibatch(LatentFunctionInference):
if het_noise:
psi2_full += beta_slice*psi2
else:
psi2_full += psi2
psi2_full += psi2.sum(0)
else:
if het_noise:
psi2_full += beta_slice*np.outer(psi1,psi1)
else:
psi2_full += np.outer(psi1,psi1)
psi2_full += np.einsum('nm,jk->mk',psi1,psi1)
if not het_noise:
psi0_full *= beta
@ -128,7 +128,7 @@ class VarDTC_minibatch(LatentFunctionInference):
#======================================================================
# Compute Common Components
#======================================================================
self.psi1Y = psi1Y_full
Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm)
@ -159,7 +159,10 @@ class VarDTC_minibatch(LatentFunctionInference):
logL_R = -np.log(beta).sum()
else:
logL_R = -num_data*np.log(beta)
logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum())
logL = (
-(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.
-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum())
)
#======================================================================
# Compute dL_dKmm
@ -256,14 +259,14 @@ class VarDTC_minibatch(LatentFunctionInference):
if het_noise:
if uncertain_inputs:
psiR = np.einsum('mo,nmo->n',dL_dpsi2R,psi2)
psiR = np.einsum('mo,nmo->',dL_dpsi2R,psi2)
else:
psiR = np.einsum('nm,no,mo->n',psi1,psi1,dL_dpsi2R)
psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R)
dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1)
else:
if uncertain_inputs:
psiR = np.einsum('mo,mo->',dL_dpsi2R,psi2)
psiR = np.einsum('mo,nmo->',dL_dpsi2R,psi2)
else:
psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R)
@ -305,30 +308,44 @@ def update_gradients(model):
if isinstance(model.X, VariationalPosterior):
X_slice = model.X[n_range[0]:n_range[1]]
dL_dpsi1 = grad_dict['dL_dpsi1']#[None, :]
dL_dpsi2 = grad_dict['dL_dpsi2'][None, :, :]
#gradients w.r.t. kernel
model.kern.update_gradients_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
model.kern.update_gradients_expectations(variational_posterior=X_slice,Z=model.Z,dL_dpsi0=grad_dict['dL_dpsi0'],dL_dpsi1=dL_dpsi1,dL_dpsi2=dL_dpsi2)
kern_grad += model.kern.gradient
#gradients w.r.t. Z
model.Z.gradient += model.kern.gradients_Z_expectations(
dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice)
dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=dL_dpsi1,
dL_dpsi2=dL_dpsi2,
Z=model.Z, variational_posterior=X_slice)
#gradients w.r.t. posterior parameters of X
X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
model.set_X_gradients(X_slice, X_grad)
X_grad = model.kern.gradients_qX_expectations(
variational_posterior=X_slice,
Z=model.Z,
dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=dL_dpsi1,
dL_dpsi2=dL_dpsi2)
model.X.mean[n_range[0]:n_range[1]].gradient = X_grad[0]
model.X.variance[n_range[0]:n_range[1]].gradient = X_grad[1]
if het_noise:
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
else:
dL_dthetaL += grad_dict['dL_dthetaL']
# Set the gradients w.r.t. kernel
model.kern.gradient = kern_grad
#import ipdb;ipdb.set_trace()
model.grad_dict = grad_dict
if isinstance(model.X, VariationalPosterior):
# Update Log-likelihood
model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X)
# update for the KL divergence
model.variational_prior.update_gradients_KL(model.X)
# Set the gradients w.r.t. kernel
model.kern.gradient = kern_grad
# dL_dthetaL
model.likelihood.update_gradients(dL_dthetaL)

View file

@ -56,13 +56,13 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
if gtol is None:
gtol = 1e-5
sigma0 = 1.0e-8
sigma0 = 1.0e-7
fold = f(x, *optargs) # Initial function value.
function_eval = 1
fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient.
if any(np.isnan(gradnew)):
raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
#if any(np.isnan(gradnew)):
# raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
current_grad = np.dot(gradnew, gradnew)
gradold = gradnew.copy()
d = -gradnew # Initial search direction.
@ -168,13 +168,13 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
if Delta < 0.25:
beta = min(4.0 * beta, betamax)
if Delta > 0.75:
beta = max(0.5 * beta, betamin)
beta = max(0.25 * beta, betamin)
# Update search direction using Polak-Ribiere formula, or re-start
# in direction of negative gradient after nparams steps.
if nsuccess == x.size:
d = -gradnew
# beta = 1. # TODO: betareset!!
beta = 1. # This is not in the original paper
nsuccess = 0
elif success:
Gamma = np.dot(gradold - gradnew, gradnew) / (mu)

View file

@ -37,19 +37,21 @@ class BayesianGPLVM(SparseGP):
self.init = init
if X_variance is None:
self.logger.info("initializing latent space variance ~ uniform(0,.1)")
X_variance = np.random.uniform(0,.1,X.shape)
if Z is None:
self.logger.info("initializing inducing inputs")
Z = np.random.permutation(X.copy())[:num_inducing]
assert Z.shape[1] == X.shape[1]
if kernel is None:
self.logger.info("initializing kernel RBF")
kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) # + kern.white(input_dim)
if likelihood is None:
likelihood = Gaussian()
self.variational_prior = NormalPrior()
X = NormalPosterior(X, X_variance)
@ -65,6 +67,7 @@ class BayesianGPLVM(SparseGP):
inference_method = VarDTC()
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
self.logger.info("Adding X as parameter")
self.add_parameter(self.X, index=0)
def set_X_gradients(self, X, X_grad):

View file

@ -84,6 +84,7 @@ def plot_latent(model, labels=None, which_indices=None,
cmap=pb.cm.binary, **imshow_kwargs)
# make sure labels are in order of input:
labels = np.asarray(labels)
ulabels = []
for lab in labels:
if not lab in ulabels:

View file

@ -8,7 +8,7 @@ from base_plots import gpplot, x_frame1D, x_frame2D
from ...util.misc import param_to_array
from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
from scipy import sparse
def plot_fit(model, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
@ -61,11 +61,14 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
X = model.X.mean
X_variance = param_to_array(model.X.variance)
X_variance = model.X.variance
else:
X = model.X
X, Y = param_to_array(X, model.Y)
if hasattr(model, 'Z'): Z = param_to_array(model.Z)
#X, Y = param_to_array(X, model.Y)
Y = model.Y
if sparse.issparse(Y): Y = Y.todense().view(np.ndarray)
if hasattr(model, 'Z'): Z = model.Z
#work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs])

View file

@ -8,6 +8,7 @@ import GPy
import numpy as np
from GPy.core.parameterization.parameter_core import HierarchyError
from GPy.core.parameterization.observable_array import ObsAr
from GPy.core.parameterization.transformations import NegativeLogexp
class ArrayCoreTest(unittest.TestCase):
def setUp(self):
@ -38,10 +39,25 @@ class ParameterizedTest(unittest.TestCase):
self.test1.kern = self.rbf+self.white
self.test1.add_parameter(self.test1.kern)
self.test1.add_parameter(self.param, 0)
# print self.test1:
#=============================================================================
# test_model. | Value | Constraint | Prior | Tied to
# param | (25L, 2L) | {0.0,1.0} | |
# add.rbf.variance | 1.0 | 0.0,1.0 +ve | |
# add.rbf.lengthscale | 1.0 | 0.0,1.0 +ve | |
# add.white.variance | 1.0 | 0.0,1.0 +ve | |
#=============================================================================
x = np.linspace(-2,6,4)[:,None]
y = np.sin(x)
self.testmodel = GPy.models.GPRegression(x,y)
# print self.testmodel:
#=============================================================================
# GP_regression. | Value | Constraint | Prior | Tied to
# rbf.variance | 1.0 | +ve | |
# rbf.lengthscale | 1.0 | +ve | |
# Gaussian_noise.variance | 1.0 | +ve | |
#=============================================================================
def test_add_parameter(self):
self.assertEquals(self.rbf._parent_index_, 0)
@ -142,7 +158,12 @@ class ParameterizedTest(unittest.TestCase):
self.testmodel.randomize()
self.assertEqual(val, self.testmodel.kern.lengthscale)
def test_add_parameter_in_hierarchy(self):
from GPy.core import Param
self.test1.kern.rbf.add_parameter(Param("NEW", np.random.rand(2), NegativeLogexp()), 1)
self.assertListEqual(self.test1.constraints[NegativeLogexp()].tolist(), range(self.param.size+1, self.param.size+1 + 2))
self.assertListEqual(self.test1.constraints[GPy.transformations.Logistic(0,1)].tolist(), range(self.param.size))
self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp(0,1)].tolist(), np.r_[50, 53:55].tolist())
def test_regular_expression_misc(self):
self.testmodel.kern.lengthscale.fix()

View file

@ -18,7 +18,6 @@ class Cacher(object):
self.operation = operation
self.order = collections.deque()
self.cached_inputs = {} # point from cache_ids to a list of [ind_ids], which where used in cache cache_id
self.logger = logging.getLogger("cache")
#=======================================================================
# point from each ind_id to [ref(obj), cache_ids]
@ -36,23 +35,18 @@ class Cacher(object):
def combine_inputs(self, args, kw):
"Combines the args and kw in a unique way, such that ordering of kwargs does not lead to recompute"
self.logger.debug("combining args and kw")
return args + tuple(c[1] for c in sorted(kw.items(), key=lambda x: x[0]))
def prepare_cache_id(self, combined_args_kw, ignore_args):
"get the cacheid (conc. string of argument self.ids in order) ignoring ignore_args"
cache_id = "".join(self.id(a) for i, a in enumerate(combined_args_kw) if i not in ignore_args)
self.logger.debug("cache_id={} was created".format(cache_id))
return cache_id
def ensure_cache_length(self, cache_id):
"Ensures the cache is within its limits and has one place free"
self.logger.debug("cache length gets ensured")
if len(self.order) == self.limit:
self.logger.debug("cache limit of l={} was reached".format(self.limit))
# we have reached the limit, so lets release one element
cache_id = self.order.popleft()
self.logger.debug("cach_id '{}' gets removed".format(cache_id))
combined_args_kw = self.cached_inputs[cache_id]
for ind in combined_args_kw:
if ind is not None:
@ -66,7 +60,6 @@ class Cacher(object):
else:
cache_ids.remove(cache_id)
self.cached_input_ids[ind_id] = [ref, cache_ids]
self.logger.debug("removing caches")
del self.cached_outputs[cache_id]
del self.inputs_changed[cache_id]
del self.cached_inputs[cache_id]
@ -81,10 +74,8 @@ class Cacher(object):
if a is not None:
ind_id = self.id(a)
v = self.cached_input_ids.get(ind_id, [weakref.ref(a), []])
self.logger.debug("cache_id '{}' gets stored".format(cache_id))
v[1].append(cache_id)
if len(v[1]) == 1:
self.logger.debug("adding observer to object {}".format(repr(a)))
a.add_observer(self, self.on_cache_changed)
self.cached_input_ids[ind_id] = v
@ -108,28 +99,21 @@ class Cacher(object):
cache_id = self.prepare_cache_id(inputs, self.ignore_args)
# 2: if anything is not cachable, we will just return the operation, without caching
if reduce(lambda a, b: a or (not (isinstance(b, Observable) or b is None)), inputs, False):
self.logger.info("some inputs are not observable: returning without caching")
self.logger.debug(str(map(lambda x: isinstance(x, Observable) or x is None, inputs)))
self.logger.debug(str(map(repr, inputs)))
return self.operation(*args, **kw)
# 3&4: check whether this cache_id has been cached, then has it changed?
try:
if(self.inputs_changed[cache_id]):
self.logger.debug("{} already seen, but inputs changed. refreshing cacher".format(cache_id))
# 4: This happens, when elements have changed for this cache self.id
self.inputs_changed[cache_id] = False
self.cached_outputs[cache_id] = self.operation(*args, **kw)
except KeyError:
self.logger.info("{} never seen, creating cache entry".format(cache_id))
# 3: This is when we never saw this cache_id:
self.ensure_cache_length(cache_id)
self.add_to_cache(cache_id, inputs, self.operation(*args, **kw))
except:
self.logger.error("an error occurred while trying to run caching for {}, resetting".format(cache_id))
self.reset()
raise
# 5: We have seen this cache_id and it is cached:
self.logger.info("returning cache {}".format(cache_id))
return self.cached_outputs[cache_id]
def on_cache_changed(self, direct, which=None):
@ -143,7 +127,6 @@ class Cacher(object):
ind_id = self.id(what)
_, cache_ids = self.cached_input_ids.get(ind_id, [None, []])
for cache_id in cache_ids:
self.logger.info("callback from {} changed inputs from {}".format(ind_id, self.inputs_changed[cache_id]))
self.inputs_changed[cache_id] = True
def reset(self):

View file

@ -988,7 +988,8 @@ def olivetti_faces(data_set='olivetti_faces'):
for subject in range(40):
for image in range(10):
image_path = os.path.join(path, 'orl_faces', 's'+str(subject+1), str(image+1) + '.pgm')
Y.append(GPy.util.netpbmfile.imread(image_path).flatten())
from GPy.util import netpbmfile
Y.append(netpbmfile.imread(image_path).flatten())
lbls.append(subject)
Y = np.asarray(Y)
lbls = np.asarray(lbls)[:, None]

View file

@ -18,6 +18,6 @@ def initialize_latent(init, input_dim, Y):
var = Xr.var(0)
Xr -= Xr.mean(0)
Xr /= Xr.var(0)
Xr /= Xr.std(0)
return Xr, var/var.max()

View file

@ -15,6 +15,7 @@ import scipy
import warnings
import os
from config import *
import logging
_scipyversion = np.float64((scipy.__version__).split('.')[:2])
_fix_dpotri_scipy_bug = True
@ -93,14 +94,20 @@ def jitchol(A, maxtries=5):
raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
jitter = diagA.mean() * 1e-6
while maxtries > 0 and np.isfinite(jitter):
print 'Warning: adding jitter of {:.10e}'.format(jitter)
try:
return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
except:
jitter *= 10
finally:
maxtries -= 1
raise linalg.LinAlgError, "not positive definite, even with jitter."
import traceback
try: raise
except:
logging.warning('\n'.join(['Added jitter of {:.10e}'.format(jitter),
' in '+traceback.format_list(traceback.extract_stack(limit=2)[-2:-1])[0][2:]]))
import ipdb;ipdb.set_trace()
return L
@ -122,10 +129,17 @@ def dtrtrs(A, B, lower=1, trans=0, unitdiag=0):
"""
Wrapper for lapack dtrtrs function
DTRTRS solves a triangular system of the form
A * X = B or A**T * X = B,
where A is a triangular matrix of order N, and B is an N-by-NRHS
matrix. A check is made to verify that A is nonsingular.
:param A: Matrix A(triangular)
:param B: Matrix B
:param lower: is matrix lower (true) or upper (false)
:returns:
:returns: Solution to A * X = B or A**T * X = B
"""
A = np.asfortranarray(A)

331
GPy/util/netpbmfile.py Normal file
View file

@ -0,0 +1,331 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# netpbmfile.py
# Copyright (c) 2011-2013, Christoph Gohlke
# Copyright (c) 2011-2013, The Regents of the University of California
# Produced at the Laboratory for Fluorescence Dynamics.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the copyright holders nor the names of any
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Read and write image data from respectively to Netpbm files.
This implementation follows the Netpbm format specifications at
http://netpbm.sourceforge.net/doc/. No gamma correction is performed.
The following image formats are supported: PBM (bi-level), PGM (grayscale),
PPM (color), PAM (arbitrary), XV thumbnail (RGB332, read-only).
:Author:
`Christoph Gohlke <http://www.lfd.uci.edu/~gohlke/>`_
:Organization:
Laboratory for Fluorescence Dynamics, University of California, Irvine
:Version: 2013.01.18
Requirements
------------
* `CPython 2.7, 3.2 or 3.3 <http://www.python.org>`_
* `Numpy 1.7 <http://www.numpy.org>`_
* `Matplotlib 1.2 <http://www.matplotlib.org>`_ (optional for plotting)
Examples
--------
>>> im1 = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16)
>>> imsave('_tmp.pgm', im1)
>>> im2 = imread('_tmp.pgm')
>>> assert numpy.all(im1 == im2)
"""
from __future__ import division, print_function
import sys
import re
import math
from copy import deepcopy
import numpy
__version__ = '2013.01.18'
__docformat__ = 'restructuredtext en'
__all__ = ['imread', 'imsave', 'NetpbmFile']
def imread(filename, *args, **kwargs):
    """Return image data from a Netpbm file as a numpy array.

    :param filename: path of the Netpbm file to read.
    :param args: positional arguments forwarded to NetpbmFile.asarray().
    :param kwargs: keyword arguments forwarded to NetpbmFile.asarray().
    :returns: numpy array with the image data.
    :raises ValueError: if the file is not a valid Netpbm file.

    Examples
    --------
    >>> image = imread('_tmp.pgm')
    """
    # Construct the reader *outside* the try block: if the constructor
    # raises, `netpbm` would be unbound and the original `finally`
    # clause raised NameError, masking the real error.
    netpbm = NetpbmFile(filename)
    try:
        image = netpbm.asarray(*args, **kwargs)
    finally:
        netpbm.close()
    return image
def imsave(filename, data, maxval=None, pam=False):
    """Write image data to a Netpbm file.

    :param filename: path of the file to write.
    :param data: array-like image data (integer type).
    :param maxval: maximum sample value; inferred from data if None.
    :param pam: if True, write in PAM (P7) format.

    Examples
    --------
    >>> image = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16)
    >>> imsave('_tmp.pgm', image)
    """
    # Construct outside the try block: if the constructor raises,
    # `netpbm` would be unbound and the original `finally` clause
    # raised NameError, hiding the real error.
    netpbm = NetpbmFile(data, maxval=maxval)
    try:
        netpbm.write(filename, pam=pam)
    finally:
        netpbm.close()
class NetpbmFile(object):
    """Read and write Netpbm PAM, PBM, PGM, PPM files.

    An instance can be initialized from a filename, an open binary file
    object, or a numpy array (see ``__init__``).  Image data is read
    lazily via ``asarray`` and written via ``write``.
    """

    # Maps a file's magic number to its Netpbm tuple type.  b'P7 332' is
    # the XV thumbnail variant (read-only, palette-based RGB).
    _types = {b'P1': b'BLACKANDWHITE', b'P2': b'GRAYSCALE', b'P3': b'RGB',
              b'P4': b'BLACKANDWHITE', b'P5': b'GRAYSCALE', b'P6': b'RGB',
              b'P7 332': b'RGB', b'P7': b'RGB_ALPHA'}

    def __init__(self, arg=None, **kwargs):
        """Initialize instance from filename, open file, or numpy array."""
        # Pre-set all attributes to None so partially-initialized
        # instances are still safe to close()/repr.
        for attr in ('header', 'magicnum', 'width', 'height', 'maxval',
                     'depth', 'tupltypes', '_filename', '_fh', '_data'):
            setattr(self, attr, None)
        if arg is None:
            # Empty image.
            self._fromdata([], **kwargs)
        elif isinstance(arg, basestring):
            # Path: open the file ourselves and remember we own it.
            self._fh = open(arg, 'rb')
            self._filename = arg
            self._fromfile(self._fh, **kwargs)
        elif hasattr(arg, 'seek'):
            # File-like object supplied by the caller.
            self._fromfile(arg, **kwargs)
            self._fh = arg
        else:
            # Anything else is treated as array-like image data.
            self._fromdata(arg, **kwargs)

    def asarray(self, copy=True, cache=False, **kwargs):
        """Return image data from file as numpy array.

        :param copy: if True, return a deep copy of cached data.
        :param cache: if True, keep the decoded data on the instance.
        """
        data = self._data
        if data is None:
            data = self._read_data(self._fh, **kwargs)
            if cache:
                self._data = data
            else:
                # Freshly-read data is not shared; no copy needed.
                return data
        return deepcopy(data) if copy else data

    def write(self, arg, **kwargs):
        """Write instance to file (path or open binary file object)."""
        if hasattr(arg, 'seek'):
            self._tofile(arg, **kwargs)
        else:
            with open(arg, 'wb') as fid:
                self._tofile(fid, **kwargs)

    def close(self):
        """Close open file. Future asarray calls might fail."""
        # Only close handles this instance opened itself (from a path).
        if self._filename and self._fh:
            self._fh.close()
            self._fh = None

    def __del__(self):
        self.close()

    def _fromfile(self, fh):
        """Initialize instance from open file."""
        fh.seek(0)
        # 4096 bytes is assumed sufficient to hold any header.
        data = fh.read(4096)
        # Second byte of the magic number must be '1'..'7'.
        if (len(data) < 7) or not (b'0' < data[1:2] < b'8'):
            raise ValueError("Not a Netpbm file:\n%s" % data[:32])
        # Try the PAM (P7) header first, fall back to classic PNM.
        try:
            self._read_pam_header(data)
        except Exception:
            try:
                self._read_pnm_header(data)
            except Exception:
                raise ValueError("Not a Netpbm file:\n%s" % data[:32])

    def _read_pam_header(self, data):
        """Read PAM header and initialize instance."""
        # Capture the whole header plus the HEIGHT/WIDTH/DEPTH/MAXVAL
        # key-value entries as separate groups.
        regroups = re.search(
            b"(^P7[\n\r]+(?:(?:[\n\r]+)|(?:#.*)|"
            b"(HEIGHT\s+\d+)|(WIDTH\s+\d+)|(DEPTH\s+\d+)|(MAXVAL\s+\d+)|"
            b"(?:TUPLTYPE\s+\w+))*ENDHDR\n)", data).groups()
        self.header = regroups[0]
        self.magicnum = b'P7'
        # Each remaining group is b"KEY value"; store as lowercase attr.
        for group in regroups[1:]:
            key, value = group.split()
            setattr(self, unicode(key).lower(), int(value))
        matches = re.findall(b"(TUPLTYPE\s+\w+)", self.header)
        self.tupltypes = [s.split(None, 1)[1] for s in matches]

    def _read_pnm_header(self, data):
        """Read PNM header and initialize instance."""
        # P1/P4 (bi-level) headers have no MAXVAL field.
        bpm = data[1:2] in b"14"
        # The `* (not bpm)` term drops the maxval sub-pattern for
        # bi-level files; `(1, ) * bpm` then supplies maxval=1.
        regroups = re.search(b"".join((
            b"(^(P[123456]|P7 332)\s+(?:#.*[\r\n])*",
            b"\s*(\d+)\s+(?:#.*[\r\n])*",
            b"\s*(\d+)\s+(?:#.*[\r\n])*" * (not bpm),
            b"\s*(\d+)\s(?:\s*#.*[\r\n]\s)*)")), data).groups() + (1, ) * bpm
        self.header = regroups[0]
        self.magicnum = regroups[1]
        self.width = int(regroups[2])
        self.height = int(regroups[3])
        self.maxval = int(regroups[4])
        self.depth = 3 if self.magicnum in b"P3P6P7 332" else 1
        self.tupltypes = [self._types[self.magicnum]]

    def _read_data(self, fh, byteorder='>'):
        """Return image data from open file as numpy array.

        :param byteorder: byte order of 16-bit samples ('>' per spec).
        """
        fh.seek(len(self.header))
        data = fh.read()
        # Samples are 1 byte up to maxval 255, else big-endian 2 bytes.
        dtype = 'u1' if self.maxval < 256 else byteorder + 'u2'
        depth = 1 if self.magicnum == b"P7 332" else self.depth
        # Leading -1 allows multiple images concatenated in one file.
        shape = [-1, self.height, self.width, depth]
        size = numpy.prod(shape[1:])
        if self.magicnum in b"P1P2P3":
            # ASCII formats: whitespace-separated sample values.
            data = numpy.array(data.split(None, size)[:size], dtype)
            data = data.reshape(shape)
        elif self.maxval == 1:
            # Packed bi-level data: 8 pixels per byte, rows padded.
            shape[2] = int(math.ceil(self.width / 8))
            data = numpy.frombuffer(data, dtype).reshape(shape)
            data = numpy.unpackbits(data, axis=-2)[:, :, :self.width, :]
        else:
            data = numpy.frombuffer(data, dtype)
            # Truncate any trailing partial image before reshaping.
            data = data[:size * (data.size // size)].reshape(shape)
        # Squeeze singleton image-count and depth axes.
        if data.shape[0] < 2:
            data = data.reshape(data.shape[1:])
        if data.shape[-1] < 2:
            data = data.reshape(data.shape[:-1])
        if self.magicnum == b"P7 332":
            # Expand RGB332 palette indices to full RGB.
            rgb332 = numpy.array(list(numpy.ndindex(8, 8, 4)), numpy.uint8)
            rgb332 *= [36, 36, 85]
            data = numpy.take(rgb332, data, axis=0)
        return data

    def _fromdata(self, data, maxval=None):
        """Initialize instance from numpy array.

        :raises ValueError: for non-integer dtypes or out-of-range data.
        """
        data = numpy.array(data, ndmin=2, copy=True)
        if data.dtype.kind not in "uib":
            raise ValueError("not an integer type: %s" % data.dtype)
        if data.dtype.kind == 'i' and numpy.min(data) < 0:
            raise ValueError("data out of range: %i" % numpy.min(data))
        if maxval is None:
            maxval = numpy.max(data)
            # Round up to the nearest representable sample width.
            maxval = 255 if maxval < 256 else 65535
        if maxval < 0 or maxval > 65535:
            raise ValueError("data out of range: %i" % maxval)
        data = data.astype('u1' if maxval < 256 else '>u2')
        self._data = data
        # A trailing axis of 3 (RGB) or 4 (RGBA) is a color image.
        if data.ndim > 2 and data.shape[-1] in (3, 4):
            self.depth = data.shape[-1]
            self.width = data.shape[-2]
            self.height = data.shape[-3]
            self.magicnum = b'P7' if self.depth == 4 else b'P6'
        else:
            self.depth = 1
            self.width = data.shape[-1]
            self.height = data.shape[-2]
            self.magicnum = b'P5' if maxval > 1 else b'P4'
        self.maxval = maxval
        self.tupltypes = [self._types[self.magicnum]]
        self.header = self._header()

    def _tofile(self, fh, pam=False):
        """Write Netpbm file to open binary file object."""
        fh.seek(0)
        fh.write(self._header(pam))
        data = self.asarray(copy=False)
        if self.maxval == 1:
            # Bi-level data is written packed, 8 pixels per byte.
            data = numpy.packbits(data, axis=-1)
        data.tofile(fh)

    def _header(self, pam=False):
        """Return file header as byte string."""
        if pam or self.magicnum == b'P7':
            header = "\n".join((
                "P7",
                "HEIGHT %i" % self.height,
                "WIDTH %i" % self.width,
                "DEPTH %i" % self.depth,
                "MAXVAL %i" % self.maxval,
                "\n".join("TUPLTYPE %s" % unicode(i) for i in self.tupltypes),
                "ENDHDR\n"))
        elif self.maxval == 1:
            header = "P4 %i %i\n" % (self.width, self.height)
        elif self.depth == 1:
            header = "P5 %i %i %i\n" % (self.width, self.height, self.maxval)
        else:
            header = "P6 %i %i %i\n" % (self.width, self.height, self.maxval)
        if sys.version_info[0] > 2:
            # File is opened in binary mode; header must be bytes on Py3.
            header = bytes(header, 'ascii')
        return header

    def __str__(self):
        """Return information about instance."""
        return unicode(self.header)
# Python 3 compatibility shims: the module body above uses the Python 2
# names `basestring` and `unicode`; alias them on Python 3.
if sys.version_info[0] > 2:
    basestring = str
    unicode = lambda x: str(x, 'ascii')
if __name__ == "__main__":
# Show images specified on command line or all images in current directory
from glob import glob
from matplotlib import pyplot
files = sys.argv[1:] if len(sys.argv) > 1 else glob('*.p*m')
for fname in files:
try:
pam = NetpbmFile(fname)
img = pam.asarray(copy=False)
if False:
pam.write('_tmp.pgm.out', pam=True)
img2 = imread('_tmp.pgm.out')
assert numpy.all(img == img2)
imsave('_tmp.pgm.out', img)
img2 = imread('_tmp.pgm.out')
assert numpy.all(img == img2)
pam.close()
except ValueError as e:
print(fname, e)
continue
_shape = img.shape
if img.ndim > 3 or (img.ndim > 2 and img.shape[-1] not in (3, 4)):
img = img[0]
cmap = 'gray' if pam.maxval > 1 else 'binary'
pyplot.imshow(img, cmap, interpolation='nearest')
pyplot.title("%s %s %s %s" % (fname, unicode(pam.magicnum),
_shape, img.dtype))
pyplot.show()

View file

@ -48,14 +48,10 @@ def common_subarrays(X, axis=0):
assert X.ndim == 2 and axis in (0,1), "Only implemented for 2D arrays"
subarrays = defaultdict(list)
cnt = count()
logger = logging.getLogger("common_subarrays")
def accumulate(x, s, c):
logger.debug("creating tuple")
t = tuple(x)
logger.debug("tuple done")
col = c.next()
iadd(s[t], [col])
logger.debug("added col {}".format(col))
return None
if axis == 0: [accumulate(x, subarrays, cnt) for x in X]
else: [accumulate(x, subarrays, cnt) for x in X.T]