diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 2c26ee7d..e0f5755c 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -14,6 +14,9 @@ from ..inference.latent_function_inference import exact_gaussian_inference, expe from parameterization.variational import VariationalPosterior from scipy.sparse.base import issparse +import logging +logger = logging.getLogger("GP") + class GP(Model): """ General purpose Gaussian process model @@ -40,6 +43,7 @@ class GP(Model): self.num_data, self.input_dim = self.X.shape assert Y.ndim == 2 + logger.info("initializing Y") if issparse(Y): self.Y = Y else: self.Y = ObsAr(Y) assert Y.shape[0] == self.num_data @@ -56,6 +60,7 @@ class GP(Model): self.likelihood = likelihood #find a sensible inference method + logger.info("initializing inference method") if inference_method is None: if isinstance(likelihood, likelihoods.Gaussian) or isinstance(likelihood, likelihoods.MixedNoise): inference_method = exact_gaussian_inference.ExactGaussianInference() @@ -64,6 +69,7 @@ class GP(Model): print "defaulting to ", inference_method, "for latent function inference" self.inference_method = inference_method + logger.info("adding kernel and likelihood as parameters") self.add_parameter(self.kern) self.add_parameter(self.likelihood) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index d80ac2ce..b56cf77c 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -9,6 +9,8 @@ import numpy as np from ...util.misc import param_to_array from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) +import logging +logger = logging.getLogger('vardtc') class VarDTC(LatentFunctionInference): """ @@ -225,14 +227,18 @@ class VarDTCMissingData(LatentFunctionInference): from ...util.subarray_and_sorting import common_subarrays self._subarray_indices = [] csa = common_subarrays(inan, 1) - for v,ind in csa.iteritems(): + size = len(csa) + for i, (v,ind) in enumerate(csa.iteritems()): if not np.all(v): + logger.info('preparing subarrays {:.3%}'.format((i+1.)/size)) v = ~np.array(v, dtype=bool) ind = np.array(ind, dtype=int) if ind.size == Y.shape[1]: ind = slice(None) self._subarray_indices.append([v,ind]) + logger.info('preparing subarrays Y') Ys = [Y[v, :][:, ind] for v, ind in self._subarray_indices] + logger.info('preparing traces Y') traces = [(y**2).sum() for y in Ys] return Ys, traces else: @@ -280,7 +286,10 @@ class VarDTCMissingData(LatentFunctionInference): #if not full_VVT_factor: # psi1V = np.dot(Y.T*beta_all, psi1_all).T + #logger.info('computing dimension-wise likelihood and derivatives') + #size = len(Ys) for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices): + #logger.info('{:.3%} dimensions:{}'.format((i+1.)/size, ind)) if het_noise: beta = beta_all[ind] else: beta = beta_all diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 5a885bb0..29478ed7 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -37,13 +37,16 @@ class BayesianGPLVM(SparseGP): self.init = init if X_variance is None: + self.logger.info("initializing latent space variance ~ N(0,.1)") X_variance = np.random.uniform(0,.1,X.shape) if Z is None: + self.logger.info("initializing inducing inputs") Z = np.random.permutation(X.copy())[:num_inducing] assert Z.shape[1] == X.shape[1] if kernel is None: + self.logger.info("initializing kernel RBF") kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) # + kern.white(input_dim) if likelihood is None: diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index 8f3e55b0..7926410e 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -8,7 +8,7 @@ from base_plots import gpplot, x_frame1D, x_frame2D from ...util.misc import param_to_array from ...models.gp_coregionalized_regression import GPCoregionalizedRegression from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression - +from scipy import sparse def plot_fit(model, plot_limits=None, which_data_rows='all', which_data_ycols='all', fixed_inputs=[], @@ -61,11 +61,14 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs(): X = model.X.mean - X_variance = param_to_array(model.X.variance) + X_variance = model.X.variance else: X = model.X - X, Y = param_to_array(X, model.Y) - if hasattr(model, 'Z'): Z = param_to_array(model.Z) + #X, Y = param_to_array(X, model.Y) + Y = model.Y + if sparse.issparse(Y): Y = Y.todense().view(np.ndarray) + + if hasattr(model, 'Z'): Z = model.Z #work out what the inputs are for plotting (1D or 2D) fixed_dims = np.array([i for i,v in fixed_inputs]) diff --git a/GPy/util/caching.py b/GPy/util/caching.py index d54b3a0b..dd0dc043 100644 --- a/GPy/util/caching.py +++ b/GPy/util/caching.py @@ -18,13 +18,12 @@ class Cacher(object): self.operation = operation self.order = collections.deque() self.cached_inputs = {} # point from cache_ids to a list of [ind_ids], which where used in cache cache_id - self.logger = logging.getLogger("cache") #======================================================================= # point from each ind_id to [ref(obj), cache_ids] # 0: a weak reference to the object itself # 1: the cache_ids in which this ind_id is used (len will be how many times we have seen this ind_id) - self.cached_input_ids = {} + self.cached_input_ids = {} #======================================================================= self.cached_outputs = {} # point from cache_ids to outputs @@ -36,23 +35,18 @@ class Cacher(object): def combine_inputs(self, args, kw): "Combines the args and kw in a unique way, such that ordering of kwargs does not lead to recompute" - self.logger.debug("combining args and kw") return args + tuple(c[1] for c in sorted(kw.items(), key=lambda x: x[0])) def prepare_cache_id(self, combined_args_kw, ignore_args): "get the cacheid (conc. string of argument self.ids in order) ignoring ignore_args" cache_id = "".join(self.id(a) for i, a in enumerate(combined_args_kw) if i not in ignore_args) - self.logger.debug("cache_id={} was created".format(cache_id)) return cache_id def ensure_cache_length(self, cache_id): "Ensures the cache is within its limits and has one place free" - self.logger.debug("cache length gets ensured") if len(self.order) == self.limit: - self.logger.debug("cache limit of l={} was reached".format(self.limit)) # we have reached the limit, so lets release one element cache_id = self.order.popleft() - self.logger.debug("cach_id '{}' gets removed".format(cache_id)) combined_args_kw = self.cached_inputs[cache_id] for ind in combined_args_kw: if ind is not None: @@ -66,7 +60,6 @@ class Cacher(object): else: cache_ids.remove(cache_id) self.cached_input_ids[ind_id] = [ref, cache_ids] - self.logger.debug("removing caches") del self.cached_outputs[cache_id] del self.inputs_changed[cache_id] del self.cached_inputs[cache_id] @@ -81,10 +74,8 @@ class Cacher(object): if a is not None: ind_id = self.id(a) v = self.cached_input_ids.get(ind_id, [weakref.ref(a), []]) - self.logger.debug("cache_id '{}' gets stored".format(cache_id)) v[1].append(cache_id) if len(v[1]) == 1: - self.logger.debug("adding observer to object {}".format(repr(a))) a.add_observer(self, self.on_cache_changed) self.cached_input_ids[ind_id] = v @@ -108,28 +99,23 @@ class Cacher(object): cache_id = self.prepare_cache_id(inputs, self.ignore_args) # 2: if anything is not cachable, we will just return the operation, without caching if reduce(lambda a, b: a or (not (isinstance(b, Observable) or b is None)), inputs, False): - self.logger.info("some inputs are not observable: returning without caching") self.logger.debug(str(map(lambda x: isinstance(x, Observable) or x is None, inputs))) self.logger.debug(str(map(repr, inputs))) return self.operation(*args, **kw) # 3&4: check whether this cache_id has been cached, then has it changed? try: if(self.inputs_changed[cache_id]): - self.logger.debug("{} already seen, but inputs changed. refreshing cacher".format(cache_id)) # 4: This happens, when elements have changed for this cache self.id self.inputs_changed[cache_id] = False self.cached_outputs[cache_id] = self.operation(*args, **kw) except KeyError: - self.logger.info("{} never seen, creating cache entry".format(cache_id)) # 3: This is when we never saw this chache_id: self.ensure_cache_length(cache_id) self.add_to_cache(cache_id, inputs, self.operation(*args, **kw)) except: - self.logger.error("an error occurred while trying to run caching for {}, resetting".format(cache_id)) self.reset() raise # 5: We have seen this cache_id and it is cached: - self.logger.info("returning cache {}".format(cache_id)) return self.cached_outputs[cache_id] def on_cache_changed(self, direct, which=None): @@ -143,7 +129,6 @@ class Cacher(object): ind_id = self.id(what) _, cache_ids = self.cached_input_ids.get(ind_id, [None, []]) for cache_id in cache_ids: - self.logger.info("callback from {} changed inputs from {}".format(ind_id, self.inputs_changed[cache_id])) self.inputs_changed[cache_id] = True def reset(self): diff --git a/GPy/util/subarray_and_sorting.py b/GPy/util/subarray_and_sorting.py index 3eea10a0..0966084c 100644 --- a/GPy/util/subarray_and_sorting.py +++ b/GPy/util/subarray_and_sorting.py @@ -48,16 +48,10 @@ def common_subarrays(X, axis=0): assert X.ndim == 2 and axis in (0,1), "Only implemented for 2D arrays" subarrays = defaultdict(list) cnt = count() - if axis == 0: size = X.shape[0] - else: size = X.shape[1] - logger = logging.getLogger("common_subarrays") def accumulate(x, s, c): - logger.debug("creating tuple") t = tuple(x) - logger.debug("tuple done") col = c.next() iadd(s[t], [col]) - logger.info("added col {} {:.2%}".format(col, col/float(size))) return None if axis == 0: [accumulate(x, subarrays, cnt) for x in X] else: [accumulate(x, subarrays, cnt) for x in X.T]