diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index fec61204..2b7ca7ad 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -60,7 +60,7 @@ class VarDTC(object): _, output_dim = Y.shape #see whether we've got a different noise variance for each datum - beta = 1./np.squeeze(likelihood.variance) + beta = 1./np.fmax(1e-6, np.squeeze(likelihood.variance)) # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency! #self.YYTfactor = self.get_YYTfactor(Y) @@ -214,7 +214,7 @@ class VarDTCMissingData(object): psi2_all = None Ys, traces = self._Y(Y) - beta_all = 1./likelihood.variance + beta_all = 1./np.fmax(1e-6, likelihood.variance) het_noise = beta_all.size != 1 import itertools diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index d4a60077..38022bd4 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -159,7 +159,7 @@ class RBF(Stationary): grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * dist, 1) grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (dist_sq - 1), 1) #psi2 - denom, Zdist, Zdist_sq, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior) + denom, _, _, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior) tmp = psi2[:, :, :, None] / l2 / denom grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * mudist).sum(1).sum(1) grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*mudist_sq - 1)).sum(1).sum(1) @@ -237,7 +237,7 @@ class RBF(Stationary): return denom, dist, dist_sq, psi1 - #@cache_this(ignore_args=(1,)) + @Cache_this(limit=1, ignore_args=(0,)) def _Z_distances(self, Z): Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index ae4cd879..bc51d850 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -69,18 
+69,18 @@ class Stationary(Kern): def dK_dr(self, r): raise NotImplementedError, "implement derivative of the covariance function wrt r to use this class" - #@Cache_this(limit=5, ignore_args=()) + @Cache_this(limit=5, ignore_args=()) def K(self, X, X2=None): r = self._scaled_dist(X, X2) return self.K_of_r(r) - #@Cache_this(limit=5, ignore_args=(0,)) + @Cache_this(limit=5, ignore_args=(0,)) def _dist(self, X, X2): if X2 is None: X2 = X return X[:, None, :] - X2[None, :, :] - #@Cache_this(limit=5, ignore_args=(0,)) + @Cache_this(limit=5, ignore_args=(0,)) def _unscaled_dist(self, X, X2=None): """ Compute the square distance between each row of X and X2, or between @@ -94,7 +94,7 @@ class Stationary(Kern): X2sq = np.sum(np.square(X2),1) return np.sqrt(-2.*np.dot(X, X2.T) + (X1sq[:,None] + X2sq[None,:])) - #@Cache_this(limit=5, ignore_args=()) + @Cache_this(limit=5, ignore_args=()) def _scaled_dist(self, X, X2=None): """ Efficiently compute the scaled distance, r. @@ -147,7 +147,7 @@ class Stationary(Kern): diagonal, where we return zero (the distance on the diagonal is zero). This term appears in derviatives. """ - dist = self._scaled_dist(X, X2) + dist = self._scaled_dist(X, X2).copy() if X2 is None: nondiag = util.diag.offdiag_view(dist) nondiag[:] = 1./nondiag diff --git a/GPy/util/caching.py b/GPy/util/caching.py index 76d030ca..a2017407 100644 --- a/GPy/util/caching.py +++ b/GPy/util/caching.py @@ -39,7 +39,7 @@ class Cacher(object): return self.operation(*args) # TODO: WARNING !!! Cache OFFSWITCH !!! WARNING - return self.operation(*args) + # return self.operation(*args) #if the result is cached, return the cached computation state = [all(a is b for a, b in zip(args, cached_i)) for cached_i in self.cached_inputs]