diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index bd316eba..85528b72 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -24,7 +24,7 @@ def BGPLVM(seed=default_seed): Y = np.random.multivariate_normal(np.zeros(N), K, Q).T lik = Gaussian(Y, normalize=True) - k = GPy.kern.rbf_inv(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q) + k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q) # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) # k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001) @@ -144,7 +144,7 @@ def BGPLVM_oil(optimize=True, N=200, Q=10, num_inducing=15, max_iters=150, plot= data = GPy.util.datasets.oil() # create simple GP model - kernel = GPy.kern.rbf_inv(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2)) + kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2)) Y = data['X'][:N] Yn = Y - Y.mean(0) @@ -160,9 +160,9 @@ def BGPLVM_oil(optimize=True, N=200, Q=10, num_inducing=15, max_iters=150, plot= # optimize if optimize: - m.constrain_fixed('noise') - m.optimize('scg', messages=1, max_iters=200, gtol=.05) - m.constrain_positive('noise') +# m.constrain_fixed('noise') +# m.optimize('scg', messages=1, max_iters=200, gtol=.05) +# m.constrain_positive('noise') m.optimize('scg', messages=1, max_iters=max_iters, gtol=.05) if plot: @@ -377,10 +377,10 @@ def stick_bgplvm(model=None): data = GPy.util.datasets.stick() Q = 6 kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2)) - m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20,kernel=kernel) + m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel) # optimize m.ensure_default_constraints() - m.optimize(messages=1, max_iters=3000,xtol=1e-300,ftol=1e-300) + m.optimize(messages=1, max_iters=3000, xtol=1e-300, ftol=1e-300) m._set_params(m._get_params()) plt.clf, (latent_axes, sense_axes) = plt.subplots(1, 2) plt.sca(latent_axes) diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 8db28e0a..b2b99094 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -104,6 +104,8 @@ class kern(Parameterized): x = np.arange(x0) transOffset = offset_copy(ax.transData, fig=fig, x=0., y= -2., units='points') + transOffsetUp = offset_copy(ax.transData, fig=fig, + x=0., y=2., units='points') for bar in bars: for patch, num in zip(bar.patches, np.arange(len(bar.patches))): height = patch.get_height() @@ -111,10 +113,12 @@ class kern(Parameterized): va = 'top' c = 'w' t = TextPath((0, 0), "${xi}$".format(xi=xi), rotation=0, usetex=True, ha='center') + transform = transOffset if patch.get_extents().height <= t.get_extents().height + 2: va = 'bottom' c = 'k' - ax.text(xi, height, "${xi}$".format(xi=int(num)), color=c, rotation=0, ha='center', va=va, transform=transOffset) + transform = transOffsetUp + ax.text(xi, height, "${xi}$".format(xi=int(num)), color=c, rotation=0, ha='center', va=va, transform=transform) # for xi, t in zip(x, xticklabels): # ax.text(xi, maxi / 2, t, rotation=90, ha='center', va='center') # ax.set_xticklabels(xticklabels, rotation=17) diff --git a/GPy/kern/parts/linear.py b/GPy/kern/parts/linear.py index 04bd78a4..82a8c263 100644 --- a/GPy/kern/parts/linear.py +++ b/GPy/kern/parts/linear.py @@ -140,30 +140,26 @@ class Linear(Kernpart): def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target): self.dK_dX(dL_dpsi1.T, Z, mu, target) - def psi2(self, Z, mu, S, target): - """ - returns N,num_inducing,num_inducing matrix - """ + def psi2_old(self, Z, mu, S, target): self._psi_computations(Z, mu, S) -# psi2_old = self.ZZ * np.square(self.variances) * self.mu2_S[:, None, None, :] -# target += psi2.sum(-1) - # slow way of doing it, but right -# psi2_real = rm np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) -# for n in range(mu.shape[0]): -# for m_prime in range(Z.shape[0]): -# for m in range(Z.shape[0]): -# tmp = self._Z[m:m + 1] * self.variances -# tmp = np.dot(tmp, (tdot(self._mu[n:n + 1].T) + np.diag(S[n]))) -# psi2_real[n, m, m_prime] = np.dot(tmp, ( -# self._Z[m_prime:m_prime + 1] * self.variances).T) -# mu2_S = (self._mu[:, None, :] * self._mu[:, :, None]) -# mu2_S[:, np.arange(self.input_dim), np.arange(self.input_dim)] += self._S -# psi2 = (self.ZA[None, :, None, :] * mu2_S[:, None]).sum(-1) -# psi2 = (psi2[:, :, None] * self.ZA[None, None]).sum(-1) -# psi2_tensor = np.tensordot(self.ZZ[None, :, :, :] * np.square(self.variances), self.mu2_S[:, None, None, :], ((3), (3))).squeeze().T target += self._psi2 + def psi2(self,Z,mu,S,target): + tmp = np.zeros((mu.shape[0], Z.shape[0])) + self.K(mu,Z,tmp) + target += tmp[:,:,None]*tmp[:,None,:] + np.sum(S[:,None,None,:]*self.variances**2*Z[None,:,None,:]*Z[None,None,:,:],-1) + def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target): + tmp = np.zeros((mu.shape[0], Z.shape[0])) + self.K(mu,Z,tmp) + self.dK_dtheta(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target) + result= 2.*(dL_dpsi2[:,:,:,None]*S[:,None,None,:]*self.variances*Z[None,:,None,:]*Z[None,None,:,:]).sum(0).sum(0).sum(0) + if self.ARD: + target += result.sum(0).sum(0).sum(0) + else: + target += result.sum() + + def dpsi2_dtheta_old(self, dL_dpsi2, Z, mu, S, target): self._psi_computations(Z, mu, S) tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :]) if self.ARD: @@ -172,6 +168,15 @@ class Linear(Kernpart): target += tmp.sum() def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): + tmp = np.zeros((mu.shape[0], Z.shape[0])) + self.K(mu,Z,tmp) + self.dK_dX(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu) + + Zs = Z*self.variances + Zs_sq = Zs[:,None,:]*Zs[None,:,:] + target_S += (dL_dpsi2[:,:,:,None]*Zs_sq[None,:,:,:]).sum(1).sum(1) + + def dpsi2_dmuS_old(self, dL_dpsi2, Z, mu, S, target_mu, target_S): """Think N,num_inducing,num_inducing,input_dim """ self._psi_computations(Z, mu, S) AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :] diff --git a/GPy/kern/parts/rbf.py b/GPy/kern/parts/rbf.py index f8ef57df..345134bd 100644 --- a/GPy/kern/parts/rbf.py +++ b/GPy/kern/parts/rbf.py @@ -221,9 +221,10 @@ class RBF(Kernpart): #---------------------------------------# def _K_computations(self, X, X2): - if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2) and fast_array_equal(self._params , self._get_params())): + params = self._get_params() + if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2) and fast_array_equal(self._params , params)): self._X = X.copy() - self._params = self._get_params().copy() + self._params = params.copy() if X2 is None: self._X2 = None X = X / self.lengthscale @@ -244,7 +245,7 @@ class RBF(Kernpart): self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q self._psi2_Zdist_sq = np.square(self._psi2_Zdist / self.lengthscale) # M,M,Q - if not (fast_array_equal(Z, self._Z) and fast_array_equal(mu, self._mu) and fast_array_equal(S, self._S)): + if not fast_array_equal(Z, self._Z) or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S): # something's changed. recompute EVERYTHING # psi1