diff --git a/GPy/kern/parts/kernpart.py b/GPy/kern/parts/kernpart.py
index 2ed56d66..bb836d1c 100644
--- a/GPy/kern/parts/kernpart.py
+++ b/GPy/kern/parts/kernpart.py
@@ -29,7 +29,11 @@ class Kernpart(object):
     def dK_dtheta(self,dL_dK,X,X2,target):
         raise NotImplementedError
     def dKdiag_dtheta(self,dL_dKdiag,X,target):
-        raise NotImplementedError
+        # In the base case compute this by calling dK_dtheta. Need to
+        # override for stationary covariances (for example) to save
+        # time.
+        for i in range(X.shape[0]):
+            self.dK_dtheta(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
     def psi0(self,Z,mu,S,target):
         raise NotImplementedError
     def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
diff --git a/GPy/kern/parts/mlp.py b/GPy/kern/parts/mlp.py
index 572713ef..e2531b23 100644
--- a/GPy/kern/parts/mlp.py
+++ b/GPy/kern/parts/mlp.py
@@ -96,34 +96,39 @@ class MLP(Kernpart):
             vec2 = (X2*X2).sum(1)
             target[1] += ((self._K_inner_prod/self._K_denom
                           -.5*self._K_numer/denom3
-                          *(np.outer((self.weight_variance*vec1+self.bias_variance+1.), vec2) + np.outer(vec1, self.weight_variance*vec2 + self.bias_variance_1.)))*base_cov_grad).sum()
+                          *(np.outer((self.weight_variance*vec1+self.bias_variance+1.), vec2) + np.outer(vec1, self.weight_variance*vec2 + self.bias_variance+1.)))*base_cov_grad).sum()
             target[2] += ((1./self._K_denom
                           -.5*self._K_numer/denom3
-                          *((vec1[None, :]+vec2[:, None])*self.weight_variance
+                          *((vec1[:, None]+vec2[None, :])*self.weight_variance
                             + 2*self.bias_variance + 2.))*base_cov_grad).sum()
         target[0] += np.sum(self._K_dvar*dL_dK)
+
     def dK_dX(self, dL_dK, X, X2, target):
         """Derivative of the covariance matrix with respect to X"""
         self._K_computations(X, X2)
         gX = np.zeros((X2.shape[0], X.shape[1], X.shape[0]))
         for i in range(X.shape[0]):
-            gX[:, :, i] = self._dK_dX_point(X[i, :], X2)
-
-    def _dK_dX_point(self, x, X2):
+            gX[:, :, i] = self._dK_dX_point(X, X2, i)
+
+
+    def _dK_dX_point(self, X, X2, i):
         """Gradient with respect to one point of X"""
-        inner_prod = np.dot(X2,x.T)
-        numer = inner_prod*self.weight_variance + self.bias_variance
-        vec1 = (x*x).sum(1)*self.weight_variance + self.bias_variance + 1.
+
+        inner_prod = self._K_inner_prod[i, :].T
+        numer = self._K_numer[i, :].T
+        denom = self._K_denom[i, :].T
+        arg = self._K_asin_arg[i, :].T
+        vec1 = (X[i, :]*X[i, :]).sum()*self.weight_variance + self.bias_variance + 1.
         vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
-        denom = np.sqrt(np.outer(vec2,vec1))
-        arg = numer/denom
+        #denom = np.sqrt(np.outer(vec2,vec1))
+        #arg = numer/denom
         gX = np.zeros(X2.shape)
         denom3 = denom*denom*denom
         for j in range(X2.shape[1]):
-            gX[:, j]=X2[:, j]/denom - vec2*x[:, j]*numer/denom3
+            gX[:, j]=X2[:, j]/denom - vec2*X[i, j]*numer/denom3
             gX[:, j] = four_over_tau*self.weight_variance*self.variance*gX[:, j]/np.sqrt(1-arg*arg)
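As a sanity check on the new Kernpart.dKdiag_dtheta fallback above, here is a minimal self-contained sketch (not part of the diff; ToyLinearKernpart is a hypothetical stand-in for a real Kernpart, with k(x, x') = variance * x.x') showing that looping dK_dtheta over single input points accumulates the same diagonal gradient as a direct computation:

import numpy as np

class ToyLinearKernpart(object):
    # Hypothetical linear-kernel part: k(x, x') = variance * x.x'.
    def __init__(self, variance=2.0):
        self.variance = variance

    def dK_dtheta(self, dL_dK, X, X2, target):
        # dK/dvariance = X X2^T; weight by dL_dK and accumulate in place.
        if X2 is None:
            X2 = X
        target[0] += np.sum(dL_dK * np.dot(X, X2.T))

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        # The base-case fallback from the diff: one 1x1 dK_dtheta call
        # per diagonal element.
        for i in range(X.shape[0]):
            self.dK_dtheta(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)

X = np.random.randn(5, 3)
dL_dKdiag = np.random.randn(5)
target = np.zeros(1)
ToyLinearKernpart().dKdiag_dtheta(dL_dKdiag, X, target)

# Direct diagonal gradient: d diag(K)_i / dvariance = x_i.x_i.
direct = np.sum(dL_dKdiag * np.sum(X * X, axis=1))
assert np.allclose(target[0], direct)

As the new comment notes, stationary covariances should override this, since their diagonal is constant in the inputs and the per-point loop is wasted work there.
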
diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index fb47646f..9c46b2f1 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -203,17 +203,22 @@ def swiss_roll(N=3000):
     Y = mat_data['X_data'][:, 0:N].transpose()
     return {'Y': Y, 'X': mat_data['X_data'], 'info': "The first 3,000 points from the swiss roll data of Tennenbaum, de Silva and Langford (2001)."}
 
-def toy_rbf_1d(seed=default_seed):
+def toy_rbf_1d(seed=default_seed, num_samples=500):
+    """Samples 500 values of a function from an RBF covariance with very small noise for inputs uniformly distributed between -1 and 1.
+    :param seed: seed to use for random sampling.
+    :type seed: int
+    :param num_samples: number of samples to sample in the function (default 500).
+    :type num_samples: int
+    """
     np.random.seed(seed=seed)
-    numIn = 1
-    N = 500
-    X = np.random.uniform(low= -1.0, high=1.0, size=(N, numIn))
+    num_in = 1
+    X = np.random.uniform(low= -1.0, high=1.0, size=(num_samples, num_in))
     X.sort(axis=0)
-    rbf = GPy.kern.rbf(numIn, variance=1., lengthscale=np.array((0.25,)))
-    white = GPy.kern.white(numIn, variance=1e-2)
+    rbf = GPy.kern.rbf(num_in, variance=1., lengthscale=np.array((0.25,)))
+    white = GPy.kern.white(num_in, variance=1e-2)
     kernel = rbf + white
     K = kernel.K(X)
-    y = np.reshape(np.random.multivariate_normal(np.zeros(N), K), (N, 1))
+    y = np.reshape(np.random.multivariate_normal(np.zeros(num_samples), K), (num_samples, 1))
     return {'X':X, 'Y':y, 'info': "Samples 500 values of a function from an RBF covariance with very small noise for inputs uniformly distributed between -1 and 1."}
 
 def toy_rbf_1d_50(seed=default_seed):
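
For reference, a minimal usage sketch of the refactored sampler (assuming this tree is importable as GPy and exposes the function as GPy.util.datasets.toy_rbf_1d, consistent with the file path above):

import GPy

# Draw 200 samples instead of the default 500; the seed fixes the draw.
data = GPy.util.datasets.toy_rbf_1d(seed=0, num_samples=200)
print(data['X'].shape, data['Y'].shape)  # expected: (200, 1) (200, 1)

Replacing the hard-coded N = 500 with a num_samples keyword keeps the old default behaviour while letting callers control the sample count.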