diff --git a/GPy/core/variational.py b/GPy/core/variational.py new file mode 100644 index 00000000..74287dcf --- /dev/null +++ b/GPy/core/variational.py @@ -0,0 +1,19 @@ +''' +Created on 6 Nov 2013 + +@author: maxz +''' +from parameterized import Parameterized +from parameter import Param + +class Normal(Parameterized): + ''' + Normal distribution for variational approximations. + + holds the means and variances for a factorizing multivariate normal distribution + ''' + def __init__(self, name, means, variances): + Parameterized.__init__(self, name=name) + self.means = Param("mean", means) + self.variances = Param('variance', variances) + self.add_parameters(self.means, self.variances) \ No newline at end of file diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 805c6b43..37839423 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -18,37 +18,37 @@ class kern(Parameterized): like which parameters live where. The technical code for kernels is divided into _parts_ (see - e.g. rbf.py). This object contains a list of parts, which are - computed additively. For multiplication, special _prod_ parts + e.g. rbf.py). This object contains a list of _parameters_, which are + computed additively. For multiplication, special _prod_ _parameters_ are used. :param input_dim: The dimensionality of the kernel's input space :type input_dim: int - :param parts: the 'parts' (PD functions) of the kernel - :type parts: list of Kernpart objects + :param _parameters_: the '_parameters_' (PD functions) of the kernel + :type _parameters_: list of Kernpart objects :param input_slices: the slices on the inputs which apply to each kernel :type input_slices: list of slice objects, or list of bools """ - self.parts = parts + self._parameters_ = parts self.num_parts = len(parts) - self.num_params = sum([p.num_params for p in self.parts]) + self.num_params = sum([p.num_params for p in self._parameters_]) self.input_dim = input_dim - part_names = [k.name for k in self.parts] + part_names = [k.name for k in self._parameters_] self.name='' for name in part_names: self.name += name + '+' self.name = self.name[:-1] # deal with input_slices if input_slices is None: - self.input_slices = [slice(None) for p in self.parts] + self.input_slices = [slice(None) for p in self._parameters_] else: - assert len(input_slices) == len(self.parts) + assert len(input_slices) == len(self._parameters_) self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices] - for p in self.parts: + for p in self._parameters_: assert isinstance(p, Kernpart), "bad kernel part" self.compute_param_slices() @@ -60,7 +60,7 @@ class kern(Parameterized): Get the current state of the class, here just all the indices, rest can get recomputed """ - return Parameterized.getstate(self) + [self.parts, + return Parameterized.getstate(self) + [self._parameters_, self.num_parts, self.num_params, self.input_dim, @@ -74,7 +74,7 @@ class kern(Parameterized): self.input_dim = state.pop() self.num_params = state.pop() self.num_parts = state.pop() - self.parts = state.pop() + self._parameters_ = state.pop() Parameterized.setstate(self, state) @@ -99,7 +99,7 @@ class kern(Parameterized): xticklabels = [] bars = [] x0 = 0 - for p in self.parts: + for p in self._parameters_: c = Tango.nextMedium() if hasattr(p, 'ARD') and p.ARD: if title is None: @@ -173,7 +173,7 @@ class kern(Parameterized): """ self.param_slices = [] count = 0 - for p in self.parts: + for p in self._parameters_: self.param_slices.append(slice(count, count + p.num_params)) count += p.num_params @@ -202,7 +202,7 @@ class kern(Parameterized): other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices] other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices] - newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices) + newkern = kern(D, self._parameters_ + other._parameters_, self_input_slices + other_input_slices) # transfer constraints: newkern.constrained_indices = self.constrained_indices + [x + self.num_params for x in other.constrained_indices] @@ -213,7 +213,7 @@ class kern(Parameterized): newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices] else: assert self.input_dim == other.input_dim - newkern = kern(self.input_dim, self.parts + other.parts, self.input_slices + other.input_slices) + newkern = kern(self.input_dim, self._parameters_ + other._parameters_, self.input_slices + other.input_slices) # transfer constraints: newkern.constrained_indices = self.constrained_indices + [i + self.num_params for i in other.constrained_indices] newkern.constraints = self.constraints + other.constraints @@ -251,7 +251,7 @@ class kern(Parameterized): s1[sl1], s2[sl2] = [True], [True] slices += [s1 + s2] - newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)] + newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1._parameters_, K2._parameters_)] if tensor: newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices) @@ -266,12 +266,12 @@ class kern(Parameterized): # Build the array that allows to go from the initial indices of the param to the new ones K1_param = [] n = 0 - for k1 in K1.parts: + for k1 in K1._parameters_: K1_param += [range(n, n + k1.num_params)] n += k1.num_params n = 0 K2_param = [] - for k2 in K2.parts: + for k2 in K2._parameters_: K2_param += [range(K1.num_params + n, K1.num_params + n + k2.num_params)] n += k2.num_params index_param = [] @@ -303,19 +303,19 @@ class kern(Parameterized): self.constrain(np.where(index_param == i)[0], t) def _get_params(self): - return np.hstack([p._get_params() for p in self.parts]) + return np.hstack([p._get_params() for p in self._parameters_]) def _set_params(self, x): - [p._set_params(x[s]) for p, s in zip(self.parts, self.param_slices)] + [p._set_params(x[s]) for p, s in zip(self._parameters_, self.param_slices)] def _get_param_names(self): - # this is a bit nasty: we want to distinguish between parts with the same name by appending a count - part_names = np.array([k.name for k in self.parts], dtype=np.str) + # this is a bit nasty: we want to distinguish between _parameters_ with the same name by appending a count + part_names = np.array([k.name for k in self._parameters_], dtype=np.str) counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)] cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)] names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)] - return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self.parts)], []) + return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self._parameters_)], []) def K(self, X, X2=None, which_parts='all'): """ @@ -334,10 +334,10 @@ class kern(Parameterized): assert X.shape[1] == self.input_dim if X2 is None: target = np.zeros((X.shape[0], X.shape[0])) - [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used] + [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] else: target = np.zeros((X.shape[0], X2.shape[0])) - [p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used] + [p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] return target def dK_dtheta(self, dL_dK, X, X2=None): @@ -356,9 +356,9 @@ class kern(Parameterized): assert X.shape[1] == self.input_dim target = np.zeros(self.num_params) if X2 is None: - [p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)] + [p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self.param_slices)] else: - [p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)] + [p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self.param_slices)] return self._transform_gradients(target) @@ -374,9 +374,9 @@ class kern(Parameterized): target = np.zeros_like(X) if X2 is None: - [p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] + [p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] else: - [p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] + [p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] return target def Kdiag(self, X, which_parts='all'): @@ -385,7 +385,7 @@ class kern(Parameterized): which_parts = [True] * self.num_parts assert X.shape[1] == self.input_dim target = np.zeros(X.shape[0]) - [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on] + [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on] return target def dKdiag_dtheta(self, dL_dKdiag, X): @@ -393,131 +393,200 @@ class kern(Parameterized): assert X.shape[1] == self.input_dim assert dL_dKdiag.size == X.shape[0] target = np.zeros(self.num_params) - [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)] + [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self.param_slices)] return self._transform_gradients(target) def dKdiag_dX(self, dL_dKdiag, X): assert X.shape[1] == self.input_dim target = np.zeros_like(X) - [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] + [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] return target def psi0(self, Z, mu, S): target = np.zeros(mu.shape[0]) - [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)] + [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] return target def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S): target = np.zeros(self.num_params) - [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)] + [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self.param_slices, self.input_slices)] return self._transform_gradients(target) def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S): target_mu, target_S = np.zeros_like(mu), np.zeros_like(S) - [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] + [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] return target_mu, target_S def psi1(self, Z, mu, S): target = np.zeros((mu.shape[0], Z.shape[0])) - [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)] + [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] return target def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S): target = np.zeros((self.num_params)) - [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)] + [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self.param_slices, self.input_slices)] return self._transform_gradients(target) def dpsi1_dZ(self, dL_dpsi1, Z, mu, S): target = np.zeros_like(Z) - [p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] + [p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] return target def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S): """return shapes are num_samples,num_inducing,input_dim""" target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) - [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] + [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] return target_mu, target_S def psi2(self, Z, mu, S): """ - Computer the psi2 statistics for the covariance function. - - :param Z: np.ndarray of inducing inputs (num_inducing x input_dim) - :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim) - :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing) - + :param Z: np.ndarray of inducing inputs (M x Q) + :param mu, S: np.ndarrays of means and variances (each N x Q) + :returns psi2: np.ndarray (N,M,M) """ target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) - [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)] + [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] # compute the "cross" terms # TODO: input_slices needed - crossterms = 0 + from parts.white import White + from parts.rbf import RBF + from parts.rbf_inv import RBFInv + from parts.bias import Bias + from parts.linear import Linear - for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self.parts, self.input_slices), 2): - if i_s1 == i_s2: - # TODO psi1 this must be faster/better/precached/more nice - tmp1 = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1) - tmp2 = np.zeros((mu.shape[0], Z.shape[0])) - p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2) - - prod = np.multiply(tmp1, tmp2) - crossterms += prod[:, :, None] + prod[:, None, :] - - # target += crossterms - return target + crossterms + for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self._parameters_, self._param_slices_), 2): + # white doesn;t combine with anything + if isinstance(p1, White) or isinstance(p2, White): + pass + # rbf X bias + elif isinstance(p1, Bias) and isinstance(p2, (RBF, RBFInv)): + target += p1.variance * (p2._psi1[:, :, None] + p2._psi1[:, None, :]) + elif isinstance(p2, Bias) and isinstance(p1, (RBF, RBFInv)): + target += p2.variance * (p1._psi1[:, :, None] + p1._psi1[:, None, :]) + # linear X bias + elif isinstance(p1, Bias) and isinstance(p2, Linear): + tmp = np.zeros((mu.shape[0], Z.shape[0])) + p2.psi1(Z, mu, S, tmp) + target += p1.variance * (tmp[:, :, None] + tmp[:, None, :]) + elif isinstance(p2, Bias) and isinstance(p1, Linear): + tmp = np.zeros((mu.shape[0], Z.shape[0])) + p1.psi1(Z, mu, S, tmp) + target += p2.variance * (tmp[:, :, None] + tmp[:, None, :]) + # rbf X linear + elif isinstance(p1, Linear) and isinstance(p2, (RBF, RBFInv)): + pass + elif isinstance(p2, Linear) and isinstance(p1, (RBF, RBFInv)): + raise NotImplementedError # TODO + elif isinstance(p1, (RBF, RBFInv)) and isinstance(p2, (RBF, RBFInv)): + raise NotImplementedError # TODO + elif isinstance(p2, (RBF, RBFInv)) and isinstance(p1, (RBF, RBFInv)): + raise NotImplementedError # TODO + else: + raise NotImplementedError, "psi2 cannot be computed for this kernel" + return target def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S): - """Gradient of the psi2 statistics with respect to the parameters.""" - target = np.zeros(self.num_params) - [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)] + target = np.zeros(self.Nparam) + [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self.param_slices)] # compute the "cross" terms # TODO: better looping, input_slices - for i1, i2 in itertools.permutations(range(len(self.parts)), 2): - p1, p2 = self.parts[i1], self.parts[i2] + for i1, i2 in itertools.combinations(range(len(self._parameters_)), 2): + p1, p2 = self._parameters_[i1], self._parameters_[i2] # ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2] ps1, ps2 = self.param_slices[i1], self.param_slices[i2] - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2]) + # white doesn;t combine with anything + if p1.name == 'white' or p2.name == 'white': + pass + # rbf X bias + elif p1.name == 'bias' and p2.name == 'rbf': + p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2]) + p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2._psi1 * 2., Z, mu, S, target[ps1]) + elif p2.name == 'bias' and p1.name == 'rbf': + p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1]) + p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1._psi1 * 2., Z, mu, S, target[ps2]) + # linear X bias + elif p1.name == 'bias' and p2.name == 'linear': + p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2]) # [ps1]) + psi1 = np.zeros((mu.shape[0], Z.shape[0])) + p2.psi1(Z, mu, S, psi1) + p1.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps1]) + elif p2.name == 'bias' and p1.name == 'linear': + p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1]) + psi1 = np.zeros((mu.shape[0], Z.shape[0])) + p1.psi1(Z, mu, S, psi1) + p2.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps2]) + # rbf X linear + elif p1.name == 'linear' and p2.name == 'rbf': + raise NotImplementedError # TODO + elif p2.name == 'linear' and p1.name == 'rbf': + raise NotImplementedError # TODO + else: + raise NotImplementedError, "psi2 cannot be computed for this kernel" return self._transform_gradients(target) def dpsi2_dZ(self, dL_dpsi2, Z, mu, S): target = np.zeros_like(Z) - [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] - # target *= 2 + [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] # compute the "cross" terms # TODO: we need input_slices here. - for p1, p2 in itertools.permutations(self.parts, 2): - if p1.name == 'linear' and p2.name == 'linear': - raise NotImplementedError("We don't handle linear/linear cross-terms") - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target) + for p1, p2 in itertools.combinations(self._parameters_, 2): + # white doesn;t combine with anything + if p1.name == 'white' or p2.name == 'white': + pass + # rbf X bias + elif p1.name == 'bias' and p2.name == 'rbf': + p2.dpsi1_dX(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target) + elif p2.name == 'bias' and p1.name == 'rbf': + p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target) + # linear X bias + elif p1.name == 'bias' and p2.name == 'linear': + p2.dpsi1_dZ(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target) + elif p2.name == 'bias' and p1.name == 'linear': + p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target) + # rbf X linear + elif p1.name == 'linear' and p2.name == 'rbf': + raise NotImplementedError # TODO + elif p2.name == 'linear' and p1.name == 'rbf': + raise NotImplementedError # TODO + else: + raise NotImplementedError, "psi2 cannot be computed for this kernel" - return target * 2 + return target * 2. def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S): target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) - [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] + [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] # compute the "cross" terms # TODO: we need input_slices here. - for p1, p2 in itertools.permutations(self.parts, 2): - if p1.name == 'linear' and p2.name == 'linear': - raise NotImplementedError("We don't handle linear/linear cross-terms") - - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S) + for p1, p2 in itertools.combinations(self._parameters_, 2): + # white doesn;t combine with anything + if p1.name == 'white' or p2.name == 'white': + pass + # rbf X bias + elif p1.name == 'bias' and p2.name == 'rbf': + p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S) + elif p2.name == 'bias' and p1.name == 'rbf': + p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S) + # linear X bias + elif p1.name == 'bias' and p2.name == 'linear': + p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S) + elif p2.name == 'bias' and p1.name == 'linear': + p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S) + # rbf X linear + elif p1.name == 'linear' and p2.name == 'rbf': + raise NotImplementedError # TODO + elif p2.name == 'linear' and p1.name == 'rbf': + raise NotImplementedError # TODO + else: + raise NotImplementedError, "psi2 cannot be computed for this kernel" return target_mu, target_S - def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs): if which_parts == 'all': which_parts = [True] * self.num_parts diff --git a/GPy/testing/psi_stat_expectation_tests.py b/GPy/testing/psi_stat_expectation_tests.py index bcdbd2af..16904927 100644 --- a/GPy/testing/psi_stat_expectation_tests.py +++ b/GPy/testing/psi_stat_expectation_tests.py @@ -28,8 +28,8 @@ def ard(p): class Test(unittest.TestCase): input_dim = 9 num_inducing = 4 - N = 3 - Nsamples = 5e6 + N = 30 + Nsamples = 9e6 def setUp(self): i_s_dim_list = [2,4,3] @@ -45,20 +45,26 @@ class Test(unittest.TestCase): input_slices = input_slices ) self.kerns = ( - input_slice_kern, +# input_slice_kern, # (GPy.kern.rbf(self.input_dim, ARD=True) + # GPy.kern.linear(self.input_dim, ARD=True) + # GPy.kern.bias(self.input_dim) + # GPy.kern.white(self.input_dim)), # (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + -# GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + -# GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) + -# GPy.kern.bias(self.input_dim) + -# GPy.kern.white(self.input_dim)), -# GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True), +# GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + +# GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) + +# GPy.kern.bias(self.input_dim) + +# GPy.kern.white(self.input_dim)), + (GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) + + GPy.kern.bias(self.input_dim, np.random.rand()) + + GPy.kern.white(self.input_dim, np.random.rand())), + (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + + GPy.kern.bias(self.input_dim, np.random.rand()) + + GPy.kern.white(self.input_dim, np.random.rand())), +# GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True), # GPy.kern.linear(self.input_dim, ARD=False), GPy.kern.linear(self.input_dim, ARD=True), # GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim), -# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim), +# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim), # GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim), # GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim), # GPy.kern.bias(self.input_dim), GPy.kern.white(self.input_dim), @@ -79,7 +85,7 @@ class Test(unittest.TestCase): def test_psi1(self): for kern in self.kerns: - Nsamples = np.floor(self.Nsamples/300.) + Nsamples = np.floor(self.Nsamples/self.N) psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance) K_ = np.zeros((Nsamples, self.num_inducing)) diffs = [] @@ -105,7 +111,7 @@ class Test(unittest.TestCase): def test_psi2(self): for kern in self.kerns: - Nsamples = self.Nsamples/10. + Nsamples = int(np.floor(self.Nsamples/self.N)) psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance) K_ = np.zeros((self.num_inducing, self.num_inducing)) diffs = [] @@ -119,10 +125,10 @@ class Test(unittest.TestCase): try: import pylab pylab.figure(msg) - pylab.plot(diffs) + pylab.plot(diffs, marker='x', mew=1.3) # print msg, np.allclose(psi2.squeeze(), K_, rtol=1e-1, atol=.1) - self.assertTrue(np.allclose(psi2.squeeze(), K_, - rtol=1e-1, atol=.1), + self.assertTrue(np.allclose(psi2.squeeze(), K_), + #rtol=1e-1, atol=.1), msg=msg + ": not matching") # sys.stdout.write(".") except: