diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 7095c55f..97abfb3e 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -61,20 +61,21 @@ def BGPLVM_oil():
     data = GPy.util.datasets.oil()
     Y, X = data['Y'], data['X']
     X -= X.mean(axis=0)
-    # X /= X.std(axis=0)
+    X /= X.std(axis=0)
 
     Q = 10
     M = 30
     kernel = GPy.kern.rbf(Q, ARD = True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
     m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M)
-    m.scale_factor = 10000.0
+    # m.scale_factor = 100.0
     m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)')
 
     from sklearn import cluster
     km = cluster.KMeans(M, verbose=10)
     Z = km.fit(m.X).cluster_centers_
     # Z = GPy.util.misc.kmm_init(m.X, M)
     m.set('iip', Z)
+    m.set('bias', 1e-4)
 
     # optimize
     # m.ensure_default_constraints()
diff --git a/GPy/inference/SGD.py b/GPy/inference/SGD.py
index b4228bff..dd402d5e 100644
--- a/GPy/inference/SGD.py
+++ b/GPy/inference/SGD.py
@@ -33,12 +33,12 @@ class opt_SGD(Optimizer):
         self.self_paced = self_paced
         self.center = center
         self.param_traces = [('noise',[])]
-        if len([p for p in self.model.kern.parts if p.name == 'bias']) == 1:
-            self.param_traces.append(('bias',[]))
-        if len([p for p in self.model.kern.parts if p.name == 'linear']) == 1:
-            self.param_traces.append(('linear',[]))
-        if len([p for p in self.model.kern.parts if p.name == 'rbf']) == 1:
-            self.param_traces.append(('rbf_var',[]))
+        # if len([p for p in self.model.kern.parts if p.name == 'bias']) == 1:
+        #     self.param_traces.append(('bias',[]))
+        # if len([p for p in self.model.kern.parts if p.name == 'linear']) == 1:
+        #     self.param_traces.append(('linear',[]))
+        # if len([p for p in self.model.kern.parts if p.name == 'rbf']) == 1:
+        #     self.param_traces.append(('rbf_var',[]))
         self.param_traces = dict(self.param_traces)
 
         self.fopt_trace = []
@@ -156,29 +156,23 @@ class opt_SGD(Optimizer):
             Y = self.model.likelihood.Y
             samples = self.non_null_samples(self.model.likelihood.Y)
             self.model.N = samples.sum()
-
-            if self.center:
-                self.model.likelihood._mean = Y[samples].mean()
-                self.model.likelihood._std = Y[samples].std()
-
-            self.model.likelihood.set_data(Y[samples])
+            Y = Y[samples]
         else:
             samples = self.model.likelihood.Y.nonzero()[0]
             self.model.N = len(samples)
             Y = np.asarray(self.model.likelihood.Y[samples].todense(), dtype = np.float64)
 
-            if self.center:
-                self.model.likelihood._mean = Y.mean()
-                self.model.likelihood._std = Y.std()
-            self.model.likelihood.set_data(Y)
+        if self.model.N == 0 or Y.std() == 0.0:
+            return 0, step, self.model.N
+
+        # FIXME: get rid of self.center, everything should be centered by default
+        self.model.likelihood._mean = Y.mean()
+        self.model.likelihood._std = Y.std()
+        self.model.likelihood.set_data(Y)
 
-        # self.model.likelihood.N = self.model.N
         j = self.subset_parameter_vector(self.x_opt, samples, shapes)
         self.model.X = X[samples]
 
-        if self.model.N == 0 or self.model.likelihood.Y.std() == 0.0:
-            return 0, step, self.model.N
-
         # if self.center:
         #     self.model.likelihood.Y -= self.model.likelihood.Y.mean()
         #     self.model.likelihood.Y /= self.model.likelihood.Y.std()
@@ -186,7 +180,8 @@ class opt_SGD(Optimizer):
 
         model_name = self.model.__class__.__name__
         if model_name == 'Bayesian_GPLVM':
-            self.model.likelihood.trYYT = np.sum(np.square(self.model.likelihood.Y))
+            self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T)
+            self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT)
 
         b, p = self.shift_constraints(j)
         f, fp = f_fp(self.x_opt[j])
@@ -196,6 +191,7 @@ class opt_SGD(Optimizer):
 
         step[j] = self.momentum * step[j] + self.learning_rate[j] * fp
         self.x_opt[j] -= step[j]
+        self.restore_constraints(b, p)
 
         return f, step, self.model.N
 
@@ -256,14 +252,13 @@ class opt_SGD(Optimizer):
                 sys.stdout.write(status)
                 sys.stdout.flush()
                 last_printed_count = count
-
+            self.param_traces['noise'].append(noise)
 
             NLL.append(f)
             self.fopt_trace.append(f)
-            for k in self.param_traces.keys():
-                self.param_traces[k].append(self.model.get(k)[0])
-
+            # for k in self.param_traces.keys():
+            #     self.param_traces[k].append(self.model.get(k)[0])
 
         # should really be a sum(), but earlier samples in the iteration will have a very crappy ll
         self.f_opt = np.mean(NLL)
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index be45fa70..7d3b1737 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -51,6 +51,27 @@ class kern(parameterised):
         parameterised.__init__(self)
 
+
+    def plot_ARD(self):
+        """
+        If an ARD kernel is present, it bar-plots the ARD parameters
+
+
+        """
+        for p in self.parts:
+            if hasattr(p, 'ARD') and p.ARD:
+                pb.figure()
+                pb.title('ARD parameters, %s kernel' % p.name)
+
+                if p.name == 'linear':
+                    ard_params = p.variances
+                else:
+                    ard_params = 1./p.lengthscale
+
+                pb.bar(np.arange(len(ard_params))-0.4, ard_params)
+
+
+
     def _transform_gradients(self,g):
         x = self._get_params()
         g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
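
Note on the trYYT change in SGD.py: trace(Y Y^T) equals the sum of the squared entries of Y, so caching YYT and taking its trace yields the same trYYT value the old np.sum(np.square(...)) line produced, while keeping YYT around for reuse. A minimal sketch of that identity; the array Y below is only a stand-in for likelihood.Y:

    import numpy as np

    Y = np.random.randn(5, 3)   # stand-in for self.model.likelihood.Y
    YYT = np.dot(Y, Y.T)        # what the patch now stores as likelihood.YYT
    # trace(Y Y^T) == sum_ij Y_ij^2, so the value of trYYT is unchanged
    assert np.allclose(np.trace(YYT), np.sum(np.square(Y)))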
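The new kern.plot_ARD() in kern.py iterates over self.parts and bar-plots each ARD part's parameters (variances for linear parts, inverse lengthscales otherwise). A rough usage sketch, assuming pylab is importable in the environment and reusing the same kernel constructors the oil example already uses; the model-fitting step is elided:

    import GPy

    Q = 5  # illustrative latent dimensionality
    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
    # ... build and optimize a model that uses `kernel` ...
    kernel.plot_ARD()  # one bar chart per ARD part (here, the rbf part)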