diff --git a/GPy/core/model.py b/GPy/core/model.py index 1595e347..7feb72b2 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -349,7 +349,7 @@ class Model(Parameterized): numerical_gradient = (f1 - f2) / (2 * step) if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind] else: ratio = (f1 - f2) / (2 * step * gradient[xind]) - difference = np.abs((f1 - f2) / 2 / step - gradient[xind]) + difference = np.abs(numerical_gradient - gradient[xind]) if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance: formatted_name = "\033[92m {0} \033[0m".format(names[nind]) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index e359409e..0ecc1ebf 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -281,7 +281,7 @@ class Gradcheckable(Pickleable, Parentable): """ if self.has_parent(): return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance) - return self._checkgrad(self[''], verbose=verbose, step=step, tolerance=tolerance) + return self._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance) def _checkgrad(self, param, verbose=0, step=1e-6, tolerance=1e-3): """ @@ -372,8 +372,9 @@ class Indexable(Nameable, Observable): basically just sums up the parameter sizes which come before param. """ if param.has_parent(): - if param._parent_._get_original(param) in self.parameters: - return self._param_slices_[param._parent_._get_original(param)._parent_index_].start + p = param._parent_._get_original(param) + if p in self.parameters: + return reduce(lambda a,b: a + b.size, self.parameters[:p._parent_index_], 0) return self._offset_for(param._parent_) + param._parent_._offset_for(param) return 0 @@ -699,36 +700,10 @@ class OptimizationHandlable(Indexable): def _get_params_transformed(self): raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!" -# # transformed parameters (apply un-transformation rules) -# p = self.param_array.copy() -# [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] -# if self.has_parent() and self.constraints[__fixed__].size != 0: -# fixes = np.ones(self.size).astype(bool) -# fixes[self.constraints[__fixed__]] = FIXED -# return p[fixes] -# elif self._has_fixes(): -# return p[self._fixes_] -# return p # def _set_params_transformed(self, p): raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!" -# """ -# Set parameters p, but make sure they get transformed before setting. -# This means, the optimizer sees p, whereas the model sees transformed(p), -# such that, the parameters the model sees are in the right domain. -# """ -# if not(p is self.param_array): -# if self.has_parent() and self.constraints[__fixed__].size != 0: -# fixes = np.ones(self.size).astype(bool) -# fixes[self.constraints[__fixed__]] = FIXED -# self.param_array.flat[fixes] = p -# elif self._has_fixes(): self.param_array.flat[self._fixes_] = p -# else: self.param_array.flat = p -# [np.put(self.param_array, ind, c.f(self.param_array.flat[ind])) -# for c, ind in self.constraints.iteritems() if c != __fixed__] -# self._trigger_params_changed() - def _trigger_params_changed(self, trigger_parent=True): """ First tell all children to update, @@ -736,7 +711,7 @@ class OptimizationHandlable(Indexable): If trigger_parent is True, we will tell the parent, otherwise not. """ - [p._trigger_params_changed(trigger_parent=False) for p in self.parameters] + [p._trigger_params_changed(trigger_parent=False) for p in self.parameters if not p.is_fixed] self.notify_observers(None, None if trigger_parent else -np.inf) def _size_transformed(self): @@ -888,6 +863,25 @@ class Parameterizable(OptimizationHandlable): self._param_array_ = np.empty(self.size, dtype=np.float64) return self._param_array_ + @property + def unfixed_param_array(self): + """ + Array representing the parameters of this class. + There is only one copy of all parameters in memory, two during optimization. + + !WARNING!: setting the parameter array MUST always be done in memory: + m.param_array[:] = m_copy.param_array + """ + if self.__dict__.get('_param_array_', None) is None: + self._param_array_ = np.empty(self.size, dtype=np.float64) + + if self.constraints[__fixed__].size !=0: + fixes = np.ones(self.size).astype(bool) + fixes[self.constraints[__fixed__]] = FIXED + return self._param_array_[fixes] + else: + return self._param_array_ + @param_array.setter def param_array(self, arr): self._param_array_ = arr diff --git a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py index 85d8cc89..3aeb4fbb 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py +++ b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py @@ -56,7 +56,7 @@ class EPDTC(LatentFunctionInference): self._ep_approximation = None def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): - num_data, output_dim = X.shape + num_data, output_dim = Y.shape assert output_dim ==1, "ep in 1D only (for now!)" Kmm = kern.K(Z) diff --git a/GPy/inference/optimization/__init__.py b/GPy/inference/optimization/__init__.py index 1a8f043b..1590568f 100644 --- a/GPy/inference/optimization/__init__.py +++ b/GPy/inference/optimization/__init__.py @@ -1,2 +1,3 @@ from scg import SCG from optimization import * +from hmc import HMC,HMC_shortcut diff --git a/GPy/inference/optimization/hmc.py b/GPy/inference/optimization/hmc.py new file mode 100644 index 00000000..8c65cdf0 --- /dev/null +++ b/GPy/inference/optimization/hmc.py @@ -0,0 +1,157 @@ +"""HMC implementation""" + +import numpy as np + + +class HMC: + def __init__(self,model,M=None,stepsize=1e-1): + self.model = model + self.stepsize = stepsize + self.p = np.empty_like(model.optimizer_array.copy()) + if M is None: + self.M = np.eye(self.p.size) + else: + self.M = M + self.Minv = np.linalg.inv(self.M) + + def sample(self, m_iters=1000, hmc_iters=20): + params = np.empty((m_iters,self.p.size)) + for i in xrange(m_iters): + self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M) + H_old = self._computeH() + theta_old = self.model.optimizer_array.copy() + params[i] = self.model.unfixed_param_array + #Matropolis + self._update(hmc_iters) + H_new = self._computeH() + + if H_old>H_new: + k = 1. + else: + k = np.exp(H_old-H_new) + if np.random.rand()H_new: + k = 1. + else: + k = np.exp(H_old-H_new) + if np.random.rand()pos: + pos = -1 + i += pos + self.model.optimizer_array = theta_buf[hmc_iters] + self.p[:] = -p_buf[hmc_iters] + else: + pos_new = pos-hmc_iters+i + self.model.optimizer_array = theta_buf[hmc_iters+pos_new] + self.p[:] = -p_buf[hmc_iters+pos_new] + break + else: + Hlist = range(hmc_iters+pos,hmc_iters+pos+self.groupsize) +# print Hlist +# print self._testH(H_buf[Hlist]) + + if self._testH(H_buf[Hlist]): + pos += -1 + else: + # Reverse the trajectory for the 2nd time + r = (hmc_iters - i)%((reversal[0]-pos)*2) + if r>(reversal[0]-pos): + pos_new = 2*reversal[0] - r - pos + else: + pos_new = pos + r + self.model.optimizer_array = theta_buf[hmc_iters+pos_new] + self.p[:] = p_buf[hmc_iters+pos_new] # the sign of momentum might be wrong! +# print reversal[0],pos,pos_new +# print H_buf + break + + def _testH(self, Hlist): + Hstd = np.std(Hlist) +# print Hlist +# print Hstd + if Hstdself.Hstd_th[1]: + return False + else: + return True + + def _computeH(self,): + return self.model.objective_function()+self.p.size*np.log(2*np.pi)/2.+np.log(np.linalg.det(self.M))/2.+np.dot(self.p, np.dot(self.Minv,self.p[:,None]))/2. + diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 12f5d444..ee743f8b 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -10,7 +10,7 @@ class Add(CombinationKernel): """ Add given list of kernels together. propagates gradients through. - + This kernel will take over the active dims of it's subkernels passed in. """ def __init__(self, subkerns, name='add'): @@ -40,7 +40,7 @@ class Add(CombinationKernel): return reduce(np.add, (p.Kdiag(X) for p in which_parts)) def update_gradients_full(self, dL_dK, X, X2=None): - [p.update_gradients_full(dL_dK, X, X2) for p in self.parts] + [p.update_gradients_full(dL_dK, X, X2) for p in self.parts if not p.is_fixed] def update_gradients_diag(self, dL_dK, X): [p.update_gradients_diag(dL_dK, X) for p in self.parts] diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 6354f13d..a16c4ac1 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -84,6 +84,22 @@ class BayesianGPLVM(SparseGP): self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2']) + # This is testing code ------------------------- +# i = np.random.randint(self.X.shape[0]) +# X_ = self.X.mean +# which = np.sqrt(((X_ - X_[i:i+1])**2).sum(1)).argsort()>(max(0, self.X.shape[0]-51)) +# _, _, grad_dict = self.inference_method.inference(self.kern, self.X[which], self.Z, self.likelihood, self.Y[which], self.Y_metadata) +# grad = self.kern.gradients_qX_expectations(variational_posterior=self.X[which], Z=self.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) +# +# self.X.mean.gradient[:] = 0 +# self.X.variance.gradient[:] = 0 +# self.X.mean.gradient[which] = grad[0] +# self.X.variance.gradient[which] = grad[1] + + # update for the KL divergence +# self.variational_prior.update_gradients_KL(self.X, which) + # ----------------------------------------------- + # update for the KL divergence self.variational_prior.update_gradients_KL(self.X) diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index 7926410e..46a79ad8 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -150,7 +150,11 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', if plot_raw: m, _ = model._raw_predict(Xgrid) else: - m, _ = model.predict(Xgrid) + if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression): + meta = {'output_index': Xgrid[:,-1:].astype(np.int)} + else: + meta = None + m, v = model.predict(Xgrid, full_cov=False, Y_metadata=meta) for d in which_data_ycols: m_d = m[:,d].reshape(resolution, resolution).T plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) diff --git a/setup.py b/setup.py index 40574a76..d9a6ab5e 100644 --- a/setup.py +++ b/setup.py @@ -1,13 +1,13 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -#import os +import os from setuptools import setup # Version number version = '0.4.6' -from pkg_resources import Requirement, resource_string +from pkg_resources import Requirement def read(fname): return open(os.path.join(os.path.dirname(__file__), fname)).read()