Reparameratised in terms of sigma2

2026-06-26 15:49:40 +02:00 · 2013-07-08 15:48:53 +01:00 · 2013-07-08 15:48:53 +01:00 · 4e5cefb4b5
commit 4e5cefb4b5
parent ab6a3a571e
4 changed files with 207 additions and 49 deletions
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@ -244,9 +244,6 @@ class model(parameterised):
        LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
        prior_gradients = self._transform_gradients(self._log_prior_gradients())
        obj_grads = -LL_gradients - prior_gradients
        print self
        #self.checkgrad(verbose=1)
        #import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
        return obj_f, obj_grads
    def optimize(self, optimizer=None, start=None, **kwargs):
--- a/GPy/examples/laplace_approximations.py
+++ b/GPy/examples/laplace_approximations.py
@ -24,7 +24,7 @@ def timing():
        edited_real_sd = real_sd
        kernel1 = GPy.kern.rbf(X.shape[1])
-        t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
+        t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
        corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm')
        m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel1)
        m.ensure_default_constraints()
@ -53,7 +53,7 @@ def v_fail_test():
    edited_real_sd = real_sd
    print "Clean student t, rasm"
-    t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
+    t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
    stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
    m = GPy.models.GP(X, stu_t_likelihood, kernel1)
    m.constrain_positive('')
@ -92,18 +92,18 @@ def debug_student_t_noise_approx():
    X = np.random.rand(100)[:, None]
    #X = np.random.rand(100)[:, None]
    #X = np.array([0.5, 1])[:, None]
-    Y = np.sin(X*2*np.pi) + np.random.randn(*X.shape)*real_var
+    Y = np.sin(X*2*np.pi) + np.random.randn(*X.shape)*real_var + 1
    #Y = X + np.random.randn(*X.shape)*real_var
    #ty = np.array([1., 9.97733584, 4.17841363])[:, None]
    #Y = ty
    X_full = X
-    Y_full = np.sin(X_full)
+    Y_full = np.sin(X_full) + 1
    Y = Y/Y.max()
    #Add student t random noise to datapoints
-    deg_free = 1000
+    deg_free = 100
    real_sd = np.sqrt(real_var)
    print "Real noise std: ", real_sd
@ -115,7 +115,7 @@ def debug_student_t_noise_approx():
    plt.close('all')
    # Kernel object
-    kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
+    kernel1 = GPy.kern.rbf(X.shape[1]) #+ GPy.kern.white(X.shape[1])
    #kernel1 = GPy.kern.linear(X.shape[1]) + GPy.kern.white(X.shape[1])
    kernel2 = kernel1.copy()
    kernel3 = kernel1.copy()
@ -140,24 +140,24 @@ def debug_student_t_noise_approx():
    #print m
    real_stu_t_std = np.sqrt(real_var*((deg_free - 2)/float(deg_free)))
-    edited_real_sd = real_stu_t_std + 1#initial_var_guess #real_sd
+    edited_real_sd = real_stu_t_std**2 #initial_var_guess #real_sd
    #edited_real_sd = real_sd
    print "Clean student t, rasm"
-    t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
+    t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
    stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
    m = GPy.models.GP(X, stu_t_likelihood, kernel6)
    #m['rbf_len'] = 1.5
    #m.constrain_fixed('rbf_v', 1.0898)
-    #m.constrain_fixed('rbf_l', 1.8651)
+    #m.constrain_fixed('rbf_l', 0.2651)
    #m.constrain_fixed('t_noise_std', edited_real_sd)
    #m.constrain_positive('rbf')
-    #m.constrain_positive('t_noise_std')
+    m.constrain_positive('t_noise_std')
    #m.constrain_positive('')
    #m.constrain_bounded('t_noi', 0.001, 10)
    #m.constrain_fixed('t_noi', real_stu_t_std)
-    m.constrain_fixed('white', 0.01)
+    #m.constrain_fixed('white', 0.01)
    #m.constrain_fixed('t_no', 0.01)
    #m['rbf_var'] = 0.20446332
    #m['rbf_leng'] = 0.85776241
@ -179,7 +179,7 @@ def debug_student_t_noise_approx():
    return m
    #print "Clean student t, ncg"
-    #t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
+    #t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
    #stu_t_likelihood = GPy.likelihoods.Laplace(Y, t_distribution, opt='ncg')
    #m = GPy.models.GP(X, stu_t_likelihood, kernel3)
    #m.ensure_default_constraints()
@ -276,7 +276,7 @@ def student_t_approx():
    edited_real_sd = real_sd #initial_var_guess
    print "Clean student t, rasm"
-    t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
+    t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
    stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
    m = GPy.models.GP(X, stu_t_likelihood, kernel6)
    m.ensure_default_constraints()
@ -291,7 +291,7 @@ def student_t_approx():
    plt.title('Student-t rasm clean')
    print "Corrupt student t, rasm"
-    t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
+    t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
    corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm')
    m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel4)
    m.ensure_default_constraints()
@ -308,7 +308,7 @@ def student_t_approx():
    return m
    #print "Clean student t, ncg"
-    #t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
+    #t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
    #stu_t_likelihood = GPy.likelihoods.Laplace(Y, t_distribution, opt='ncg')
    #m = GPy.models.GP(X, stu_t_likelihood, kernel3)
    #m.ensure_default_constraints()
@ -322,7 +322,7 @@ def student_t_approx():
    #plt.title('Student-t ncg clean')
    #print "Corrupt student t, ncg"
-    #t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
+    #t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
    #corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='ncg')
    #m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel5)
    #m.ensure_default_constraints()
@ -337,7 +337,7 @@ def student_t_approx():
    ###with a student t distribution, since it has heavy tails it should work well
-    ###likelihood_function = student_t(deg_free, sigma=real_var)
+    ###likelihood_function = student_t(deg_free, sigma2=real_var)
    ###lap = Laplace(Y, likelihood_function)
    ###cov = kernel.K(X)
    ###lap.fit_full(cov)
--- a/GPy/likelihoods/Laplace.py
+++ b/GPy/likelihoods/Laplace.py
@ -220,10 +220,10 @@ class Laplace(likelihood):
        self.ln_I_KW_det = pddet(np.eye(self.N) + self.W_12*self.K*self.W_12.T)
        #self.ln_I_KW_det = pddet(np.eye(self.N) + np.dot(self.K, self.W))
-        self.ln_z_hat = (- 0.5*self.f_Ki_f
+        #self.ln_z_hat = (- 0.5*self.f_Ki_f
-                         - self.ln_I_KW_det
+                         #- self.ln_I_KW_det
-                         + self.likelihood_function.link_function(self.data, self.f_hat, extra_data=self.extra_data)
+                         #+ self.likelihood_function.link_function(self.data, self.f_hat, extra_data=self.extra_data)
-                         )
+                         #)
        return self._compute_GP_variables()
@ -308,6 +308,8 @@ class Laplace(likelihood):
        step_size = 1
        rs = 0
        i = 0
        #if self.likelihood_function.sigma < 0.001:
            #import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
        while difference > epsilon and i < MAX_ITER and rs < MAX_RESTART:
            W = -self.likelihood_function.d2lik_d2f(self.data, f, extra_data=self.extra_data)
            if not self.likelihood_function.log_concave:
@ -316,8 +318,6 @@ class Laplace(likelihood):
                                    # To cause the posterior to become less certain than the prior and likelihood,
                                    # This is a property only held by non-log-concave likelihoods
            B, L, W_12 = self._compute_B_statistics(K, W)
            #if i > 30:
                #import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
            W_f = W*f
            grad = self.likelihood_function.dlik_df(self.data, f, extra_data=self.extra_data)
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@ -158,26 +158,26 @@ class student_t(likelihood_function):
    dln p(yi|fi)_dfi
    d2ln p(yi|fi)_d2fifj
    """
-    def __init__(self, deg_free, sigma=2):
+    def __init__(self, deg_free, sigma2=2):
        #super(student_t, self).__init__()
        self.v = deg_free
-        self.sigma = sigma
+        self.sigma2 = sigma2
        self.log_concave = False
-        self._set_params(np.asarray(sigma))
+        self._set_params(np.asarray(sigma2))
    def _get_params(self):
-        return np.asarray(self.sigma)
+        return np.asarray(self.sigma2)
    def _get_param_names(self):
-        return ["t_noise_std"]
+        return ["t_noise_std2"]
    def _set_params(self, x):
-        self.sigma = float(x)
+        self.sigma2 = float(x)
    @property
    def variance(self, extra_data=None):
-        return (self.v / float(self.v - 2)) * (self.sigma**2)
+        return (self.v / float(self.v - 2)) * self.sigma2
    def link_function(self, y, f, extra_data=None):
        """link_function $\ln p(y|f)$
@ -193,12 +193,16 @@ class student_t(likelihood_function):
        """
        assert y.shape == f.shape
        e = y - f
        A = gammaln((self.v + 1) * 0.5)
        B = -gammaln(self.v * 0.5)
        C = - 0.5*np.log(self.sigma2 * self.v * np.pi)
        D = (-(self.v + 1)*0.5)*np.log(1 + (e**2)/(self.v*self.sigma2))
        objective = (+ gammaln((self.v + 1) * 0.5)
                     - gammaln(self.v * 0.5)
-                     - 0.5*np.log((self.sigma**2) * self.v * np.pi)
+                     - 0.5*np.log(self.sigma2 * self.v * np.pi)
-                     #- (self.v + 1) * 0.5 * np.log(1 + (((e / self.sigma)**2) / self.v))
+                     + (-(self.v + 1)*0.5)*np.log(1 + ((e**2)/self.sigma2)/np.float(self.v))
                     - (self.v + 1) * 0.5 * np.log(1 + (e**2)/(self.v*(self.sigma**2)))
                    )
        #print "A: {} B: {} C: {} D: {} obj: {}".format(A,B,C,D.sum(),objective.sum())
        return np.sum(objective)
    def dlik_df(self, y, f, extra_data=None):
@ -215,7 +219,7 @@ class student_t(likelihood_function):
        """
        assert y.shape == f.shape
        e = y - f
-        grad = ((self.v + 1) * e) / (self.v * (self.sigma**2) + (e**2))
+        grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
        return grad
    def d2lik_d2f(self, y, f, extra_data=None):
@ -235,7 +239,7 @@ class student_t(likelihood_function):
        """
        assert y.shape == f.shape
        e = y - f
-        hess = ((self.v + 1)*(e**2 - self.v*(self.sigma**2))) / ((((self.sigma**2)*self.v) + e**2)**2)
+        hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / (((self.sigma2*self.v) + e**2)**2)
        return hess
    def d3lik_d3f(self, y, f, extra_data=None):
@ -246,8 +250,8 @@ class student_t(likelihood_function):
        """
        assert y.shape == f.shape
        e = y - f
-        d3lik_d3f = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*(self.sigma**2))) /
+        d3lik_d3f = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
-                       ((e**2 + (self.sigma**2)*self.v)**3)
+                       ((e**2 + self.sigma2*self.v)**3)
                    )
        return d3lik_d3f
@ -262,10 +266,16 @@ class student_t(likelihood_function):
        """
        assert y.shape == f.shape
        e = y - f
-        dlik_dsigma = ( - (1/self.sigma) +
+        #sigma = np.sqrt(self.sigma2)
-                        ((1+self.v)*(e**2))/((self.sigma**3)*self.v*(1 + ((e**2) / ((self.sigma**2)*self.v)) ) )
+        #dlik_dsigma = ( - (1/sigma) +
-                      )
+                        #((1+self.v)*(e**2))/((sigma*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) )
                      #)
        #dlik_dsigma = ( - 1 +
                        #((1+self.v)*(e**2))/((self.sigma2*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) )
                      #)
        #dlik_dsigma = (((self.v + 1)*(e**2))/((e**2) + self.v*(self.sigma**2))) - 1
        #dlik_dsigma = (self.v*((e**2)-self.sigma2))/(sigma*((e**2)+self.sigma2*self.v))
        dlik_dsigma = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
        return dlik_dsigma
    def dlik_df_dstd(self, y, f, extra_data=None):
@ -276,9 +286,11 @@ class student_t(likelihood_function):
        """
        assert y.shape == f.shape
        e = y - f
-        dlik_grad_dsigma = ((-2*self.sigma*self.v*(self.v + 1)*e) #2 might not want to be here?
+        #sigma = np.sqrt(self.sigma2)
-                            / ((self.v*(self.sigma**2) + e**2)**2)
+        #dlik_grad_dsigma = ((-2*sigma*self.v*(self.v + 1)*e) #2 might not want to be here?
-                           )
+                            #/ ((self.v*self.sigma2 + e**2)**2)
                           #)
        dlik_grad_dsigma = (-self.v*(self.v+1)*e)/((self.sigma2*self.v + e**2)**2)
        return dlik_grad_dsigma
    def d2lik_d2f_dstd(self, y, f, extra_data=None):
@ -289,11 +301,15 @@ class student_t(likelihood_function):
        """
        assert y.shape == f.shape
        e = y - f
-        dlik_hess_dsigma = (  (2*self.sigma*self.v*(self.v + 1)*((self.sigma**2)*self.v - 3*(e**2))) /
+        #sigma = np.sqrt(self.sigma2)
-                              ((e**2 + (self.sigma**2)*self.v)**3)
+        #dlik_hess_dsigma = (  (2*sigma*self.v*(self.v + 1)*(self.sigma2*self.v - 3*(e**2))) /
-                           )
+                              #((e**2 + self.sigma2*self.v)**3)
                           #)
        #dlik_hess_dsigma = ( 2*(self.v + 1)*self.v*(self.sigma**2)*((e**2) + (self.v*(self.sigma**2)) - 4*(e**2))
                             #/ ((e**2 + (self.sigma**2)*self.v)**3) )
        dlik_hess_dsigma = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
                              / (self.sigma2*self.v + (e**2))**3
                           )
        return dlik_hess_dsigma
    def _gradients(self, y, f, extra_data=None):
@ -466,3 +482,148 @@ class weibull_survival(likelihood_function):
        hess = (y**self.shape)*np.exp(f)
        return np.squeeze(hess)
 #class gaussian(likelihood_function):
    #"""
    #Gaussian likelihood - this is a test class for approximation schemes
    #"""
    #def __init__(self, variance):
        #self._set_params(np.asarray(variance))
    #def _get_params(self):
        #return np.asarray(self.sigma2)
    #def _get_param_names(self):
        #return ["noise_variance"]
    #def _set_params(self, x):
        #self.variance = float(x)
    #def link_function(self, y, f, extra_data=None):
        #"""link_function $\ln p(y|f)$
        #$$\ln p(y_{i}|f_{i}) = \ln $$
        #:y: data
        #:f: latent variables f
        #:extra_data: extra_data which is not used in student t distribution
        #:returns: float(likelihood evaluated for this point)
        #"""
        #assert y.shape == f.shape
        #e = y - f
        #objective = -0.5*self.D*
        #return np.sum(objective)
    #def dlik_df(self, y, f, extra_data=None):
        #"""
        #Gradient of the link function at y, given f w.r.t f
        #$$\frac{dp(y_{i}|f_{i})}{df} = \frac{(v+1)(y_{i}-f_{i})}{(y_{i}-f_{i})^{2} + \sigma^{2}v}$$
        #:y: data
        #:f: latent variables f
        #:extra_data: extra_data which is not used in student t distribution
        #:returns: gradient of likelihood evaluated at points
        #"""
        #assert y.shape == f.shape
        #e = y - f
        #grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
        #return grad
    #def d2lik_d2f(self, y, f, extra_data=None):
        #"""
        #Hessian at this point (if we are only looking at the link function not the prior) the hessian will be 0 unless i == j
        #i.e. second derivative link_function at y given f f_j  w.r.t f and f_j
        #Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
        #(the distribution for y_{i} depends only on f_{i} not on f_{j!=i}
        #$$\frac{d^{2}p(y_{i}|f_{i})}{d^{3}f} = \frac{(v+1)((y_{i}-f_{i})^{2} - \sigma^{2}v)}{((y_{i}-f_{i})^{2} + \sigma^{2}v)^{2}}$$
        #:y: data
        #:f: latent variables f
        #:extra_data: extra_data which is not used in student t distribution
        #:returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points)
        #"""
        #assert y.shape == f.shape
        #e = y - f
        #hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / (((self.sigma2*self.v) + e**2)**2)
        #return hess
    #def d3lik_d3f(self, y, f, extra_data=None):
        #"""
        #Third order derivative link_function (log-likelihood ) at y given f f_j w.r.t f and f_j
        #$$\frac{d^{3}p(y_{i}|f_{i})}{d^{3}f} = \frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \sigma^{2} v))}{((y_{i} - f_{i}) + \sigma^{2} v)^3}$$
        #"""
        #assert y.shape == f.shape
        #e = y - f
        #d3lik_d3f = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
                       #((e**2 + self.sigma2*self.v)**3)
                    #)
        #return d3lik_d3f
    #def lik_dstd(self, y, f, extra_data=None):
        #"""
        #Gradient of the likelihood (lik) w.r.t sigma parameter (standard deviation)
        #Terms relavent to derivatives wrt sigma are:
        #-log(sqrt(v*pi)*s) -(1/2)*(v + 1)*log(1 + (1/v)*((y-f)/(s))^2))
        #$$\frac{dp(y_{i}|f_{i})}{d\sigma} = -\frac{1}{\sigma} + \frac{(1+v)(y_{i}-f_{i})^2}{\sigma^3 v(1 + \frac{1}{v}(\frac{(y_{i} - f_{i})}{\sigma^2})^2)}$$
        #"""
        #assert y.shape == f.shape
        #e = y - f
        #sigma = np.sqrt(self.sigma2)
        ##dlik_dsigma = ( - (1/sigma) +
                        ##((1+self.v)*(e**2))/((sigma*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) )
                      ##)
        ##dlik_dsigma = ( - 1 +
                        ##((1+self.v)*(e**2))/((self.sigma2*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) )
                      ##)
        ##dlik_dsigma = (((self.v + 1)*(e**2))/((e**2) + self.v*(self.sigma**2))) - 1
        #dlik_dsigma = (self.v*((e**2)-self.sigma2))/(sigma*((e**2)+self.sigma2*self.v))
        #return dlik_dsigma
    #def dlik_df_dstd(self, y, f, extra_data=None):
        #"""
        #Gradient of the dlik_df w.r.t sigma parameter (standard deviation)
        #$$\frac{d}{d\sigma}(\frac{dp(y_{i}|f_{i})}{df}) = \frac{-2\sigma v(v + 1)(y_{i}-f_{i})}{(y_{i}-f_{i})^2 + \sigma^2 v)^2}$$
        #"""
        #assert y.shape == f.shape
        #e = y - f
        #sigma = np.sqrt(self.sigma2)
        #dlik_grad_dsigma = ((-2*sigma*self.v*(self.v + 1)*e) #2 might not want to be here?
                            #/ ((self.v*self.sigma2 + e**2)**2)
                           #)
        #return dlik_grad_dsigma
    #def d2lik_d2f_dstd(self, y, f, extra_data=None):
        #"""
        #Gradient of the hessian (d2lik_d2f) w.r.t sigma parameter (standard deviation)
        #$$\frac{d}{d\sigma}(\frac{d^{2}p(y_{i}|f_{i})}{d^{2}f}) = \frac{2\sigma v(v + 1)(\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \sigma^2 v)^3}$$
        #"""
        #assert y.shape == f.shape
        #e = y - f
        #sigma = np.sqrt(self.sigma2)
        #dlik_hess_dsigma = (  (2*sigma*self.v*(self.v + 1)*(self.sigma2*self.v - 3*(e**2))) /
                              #((e**2 + self.sigma2*self.v)**3)
                           #)
        ##dlik_hess_dsigma = ( 2*(self.v + 1)*self.v*(self.sigma**2)*((e**2) + (self.v*(self.sigma**2)) - 4*(e**2))
                             ##/ ((e**2 + (self.sigma**2)*self.v)**3) )
        #return dlik_hess_dsigma
    #def _gradients(self, y, f, extra_data=None):
        ##must be listed in same order as 'get_param_names'
        #derivs = ([self.lik_dstd(y, f, extra_data=extra_data)],
                  #[self.dlik_df_dstd(y, f, extra_data=extra_data)],
                  #[self.d2lik_d2f_dstd(y, f, extra_data=extra_data)]
                 #) # lists as we might learn many parameters
        ## ensure we have gradients for every parameter we want to optimize
        #assert len(derivs[0]) == len(self._get_param_names())
        #assert len(derivs[1]) == len(self._get_param_names())
        #assert len(derivs[2]) == len(self._get_param_names())
        #return derivs