mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-08 15:05:15 +02:00
Moved code from gp.py to model.py, as it's model independent, and making it more general allows us to write more simple test code
This commit is contained in:
parent
dc5320194a
commit
6b68110acf
2 changed files with 196 additions and 192 deletions
192
GPy/core/gp.py
192
GPy/core/gp.py
|
|
@ -13,7 +13,6 @@ from GPy.core.parameterization.variational import VariationalPosterior
|
|||
import logging
|
||||
import warnings
|
||||
from GPy.util.normalizer import MeanNorm
|
||||
from ..util.linalg import pdinv
|
||||
logger = logging.getLogger("GP")
|
||||
|
||||
class GP(Model):
|
||||
|
|
@ -603,195 +602,4 @@ class GP(Model):
|
|||
mu_star, var_star = self._raw_predict(x_test)
|
||||
return self.likelihood.log_predictive_density_sampling(y_test, mu_star, var_star, Y_metadata=Y_metadata, num_samples=num_samples)
|
||||
|
||||
def CCD(self):
|
||||
"""
|
||||
Code is based on implementation within GPStuff, INLA and the original Sanchez and Sanchez paper (2005)
|
||||
"""
|
||||
modal_params = self.optimizer_array[:].copy()
|
||||
num_free_params = modal_params.shape[0]
|
||||
|
||||
# Calculate the numerical hessian for *every* parameter
|
||||
H = self.numerical_parameter_hessian()
|
||||
|
||||
try:
|
||||
curv = pdinv(H)[0]
|
||||
except np.linalg.linalg.LinAlgError:
|
||||
print("Hessian is not positive definite, ensure parameters are at their MAP solution by optimizing, if that doesn't work try modifying the step length parameter")
|
||||
return
|
||||
# CCD points are calculated for unit circle, so we take the principle components and scale them such that we have a unit sphere in our parameter space, which we can then map back to parameter space
|
||||
[w, V] = np.linalg.eig(curv)
|
||||
z = (V*np.sqrt(w[None,:])).T
|
||||
|
||||
# Calculate points, we will do just CCD for now
|
||||
|
||||
# First we build the hadamarx matrix from the paper that encodes *all* the points possible in the num_free_params dimensional space.
|
||||
grow = np.sum(num_free_params >= np.array([1, 2, 3, 4, 6, 7, 9, 12, 18, 22, 30, 39, 53, 70, 93]))
|
||||
|
||||
H0 = 1
|
||||
|
||||
#Build the design matrix that holds *every* corner on the num_free_params dimensional hyper-cube (the hadamard matrix), H in the paper
|
||||
for i in range(grow):
|
||||
H0 = np.vstack([np.hstack([H0, H0]),
|
||||
np.hstack([H0, -H0])])
|
||||
|
||||
# For each additional parameter our dimensional space increases, as does the number of CCD points we require to do the approximate integration. CCD is a method of deciding which points should be included and which can be ignored without a significant effect. It would be ideal but too computationally expensive - when we have many parameters - to use all the corners of the hypercube in our integration
|
||||
# See Sanchez and Sanchez (2005) for details. on how the points are chosen
|
||||
walsh_inds = np.array([1, 2, 4, 8, 15, 16, 32, 51, 64, 85, 106, 128, 150, 171, 219, 237, 247, 256, 279, 297, 455, 512, 537, 557, 597, 643, 803, 863, 898, 1024, 1051, 1070, 1112, 1169, 1333, 1345, 1620, 1866, 2048, 2076, 2085, 2158, 2372, 2456, 2618, 2800, 2873, 3127, 3284, 3483, 3557, 3763, 4096, 4125, 4135, 4176, 4435, 4459, 4469, 4497, 4752, 5255, 5732, 5801, 5915, 6100, 6369, 6907, 7069, 8192, 8263, 8351, 8422, 8458, 8571, 8750, 8858, 9124, 9314, 9500, 10026, 10455, 10556, 11778, 11885, 11984, 13548, 14007, 14514, 14965, 15125, 15554, 16384, 16457, 16517, 16609, 16771, 16853, 17022, 17453, 17891, 18073, 18562, 18980, 19030, 19932, 20075, 20745, 21544, 22633, 23200, 24167, 25700, 26360, 26591, 26776, 28443, 28905, 29577, 32705])
|
||||
|
||||
used_walsh_inds = walsh_inds[:num_free_params]
|
||||
|
||||
# For every additional parameters, we get an additional number of points (which as we decide to drop points using Sanchezes rules, won't necessarily grow at the same rate)
|
||||
ccd_points = H0[:, used_walsh_inds]
|
||||
|
||||
# ccd only gives corner points, so lets add an additional one for the centre point
|
||||
ccd_points = np.vstack([np.zeros((1,num_free_params)), ccd_points])
|
||||
|
||||
# Now we add the points on the unit hyper-sphere that arent on the corners
|
||||
for i in range(num_free_params):
|
||||
top_edge = np.zeros(num_free_params)
|
||||
top_edge[i] = np.sqrt(num_free_params)
|
||||
bottom_edge = np.zeros(num_free_params)
|
||||
bottom_edge[i] = -np.sqrt(num_free_params)
|
||||
ccd_points = np.vstack([ccd_points, top_edge, bottom_edge])
|
||||
|
||||
# Find the appropriate scaling such that the edges lie on a boundry of equal density
|
||||
# First find the density at the mode
|
||||
log_marginal = lambda p: -self._objective(p)
|
||||
mode_density = log_marginal(modal_params) # FIXME: We really want to evaluate the log_marginal not the negative objective as it makes more sense, but they are equivalent in this case
|
||||
|
||||
# Treating the posterior over hyperparameters as a standard normal, moving z*sqrt(2) should make the likelihood drop by 1, as z should be acting as a unit vector along the principle components of the posterior.
|
||||
|
||||
scalings = np.ones_like(ccd_points)
|
||||
# The below code makes me feel dirty. Pythonise it!
|
||||
# This is naive scaling, assuming that it is well approximated by a standard normal, in practice you might want to scale in different directions seperately (split normal approximation)
|
||||
for j in range(num_free_params*2):
|
||||
temp = np.zeros((1, num_free_params))
|
||||
if j % 2:
|
||||
direction = -1
|
||||
else:
|
||||
direction = 1
|
||||
ind = np.ceil(j // 2) # This integer division is required, will not work with python 3
|
||||
temp[0, ind] = direction
|
||||
|
||||
# This is the point mapped onto the contour of a unit gaussian, stretched by the eigenvectors over the marginal likelihood
|
||||
contour_point = modal_params + np.sqrt(2)*temp.dot(z)
|
||||
point_density = log_marginal(contour_point)
|
||||
|
||||
if mode_density > point_density:
|
||||
# The scale is based on the amount this point must be scaled in order to be on the contour of the stretched standard normal (stretched by principle components)
|
||||
scale = np.sqrt(1.0 / (mode_density - point_density))
|
||||
else:
|
||||
print("Contour point is higher than mode, must be multimodal or mode not found")
|
||||
scale = 1 # Print a warning and say "dont scale in this direction"
|
||||
|
||||
# Set the scaling for all dimensions where the point is in this direction, when looking at this dimension, clipped.
|
||||
scalings[ccd_points[:, ind]*direction > 0.0, ind] = np.maximum(np.minimum(scale, 10.0), 1/10.0)
|
||||
|
||||
# Make the points *slightly* further out. Aki Vehtari has shown this to be more accurate
|
||||
over_scale = 1.1
|
||||
ccd_points = over_scale*scalings*ccd_points
|
||||
|
||||
# We have the scaled ccd_points, now we need to map them into parameter space by projecting along principle components and shifting to mode
|
||||
param_points = ccd_points.dot(z) + modal_params
|
||||
|
||||
# Evaluate log marginal at all parameter points
|
||||
point_densities = np.ones(param_points.shape[0])*np.nan
|
||||
for point_ind, param_point in enumerate(param_points):
|
||||
point_densities[point_ind] = log_marginal(param_point)
|
||||
|
||||
# Remove nan densities
|
||||
non_nan_densities = np.isfinite(point_densities).flatten()
|
||||
point_densities = point_densities[non_nan_densities]
|
||||
param_points = param_points[non_nan_densities, :]
|
||||
|
||||
point_densities = point_densities - np.max(point_densities) # Why don't we have to deal with this shift?
|
||||
point_densities = np.exp(point_densities)
|
||||
|
||||
if num_free_params > 0:
|
||||
# Each section that the ccd point represents needs an associated weight. Since every point is on the same contour, all weights are the same, except the centre weight
|
||||
point_weights = 1.0/((2.0*np.pi)**(-num_free_params*0.5) * np.exp(-0.5*num_free_params*over_scale**2) * (param_points.shape[0]-1)*over_scale**2)
|
||||
centre_weight = (2*np.pi)**(num_free_params*0.5) * (1 - 1.0/over_scale**2)
|
||||
# normalize
|
||||
point_weights = point_weights / centre_weight
|
||||
centre_weight = 1.0
|
||||
|
||||
point_densities[1:] *= point_weights
|
||||
|
||||
# Normalize density
|
||||
point_densities /= point_densities.sum()
|
||||
|
||||
# Remove small density points
|
||||
non_small_densities = (point_densities > 0.01/point_densities.size).flatten()
|
||||
point_densities = point_densities[non_small_densities]
|
||||
param_points = param_points[non_small_densities, :]
|
||||
point_densities /= point_densities.sum()
|
||||
|
||||
# for dimension_ind, dimension in enumerate(num_free_params):
|
||||
# # Due to the way we built the ccd_points up, every other one changes to the next direction
|
||||
# ind = np.ceil(dimension_ind / 2.0)
|
||||
# if dimension_ind % 2:
|
||||
# direction = 1
|
||||
# else:
|
||||
# direction = -1
|
||||
# print(z[param_dir,:])
|
||||
# for direction in [1, -1]:
|
||||
# for parameter in range(num_free_params):
|
||||
#If the log likelihood doesn't even drop when we move away from the mode, then we havent found the modal points, or there are multiple modes in the posterior (bad)
|
||||
# upper_density = self._objective(modal_params + np.sqrt(2)*np.dot(direction,z))
|
||||
|
||||
# We want to work out the scaling for every point, so we first figure out which way the vector is scaling from the mode, and scale in that direction
|
||||
|
||||
|
||||
# fig = plt.figure()
|
||||
# if num_free_params == 3:
|
||||
# ax = fig.add_subplot(111, projection='3d')
|
||||
# ax.scatter(ccd_points[:,0], ccd_points[:,1], ccd_points[:,2])
|
||||
# elif num_free_params == 2:
|
||||
# plt.scatter(ccd_points[:,0], ccd_points[:,1])
|
||||
# plt.show()
|
||||
import paramz
|
||||
|
||||
transformed_points = param_points.copy()
|
||||
|
||||
# Need to transform the points to the space of parameters again
|
||||
f = np.ones(self.size).astype(bool)
|
||||
f[self.constraints[paramz.transformations.__fixed__]] = paramz.transformations.FIXED
|
||||
new_t_points = [c.f(transformed_points[:, ind[f[ind]]]) for c, ind in self.constraints.items() if c != paramz.transformations.__fixed__][0]
|
||||
|
||||
return new_t_points, point_densities
|
||||
|
||||
def numerical_parameter_hessian(self, step_length=1e-3):
|
||||
"""
|
||||
Calculate the numerical hessian for the posterior of the parameters
|
||||
|
||||
Often this will want to be done at the modal values of the parameters so the optimizer should be called first
|
||||
|
||||
H = [d^2L_dp1dp1, d^2L_dp1dp2, d^2L_dp1dp3, ...;
|
||||
d^2L_dp2dp1, d^2L_dp2dp2, d^2L_dp3dp3, ...;
|
||||
...
|
||||
]
|
||||
Where the vector p is all of the parameters in param_array, not just parameters of this model class itself
|
||||
"""
|
||||
# We use optimizer array as we want to ignore any parameters with fixed values
|
||||
num_child_params = self.optimizer_array.shape[0]
|
||||
|
||||
H = np.eye(num_child_params)
|
||||
|
||||
initial_params = self.optimizer_array[:].copy()
|
||||
# Calculate gradient of marginal likelihood moving one at a time
|
||||
for i in range(num_child_params):
|
||||
# Pick out the e vector with one in the parameter that we wish to move
|
||||
e = H[:,i]
|
||||
# Get parameters required to evaluate by nudging left and right
|
||||
left = initial_params - step_length*e
|
||||
right = initial_params + step_length*e
|
||||
|
||||
left_grad = self._grads(left)
|
||||
right_grad = self._grads(right)
|
||||
|
||||
finite_diff = (right_grad - left_grad) / (2*step_length)
|
||||
|
||||
#Put in the correct row of the hessian
|
||||
H[:,i] = finite_diff
|
||||
|
||||
return H
|
||||
|
|
|
|||
|
|
@ -3,6 +3,9 @@
|
|||
from .parameterization.priorizable import Priorizable
|
||||
from paramz import Model as ParamzModel
|
||||
|
||||
import numpy as np
|
||||
from ..util.linalg import pdinv
|
||||
|
||||
class Model(ParamzModel, Priorizable):
|
||||
|
||||
def __init__(self, name):
|
||||
|
|
@ -46,3 +49,196 @@ class Model(ParamzModel, Priorizable):
|
|||
probabilistic, just return your *negative* gradient here!
|
||||
"""
|
||||
return -(self._log_likelihood_gradients() + self._log_prior_gradients())
|
||||
|
||||
def CCD(self):
|
||||
"""
|
||||
Code is based on implementation within GPStuff, INLA and the original Sanchez and Sanchez paper (2005)
|
||||
"""
|
||||
modal_params = self.optimizer_array[:].copy()
|
||||
num_free_params = modal_params.shape[0]
|
||||
|
||||
# Calculate the numerical hessian for *every* parameter
|
||||
H = self.numerical_parameter_hessian()
|
||||
|
||||
try:
|
||||
curv = pdinv(H)[0]
|
||||
except np.linalg.linalg.LinAlgError:
|
||||
print("Hessian is not positive definite, ensure parameters are at their MAP solution by optimizing, if that doesn't work try modifying the step length parameter")
|
||||
return
|
||||
# CCD points are calculated for unit circle, so we take the principle components and scale them such that we have a unit sphere in our parameter space, which we can then map back to parameter space
|
||||
[w, V] = np.linalg.eig(curv)
|
||||
z = (V*np.sqrt(w[None,:])).T
|
||||
|
||||
# Calculate points, we will do just CCD for now
|
||||
|
||||
# First we build the hadamarx matrix from the paper that encodes *all* the points possible in the num_free_params dimensional space.
|
||||
grow = np.sum(num_free_params >= np.array([1, 2, 3, 4, 6, 7, 9, 12, 18, 22, 30, 39, 53, 70, 93]))
|
||||
|
||||
H0 = 1
|
||||
|
||||
#Build the design matrix that holds *every* corner on the num_free_params dimensional hyper-cube (the hadamard matrix), H in the paper
|
||||
for i in range(grow):
|
||||
H0 = np.vstack([np.hstack([H0, H0]),
|
||||
np.hstack([H0, -H0])])
|
||||
|
||||
# For each additional parameter our dimensional space increases, as does the number of CCD points we require to do the approximate integration. CCD is a method of deciding which points should be included and which can be ignored without a significant effect. It would be ideal but too computationally expensive - when we have many parameters - to use all the corners of the hypercube in our integration
|
||||
# See Sanchez and Sanchez (2005) for details. on how the points are chosen
|
||||
walsh_inds = np.array([1, 2, 4, 8, 15, 16, 32, 51, 64, 85, 106, 128, 150, 171, 219, 237, 247, 256, 279, 297, 455, 512, 537, 557, 597, 643, 803, 863, 898, 1024, 1051, 1070, 1112, 1169, 1333, 1345, 1620, 1866, 2048, 2076, 2085, 2158, 2372, 2456, 2618, 2800, 2873, 3127, 3284, 3483, 3557, 3763, 4096, 4125, 4135, 4176, 4435, 4459, 4469, 4497, 4752, 5255, 5732, 5801, 5915, 6100, 6369, 6907, 7069, 8192, 8263, 8351, 8422, 8458, 8571, 8750, 8858, 9124, 9314, 9500, 10026, 10455, 10556, 11778, 11885, 11984, 13548, 14007, 14514, 14965, 15125, 15554, 16384, 16457, 16517, 16609, 16771, 16853, 17022, 17453, 17891, 18073, 18562, 18980, 19030, 19932, 20075, 20745, 21544, 22633, 23200, 24167, 25700, 26360, 26591, 26776, 28443, 28905, 29577, 32705])
|
||||
|
||||
used_walsh_inds = walsh_inds[:num_free_params]
|
||||
|
||||
# For every additional parameters, we get an additional number of points (which as we decide to drop points using Sanchezes rules, won't necessarily grow at the same rate)
|
||||
ccd_points = H0[:, used_walsh_inds]
|
||||
|
||||
# ccd only gives corner points, so lets add an additional one for the centre point
|
||||
ccd_points = np.vstack([np.zeros((1,num_free_params)), ccd_points])
|
||||
|
||||
# Now we add the points on the unit hyper-sphere that arent on the corners
|
||||
for i in range(num_free_params):
|
||||
top_edge = np.zeros(num_free_params)
|
||||
top_edge[i] = np.sqrt(num_free_params)
|
||||
bottom_edge = np.zeros(num_free_params)
|
||||
bottom_edge[i] = -np.sqrt(num_free_params)
|
||||
ccd_points = np.vstack([ccd_points, top_edge, bottom_edge])
|
||||
|
||||
# Find the appropriate scaling such that the edges lie on a boundry of equal density
|
||||
# First find the density at the mode
|
||||
log_marginal = lambda p: -self._objective(p)
|
||||
mode_density = log_marginal(modal_params) # FIXME: We really want to evaluate the log_marginal not the negative objective as it makes more sense, but they are equivalent in this case
|
||||
|
||||
# Treating the posterior over hyperparameters as a standard normal, moving z*sqrt(2) should make the likelihood drop by 1, as z should be acting as a unit vector along the principle components of the posterior.
|
||||
|
||||
scalings = np.ones_like(ccd_points)
|
||||
# The below code makes me feel dirty. Pythonise it!
|
||||
# This is naive scaling, assuming that it is well approximated by a standard normal, in practice you might want to scale in different directions seperately (split normal approximation)
|
||||
for j in range(num_free_params*2):
|
||||
temp = np.zeros((1, num_free_params))
|
||||
if j % 2:
|
||||
direction = -1
|
||||
else:
|
||||
direction = 1
|
||||
ind = np.ceil(j // 2) # This integer division is required, will not work with python 3
|
||||
temp[0, ind] = direction
|
||||
|
||||
# This is the point mapped onto the contour of a unit gaussian, stretched by the eigenvectors over the marginal likelihood
|
||||
contour_point = modal_params + np.sqrt(2)*temp.dot(z)
|
||||
point_density = log_marginal(contour_point)
|
||||
|
||||
if mode_density > point_density:
|
||||
# The scale is based on the amount this point must be scaled in order to be on the contour of the stretched standard normal (stretched by principle components)
|
||||
scale = np.sqrt(1.0 / (mode_density - point_density))
|
||||
else:
|
||||
print("Contour point is higher than mode, must be multimodal or mode not found")
|
||||
scale = 1 # Print a warning and say "dont scale in this direction"
|
||||
|
||||
# Set the scaling for all dimensions where the point is in this direction, when looking at this dimension, clipped.
|
||||
scalings[ccd_points[:, ind]*direction > 0.0, ind] = np.maximum(np.minimum(scale, 10.0), 1/10.0)
|
||||
|
||||
# Make the points *slightly* further out. Aki Vehtari has shown this to be more accurate
|
||||
over_scale = 1.1
|
||||
ccd_points = over_scale*scalings*ccd_points
|
||||
|
||||
# We have the scaled ccd_points, now we need to map them into parameter space by projecting along principle components and shifting to mode
|
||||
param_points = ccd_points.dot(z) + modal_params
|
||||
|
||||
# Evaluate log marginal at all parameter points
|
||||
point_densities = np.ones(param_points.shape[0])*np.nan
|
||||
for point_ind, param_point in enumerate(param_points):
|
||||
point_densities[point_ind] = log_marginal(param_point)
|
||||
|
||||
# Remove nan densities
|
||||
non_nan_densities = np.isfinite(point_densities).flatten()
|
||||
point_densities = point_densities[non_nan_densities]
|
||||
param_points = param_points[non_nan_densities, :]
|
||||
|
||||
point_densities = point_densities - np.max(point_densities) # Why don't we have to deal with this shift?
|
||||
point_densities = np.exp(point_densities)
|
||||
|
||||
if num_free_params > 0:
|
||||
# Each section that the ccd point represents needs an associated weight. Since every point is on the same contour, all weights are the same, except the centre weight
|
||||
point_weights = 1.0/((2.0*np.pi)**(-num_free_params*0.5) * np.exp(-0.5*num_free_params*over_scale**2) * (param_points.shape[0]-1)*over_scale**2)
|
||||
centre_weight = (2*np.pi)**(num_free_params*0.5) * (1 - 1.0/over_scale**2)
|
||||
# normalize
|
||||
point_weights = point_weights / centre_weight
|
||||
centre_weight = 1.0
|
||||
|
||||
point_densities[1:] *= point_weights
|
||||
|
||||
# Normalize density
|
||||
point_densities /= point_densities.sum()
|
||||
|
||||
# Remove small density points
|
||||
non_small_densities = (point_densities > 0.01/point_densities.size).flatten()
|
||||
point_densities = point_densities[non_small_densities]
|
||||
param_points = param_points[non_small_densities, :]
|
||||
point_densities /= point_densities.sum()
|
||||
|
||||
# for dimension_ind, dimension in enumerate(num_free_params):
|
||||
# # Due to the way we built the ccd_points up, every other one changes to the next direction
|
||||
# ind = np.ceil(dimension_ind / 2.0)
|
||||
# if dimension_ind % 2:
|
||||
# direction = 1
|
||||
# else:
|
||||
# direction = -1
|
||||
# print(z[param_dir,:])
|
||||
# for direction in [1, -1]:
|
||||
# for parameter in range(num_free_params):
|
||||
#If the log likelihood doesn't even drop when we move away from the mode, then we havent found the modal points, or there are multiple modes in the posterior (bad)
|
||||
# upper_density = self._objective(modal_params + np.sqrt(2)*np.dot(direction,z))
|
||||
|
||||
# We want to work out the scaling for every point, so we first figure out which way the vector is scaling from the mode, and scale in that direction
|
||||
|
||||
|
||||
# fig = plt.figure()
|
||||
# if num_free_params == 3:
|
||||
# ax = fig.add_subplot(111, projection='3d')
|
||||
# ax.scatter(ccd_points[:,0], ccd_points[:,1], ccd_points[:,2])
|
||||
# elif num_free_params == 2:
|
||||
# plt.scatter(ccd_points[:,0], ccd_points[:,1])
|
||||
# plt.show()
|
||||
import paramz
|
||||
|
||||
transformed_points = param_points.copy()
|
||||
|
||||
# Need to transform the points to the space of parameters again
|
||||
f = np.ones(self.size).astype(bool)
|
||||
f[self.constraints[paramz.transformations.__fixed__]] = paramz.transformations.FIXED
|
||||
new_t_points = [c.f(transformed_points[:, ind[f[ind]]]) for c, ind in self.constraints.items() if c != paramz.transformations.__fixed__][0]
|
||||
|
||||
return new_t_points, point_densities
|
||||
|
||||
def numerical_parameter_hessian(self, step_length=1e-3):
|
||||
"""
|
||||
Calculate the numerical hessian for the posterior of the parameters
|
||||
|
||||
Often this will want to be done at the modal values of the parameters so the optimizer should be called first
|
||||
|
||||
H = [d^2L_dp1dp1, d^2L_dp1dp2, d^2L_dp1dp3, ...;
|
||||
d^2L_dp2dp1, d^2L_dp2dp2, d^2L_dp3dp3, ...;
|
||||
...
|
||||
]
|
||||
Where the vector p is all of the parameters in param_array, not just parameters of this model class itself
|
||||
"""
|
||||
# We use optimizer array as we want to ignore any parameters with fixed values
|
||||
num_child_params = self.optimizer_array.shape[0]
|
||||
|
||||
H = np.eye(num_child_params)
|
||||
|
||||
initial_params = self.optimizer_array[:].copy()
|
||||
# Calculate gradient of marginal likelihood moving one at a time
|
||||
for i in range(num_child_params):
|
||||
# Pick out the e vector with one in the parameter that we wish to move
|
||||
e = H[:,i]
|
||||
# Get parameters required to evaluate by nudging left and right
|
||||
left = initial_params - step_length*e
|
||||
right = initial_params + step_length*e
|
||||
|
||||
left_grad = self._grads(left)
|
||||
right_grad = self._grads(right)
|
||||
|
||||
finite_diff = (right_grad - left_grad) / (2*step_length)
|
||||
|
||||
#Put in the correct row of the hessian
|
||||
H[:,i] = finite_diff
|
||||
|
||||
return H
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue