The Levenberg Marquardt algorithm is a modification of the Gauss Newton algorithm and is a fairly widely used method for solving nonlinear regression problems. While the Levenberg Marquardt algorithm tends to converge somewhat slower than the Gauss Newton algorithm, it usually has far fewer problems with divergence and instability.
The Levenberg Marquardt algorithm uses the equation shown below.
(J TJ +λ diag(J TJ))(P1 – P0) = J T Ep0
Where the variable definitions for this equation are the same as in the previous post on the Gauss Newton algorithm.
The Levenberg Marquardt algorithm works by starting with an initial guess of the parameters. These are used as P0. The residual error and the Jacobian are then solved for. The equation above is then used to solve for P1. At this new P1 the residual error is computed. If the new residual error is greater than the original residual error, then the new P1 is rejected, λ is increased, and a new P1 is calculated. If the new residual error is less than the starting residual error, then the new P1 values are accepted and λ is decreased. The new accepted P1 becomes the new P0 and the algorithm is repeated until it either converges or goes unstable.
The code for the Levenberg Marquardt class is shown at the bottom of this post.
The λ factor used in this implementation of the algorithm is similar to that in the Wikipedia article, with λ0 set to 100 and v to 10.
There are two constructors for the LevenbergMarquardt class. The first takes the function to be regressed, an array of regression parameters, a Parameter array of the independent variables, a double array of the data, and an integer that indicates the number of points to use when numerically computing the derivatives in the Jacobian. The second constructor is similar to the first except that it defaults the number of parameters to use when computing the derivatives to 3. The constructor for this class allocates memory for the Jacobian, residual error, and initial regression parameter values that are stored in matrix fields. It also creates the an instance of the derivatives class that will be used to compute the Jacobian.
The Iterate function invokes an iteration of the Levenberg Marquardt algorithm. This function first iterates through the data points and computes the Jacobian matrix using numerical partial derivatives. The Levenberg Marquardt equation is then used to solve for the next set of parameters and the model parameter values are then set to these values.
using System;
using System.Diagnostics;
using NumericalMethods.FourthBlog;
namespace NumericalMethods
{
public class LevenbergMarquardt
{
private Matrix _jacobian;
private Matrix _residuals;
private Matrix _regressionParameters0;
private Derivatives _derivatives;
private Parameter[] _regressionParameters;
private Parameter[] _observedParameters;
private Func<double> _regressionFunction;
private double[,] _data;
private double _l0 = 100.0;
private double _v = 10.0;
public LevenbergMarquardt(Func<double> regressionFunction, Parameter[] regressionParameters, Parameter[] observedParameters, double[,] data, int numberOfDerivativePoints)
{
Debug.Assert(data.GetLength(0) == observedParameters.Length + 1);
_data = data;
_observedParameters = observedParameters;
_regressionParameters = regressionParameters;
_regressionFunction = regressionFunction;
int numberOfParameters = _regressionParameters.Length;
int numberOfPoints = data.GetLength(1);
_derivatives = new Derivatives(numberOfDerivativePoints);
_jacobian = new Matrix(numberOfPoints, numberOfParameters);
_residuals = new Matrix(numberOfPoints, 1);
_regressionParameters0 = new Matrix(numberOfParameters, 1);
}
public LevenbergMarquardt(Func<double> function, Parameter[] regressionParameters, Parameter[] observedParameters, double[,] data) :
this(function, regressionParameters, observedParameters, data, 3)
{
}
public void Iterate()
{
int numberOfPoints = _data.GetLength(1);
int numberOfParameters = _regressionParameters.Length;
double currentResidual = 0.0;
for (int i = 0; i < numberOfPoints; i++)
{
for (int j = 0; j < _observedParameters.Length; j++)
{
_observedParameters[j].Value = _data[j, i];
}
double functionValue = _regressionFunction();
double residual = _data[_observedParameters.Length, i] - functionValue;
_residuals[i, 0] = residual;
currentResidual += residual * residual;
for (int j = 0; j < numberOfParameters; j++)
{
_jacobian[i, j] = _derivatives.ComputePartialDerivative(_regressionFunction, _regressionParameters[j], 1, functionValue);
}
}
for (int i = 0; i < numberOfParameters; i++)
{
_regressionParameters0[i, 0] = _regressionParameters[i];
}
Matrix jacobianTranspose = _jacobian.Transpose();
Matrix jacobianTransposeResiduals = jacobianTranspose * _residuals;
Matrix jacobianTransposeJacobian = jacobianTranspose * _jacobian;
Matrix jacobianTransposeJacobianDiagnol = new Matrix(jacobianTransposeJacobian.RowCount, jacobianTransposeJacobian.RowCount);
for (int i = 0; i < jacobianTransposeJacobian.RowCount; i++)
{
jacobianTransposeJacobianDiagnol[i, i] = jacobianTransposeJacobian[i, i];
}
Matrix delta = null;
Matrix newRegressionParameters = null;
double newResidual = currentResidual + 1.0;
_l0 /= _v;
while (newResidual > currentResidual)
{
newResidual = 0.0;
_l0 *= _v;
Matrix matLHS = jacobianTransposeJacobian + _l0 * jacobianTransposeJacobianDiagnol;
delta = matLHS.SolveFor(jacobianTransposeResiduals); ;
newRegressionParameters = _regressionParameters0 + delta;
for (int i = 0; i < numberOfParameters; i++)
{
_regressionParameters[i].Value = newRegressionParameters[i, 0];
}
for (int i = 0; i < numberOfPoints; i++)
{
for (int j = 0; j < _observedParameters.Length; j++)
{
_observedParameters[j].Value = _data[j, i];
}
double functionValue = _regressionFunction();
double residual = _data[_observedParameters.Length, i] - functionValue;
newResidual += residual * residual;
}
}
_l0 /= _v;
}
}
}