From ba2a9cce037a23a5f3c40c772687322c0cdd750a Mon Sep 17 00:00:00 2001
From: Thomas Capricelli <orzel@freehackers.org>
Date: Tue, 26 Jan 2010 07:36:15 +0100
Subject: some more eigenization

---
 .../NonLinearOptimization/HybridNonLinearSolver.h  | 132 ++++-----------------
 .../Eigen/src/NonLinearOptimization/dogleg.h       |  38 +++---
 2 files changed, 39 insertions(+), 131 deletions(-)

(limited to 'unsupported/Eigen/src')
diff --git a/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h b/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
index 3ad901391..ee5b63c44 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
@@ -186,7 +186,6 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveInit(
     njev = 0;
 
     /*     check the input parameters for errors. */
-
     if (n <= 0 || parameters.xtol < 0. || parameters.maxfev <= 0 || parameters.factor <= 0. )
         return ImproperInputParameters;
     if (mode == 2)
@@ -196,14 +195,12 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveInit(
 
     /*     evaluate the function at the starting point */
     /*     and calculate its norm. */
-
     nfev = 1;
     if ( functor(x, fvec) < 0)
         return UserAksed;
     fnorm = fvec.stableNorm();
 
     /*     initialize iteration counter and monitors. */
-
     iter = 1;
     ncsuc = 0;
     ncfail = 0;
@@ -224,7 +221,6 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
     jeval = true;
 
     /* calculate the jacobian matrix. */
-
     if ( functor.df(x, fjac) < 0)
         return UserAksed;
     ++njev;
@@ -235,11 +231,8 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
     /* to the norms of the columns of the initial jacobian. */
     if (iter == 1) {
         if (mode != 2)
-            for (j = 0; j < n; ++j) {
-                diag[j] = wa2[j];
-                if (wa2[j] == 0.)
-                    diag[j] = 1.;
-            }
+            for (j = 0; j < n; ++j)
+                diag[j] = (wa2[j]==0.) ? 1. : wa2[j];
 
         /* on the first iteration, calculate the norm of the scaled x */
         /* and initialize the step bound delta. */
@@ -260,7 +253,6 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
     for(int ii=0; ii< fjac.cols(); ii++) fjac.col(ii).segment(ii+1, fjac.rows()-ii-1) *= fjac(ii,ii); // rescale vectors
 
     /* form (q transpose)*fvec and store in qtf. */
-
     qtf = fvec;
     for (j = 0; j < n; ++j)
         if (fjac(j,j) != 0.) {
@@ -273,76 +265,54 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
         }
 
     /* copy the triangular factor of the qr factorization into r. */
-
     R = qrfac.matrixQR();
-    sing = false;
-    for (j = 0; j < n; ++j)
-        if (wa1[j] == 0.) sing = true;
+    sing = wa1.cwiseAbs().minCoeff()==0.;
 
     /* accumulate the orthogonal factor in fjac. */
     ei_qform<Scalar>(n, n, fjac.data(), fjac.rows(), wa1.data());
 
     /* rescale if necessary. */
-
-    /* Computing MAX */
     if (mode != 2)
         diag = diag.cwiseMax(wa2);
 
-    /* beginning of the inner loop. */
-
     while (true) {
-
         /* determine the direction p. */
-
         ei_dogleg<Scalar>(R, diag, qtf, delta, wa1);
 
         /* store the direction p and x + p. calculate the norm of p. */
-
         wa1 = -wa1;
         wa2 = x + wa1;
         wa3 = diag.cwiseProduct(wa1);
         pnorm = wa3.stableNorm();
 
         /* on the first iteration, adjust the initial step bound. */
-
         if (iter == 1)
             delta = std::min(delta,pnorm);
 
         /* evaluate the function at x + p and calculate its norm. */
-
         if ( functor(wa2, wa4) < 0)
             return UserAksed;
         ++nfev;
         fnorm1 = wa4.stableNorm();
 
         /* compute the scaled actual reduction. */
-
         actred = -1.;
         if (fnorm1 < fnorm) /* Computing 2nd power */
             actred = 1. - ei_abs2(fnorm1 / fnorm);
 
         /* compute the scaled predicted reduction. */
-
-        for (i = 0; i < n; ++i) {
-            sum = 0.;
-            for (j = i; j < n; ++j)
-                sum += R(i,j) * wa1[j];
-            wa3[i] = qtf[i] + sum;
-        }
+        wa3 = R.template triangularView<Upper>()*wa1 + qtf;
         temp = wa3.stableNorm();
         prered = 0.;
         if (temp < fnorm) /* Computing 2nd power */
             prered = 1. - ei_abs2(temp / fnorm);
 
-        /* compute the ratio of the actual to the predicted */
-        /* reduction. */
-
+        /* compute the ratio of the actual to the predicted reduction. */
         ratio = 0.;
         if (prered > 0.)
             ratio = actred / prered;
 
         /* update the step bound. */
-
         if (ratio < Scalar(.1)) {
             ncsuc = 0;
             ++ncfail;
@@ -350,7 +320,7 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
         } else {
             ncfail = 0;
             ++ncsuc;
-            if (ratio >= Scalar(.5) || ncsuc > 1) /* Computing MAX */
+            if (ratio >= Scalar(.5) || ncsuc > 1)
                 delta = std::max(delta, pnorm / Scalar(.5));
             if (ei_abs(ratio - 1.) <= Scalar(.1)) {
                 delta = pnorm / Scalar(.5);
@@ -358,7 +328,6 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
         }
 
         /* test for successful iteration. */
-
         if (ratio >= Scalar(1e-4)) {
             /* successful iteration. update x, fvec, and their norms. */
             x = wa2;
@@ -370,7 +339,6 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
         }
 
         /* determine the progress of the iteration. */
-
         ++nslow1;
         if (actred >= Scalar(.001))
             nslow1 = 0;
@@ -380,12 +348,10 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
             nslow2 = 0;
 
         /* test for convergence. */
-
         if (delta <= parameters.xtol * xnorm || fnorm == 0.)
             return RelativeErrorTooSmall;
 
         /* tests for termination and stringent tolerances. */
-
         if (nfev >= parameters.maxfev)
             return TooManyFunctionEvaluation;
         if (Scalar(.1) * std::max(Scalar(.1) * delta, pnorm) <= epsilon<Scalar>() * xnorm)
@@ -396,37 +362,27 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
             return NotMakingProgressIterations;
 
         /* criterion for recalculating jacobian. */
-
         if (ncfail == 2)
             break; // leave inner loop and go for the next outer loop iteration
 
         /* calculate the rank one modification to the jacobian */
         /* and update qtf if necessary. */
-
-        for (j = 0; j < n; ++j) {
-            sum = wa4.dot(fjac.col(j));
-            wa2[j] = (sum - wa3[j]) / pnorm;
-            wa1[j] = diag[j] * (diag[j] * wa1[j] / pnorm);
-            if (ratio >= Scalar(1e-4))
-                qtf[j] = sum;
-        }
+        wa1 = diag.cwiseProduct( diag.cwiseProduct(wa1)/pnorm );
+        wa2 = fjac.transpose() * wa4;
+        if (ratio >= Scalar(1e-4))
+            qtf = wa2;
+        wa2 = (wa2-wa3)/pnorm;
 
         /* compute the qr factorization of the updated jacobian. */
-
         ei_r1updt<Scalar>(n, n, R, wa1.data(), wa2.data(), wa3.data(), &sing);
         ei_r1mpyq<Scalar>(n, n, fjac.data(), fjac.rows(), wa2.data(), wa3.data());
         ei_r1mpyq<Scalar>(1, n, qtf.data(), 1, wa2.data(), wa3.data());
 
-        /* end of the inner loop. */
-
         jeval = false;
     }
-    /* end of the outer loop. */
-
     return Running;
 }
 
-
 template<typename FunctorType, typename Scalar>
 typename HybridNonLinearSolver<FunctorType,Scalar>::Status
 HybridNonLinearSolver<FunctorType,Scalar>::solve(
@@ -493,7 +449,6 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffInit(
     njev = 0;
 
     /*     check the input parameters for errors. */
-
     if (n <= 0 || parameters.xtol < 0. || parameters.maxfev <= 0 || parameters.nb_of_subdiagonals< 0 || parameters.nb_of_superdiagonals< 0 || parameters.factor <= 0. )
         return ImproperInputParameters;
     if (mode == 2)
@@ -503,14 +458,12 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffInit(
 
     /*     evaluate the function at the starting point */
     /*     and calculate its norm. */
-
     nfev = 1;
     if ( functor(x, fvec) < 0)
         return UserAksed;
     fnorm = fvec.stableNorm();
 
     /*     initialize iteration counter and monitors. */
-
     iter = 1;
     ncsuc = 0;
     ncfail = 0;
@@ -544,11 +497,8 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
     /* to the norms of the columns of the initial jacobian. */
     if (iter == 1) {
         if (mode != 2)
-            for (j = 0; j < n; ++j) {
-                diag[j] = wa2[j];
-                if (wa2[j] == 0.)
-                    diag[j] = 1.;
-            }
+            for (j = 0; j < n; ++j)
+                diag[j] = (wa2[j]==0.) ? 1. : wa2[j];
 
         /* on the first iteration, calculate the norm of the scaled x */
         /* and initialize the step bound delta. */
@@ -569,7 +519,6 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
     for(int ii=0; ii< fjac.cols(); ii++) fjac.col(ii).segment(ii+1, fjac.rows()-ii-1) *= fjac(ii,ii); // rescale vectors
 
     /* form (q transpose)*fvec and store in qtf. */
-
     qtf = fvec;
     for (j = 0; j < n; ++j)
         if (fjac(j,j) != 0.) {
@@ -583,74 +532,53 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
 
     /* copy the triangular factor of the qr factorization into r. */
     R = qrfac.matrixQR();
-    sing = false;
-    for (j = 0; j < n; ++j)
-        if (wa1[j] == 0.) sing = true;
+    sing = wa1.cwiseAbs().minCoeff()==0.;
 
     /* accumulate the orthogonal factor in fjac. */
     ei_qform<Scalar>(n, n, fjac.data(), fjac.rows(), wa1.data());
 
     /* rescale if necessary. */
-
-    /* Computing MAX */
     if (mode != 2)
         diag = diag.cwiseMax(wa2);
 
-    /* beginning of the inner loop. */
-
     while (true) {
-
         /* determine the direction p. */
-
         ei_dogleg<Scalar>(R, diag, qtf, delta, wa1);
 
         /* store the direction p and x + p. calculate the norm of p. */
-
         wa1 = -wa1;
         wa2 = x + wa1;
         wa3 = diag.cwiseProduct(wa1);
         pnorm = wa3.stableNorm();
 
         /* on the first iteration, adjust the initial step bound. */
-
         if (iter == 1)
             delta = std::min(delta,pnorm);
 
         /* evaluate the function at x + p and calculate its norm. */
-
         if ( functor(wa2, wa4) < 0)
             return UserAksed;
         ++nfev;
         fnorm1 = wa4.stableNorm();
 
         /* compute the scaled actual reduction. */
-
         actred = -1.;
         if (fnorm1 < fnorm) /* Computing 2nd power */
             actred = 1. - ei_abs2(fnorm1 / fnorm);
 
         /* compute the scaled predicted reduction. */
-
-        for (i = 0; i < n; ++i) {
-            sum = 0.;
-            for (j = i; j < n; ++j)
-                sum += R(i,j) * wa1[j];
-            wa3[i] = qtf[i] + sum;
-        }
+        wa3 = R.template triangularView<Upper>()*wa1 + qtf;
         temp = wa3.stableNorm();
         prered = 0.;
         if (temp < fnorm) /* Computing 2nd power */
             prered = 1. - ei_abs2(temp / fnorm);
 
-        /* compute the ratio of the actual to the predicted */
-        /* reduction. */
-
+        /* compute the ratio of the actual to the predicted reduction. */
         ratio = 0.;
         if (prered > 0.)
             ratio = actred / prered;
 
         /* update the step bound. */
-
         if (ratio < Scalar(.1)) {
             ncsuc = 0;
             ++ncfail;
@@ -658,7 +586,7 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
         } else {
             ncfail = 0;
             ++ncsuc;
-            if (ratio >= Scalar(.5) || ncsuc > 1) /* Computing MAX */
+            if (ratio >= Scalar(.5) || ncsuc > 1)
                 delta = std::max(delta, pnorm / Scalar(.5));
             if (ei_abs(ratio - 1.) <= Scalar(.1)) {
                 delta = pnorm / Scalar(.5);
@@ -666,7 +594,6 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
         }
 
         /* test for successful iteration. */
-
         if (ratio >= Scalar(1e-4)) {
             /* successful iteration. update x, fvec, and their norms. */
             x = wa2;
@@ -678,7 +605,6 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
         }
 
         /* determine the progress of the iteration. */
-
         ++nslow1;
         if (actred >= Scalar(.001))
             nslow1 = 0;
@@ -688,12 +614,10 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
             nslow2 = 0;
 
         /* test for convergence. */
-
         if (delta <= parameters.xtol * xnorm || fnorm == 0.)
             return RelativeErrorTooSmall;
 
         /* tests for termination and stringent tolerances. */
-
         if (nfev >= parameters.maxfev)
             return TooManyFunctionEvaluation;
         if (Scalar(.1) * std::max(Scalar(.1) * delta, pnorm) <= epsilon<Scalar>() * xnorm)
@@ -703,35 +627,25 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
         if (nslow1 == 10)
             return NotMakingProgressIterations;
 
-        /* criterion for recalculating jacobian approximation */
-        /* by forward differences. */
-
+        /* criterion for recalculating jacobian. */
         if (ncfail == 2)
             break; // leave inner loop and go for the next outer loop iteration
 
         /* calculate the rank one modification to the jacobian */
         /* and update qtf if necessary. */
-
-        for (j = 0; j < n; ++j) {
-            sum = wa4.dot(fjac.col(j));
-            wa2[j] = (sum - wa3[j]) / pnorm;
-            wa1[j] = diag[j] * (diag[j] * wa1[j] / pnorm);
-            if (ratio >= Scalar(1e-4))
-                qtf[j] = sum;
-        }
+        wa1 = diag.cwiseProduct( diag.cwiseProduct(wa1)/pnorm );
+        wa2 = fjac.transpose() * wa4;
+        if (ratio >= Scalar(1e-4))
+            qtf = wa2;
+        wa2 = (wa2-wa3)/pnorm;
 
         /* compute the qr factorization of the updated jacobian. */
-
         ei_r1updt<Scalar>(n, n, R, wa1.data(), wa2.data(), wa3.data(), &sing);
         ei_r1mpyq<Scalar>(n, n, fjac.data(), fjac.rows(), wa2.data(), wa3.data());
         ei_r1mpyq<Scalar>(1, n, qtf.data(), 1, wa2.data(), wa3.data());
 
-        /* end of the inner loop. */
-
         jeval = false;
     }
-    /* end of the outer loop. */
-
     return Running;
 }
 
diff --git a/unsupported/Eigen/src/NonLinearOptimization/dogleg.h b/unsupported/Eigen/src/NonLinearOptimization/dogleg.h
index 36ed962f1..18b0963b2 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/dogleg.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/dogleg.h
@@ -8,46 +8,45 @@ void ei_dogleg(
         Matrix< Scalar, Dynamic, 1 >  &x)
 {
     /* Local variables */
-    int i, j, k;
+    int i, j;
     Scalar sum, temp, alpha, bnorm;
     Scalar gnorm, qnorm;
     Scalar sgnorm;
 
     /* Function Body */
     const Scalar epsmch = epsilon<Scalar>();
-    const int n = diag.size();
-    Matrix< Scalar, Dynamic, 1 >  wa1(n), wa2(n);
+    const int n = qrfac.cols();
     assert(n==qtb.size());
     assert(n==x.size());
+    assert(n==diag.size());
+    Matrix< Scalar, Dynamic, 1 >  wa1(n), wa2(n);
 
-    for (k = 0; k < n; ++k) {
-        j = n - k - 1;
-        sum = 0.;
-        for (i = j+1; i < n; ++i) {
-            sum += qrfac(j,i) * x[i];
-        }
+    /* first, calculate the gauss-newton direction. */
+    for (j = n-1; j >=0; --j) {
         temp = qrfac(j,j);
         if (temp == 0.) {
-            for (i = 0; i <= j; ++i)
-                temp = std::max(temp,ei_abs(qrfac(i,j)));
-            temp = epsmch * temp;
+            temp = epsmch * qrfac.col(j).head(j+1).maxCoeff();
             if (temp == 0.)
                 temp = epsmch;
         }
-        x[j] = (qtb[j] - sum) / temp;
+        if (j==n-1)
+            x[j] = qtb[j] / temp;
+        else
+            x[j] = (qtb[j] - qrfac.row(j).tail(n-j-1).dot(x.tail(n-j-1))) / temp;
     }
 
     /* test whether the gauss-newton direction is acceptable. */
-
-    wa1.fill(0.);
     wa2 = diag.cwiseProduct(x);
     qnorm = wa2.stableNorm();
     if (qnorm <= delta)
         return;
 
+    // TODO : this path is not tested by Eigen unit tests
+
     /* the gauss-newton direction is not acceptable. */
     /* next, calculate the scaled gradient direction. */
 
+    wa1.fill(0.);
     for (j = 0; j < n; ++j) {
         temp = qtb[j];
         for (i = j; i < n; ++i)
@@ -57,7 +56,6 @@ void ei_dogleg(
 
     /* calculate the norm of the scaled gradient and test for */
     /* the special case in which the scaled gradient is zero. */
-
     gnorm = wa1.stableNorm();
     sgnorm = 0.;
     alpha = delta / qnorm;
@@ -66,8 +64,9 @@ void ei_dogleg(
 
     /* calculate the point along the scaled gradient */
     /* at which the quadratic is minimized. */
-
     wa1.array() /= (diag*gnorm).array();
+    // TODO : once unit tests cover this part,:
+    // wa2 = qrfac.template triangularView<Upper>() * wa1;
     for (j = 0; j < n; ++j) {
         sum = 0.;
         for (i = j; i < n; ++i) {
@@ -79,7 +78,6 @@ void ei_dogleg(
     sgnorm = gnorm / temp / temp;
 
     /* test whether the scaled gradient direction is acceptable. */
-
     alpha = 0.;
     if (sgnorm >= delta)
         goto algo_end;
@@ -87,18 +85,14 @@ void ei_dogleg(
     /* the scaled gradient direction is not acceptable. */
     /* finally, calculate the point along the dogleg */
     /* at which the quadratic is minimized. */
-
     bnorm = qtb.stableNorm();
     temp = bnorm / gnorm * (bnorm / qnorm) * (sgnorm / delta);
-    /* Computing 2nd power */
     temp = temp - delta / qnorm * ei_abs2(sgnorm / delta) + ei_sqrt(ei_abs2(temp - delta / qnorm) + (1.-ei_abs2(delta / qnorm)) * (1.-ei_abs2(sgnorm / delta)));
-    /* Computing 2nd power */
     alpha = delta / qnorm * (1. - ei_abs2(sgnorm / delta)) / temp;
 algo_end:
 
     /* form appropriate convex combination of the gauss-newton */
     /* direction and the scaled gradient direction. */
-
     temp = (1.-alpha) * std::min(sgnorm,delta);
     x = temp * wa1 + alpha * x;
 }
-- 
cgit v1.2.3