/*
    Theseus - maximum likelihood superpositioning of macromolecular structures

    Copyright (C) 2004-2014 Douglas L. Theobald

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the:

    Free Software Foundation, Inc.,
    59 Temple Place, Suite 330,
    Boston, MA  02111-1307  USA

    -/_|:|_|_\-
*/

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <math.h>
#include <time.h>
#include <float.h>
#include <unistd.h>
#include "DLTmath.h"
#include "eigen.h"

double
pythag(double a, double b)
{
    double          absa, absb;

    absa = fabs(a);
    absb = fabs(b);

    if (absa > absb)
        return (absa * sqrt(1.0 + mysquare(absb / absa)));
    else
        return (absb == 0.0 ? 0.0 : absb * sqrt(1.0 + mysquare(absa / absb)));
}


/*
    Householder reduction:
    Reuction to tridiagonal of real, symmetric matrix a with dimension 3.
    z = input matrix -- is replace by orthogonal matrix Q of transformation on output
    eigenval = diagonal elements of Q (order 3)

    This part of the function takes less than 60% of the time of the Numerical Recipes
    tred2 function for a matrix of order 3.

    Tridiagonal QL implicit:
    returns eigenvalues and eigenvectors of a real, symmetric,
    tridiagonal matrix (such as output by tred2)

    eigenval = holds diagonal elements of tridiagonal matrix, order n;
               returns eigenvalues
    z = tred2 output matrix z (else identity matrix to get eigenvectors)

    z[k] returns normalized eigenvector for eigenvalue eigenval[k]
*/

/* normalize a vector and return a pointer to it */
/* It changes the value of the vector!!          */
void
eigen3(double **z, double *eigenval)
{
    int             iter;
    double          s, r, p, g, f, dd, c, b, z00, z20, z21, z10, z01, z11;
    double          scale, hh, h, g1, g2, f1, f2, f3, c1, c2, m, n, p1, p2, e0;
    double          e[3]; /* off diagonal elements of Q (order 3, e[0] = 0) */
                          /* input subdiagonal elements of input matrix,    */
                          /* e[0] arbitrary, destroyed on output.           */

    scale = fabs(z[2][0]) + fabs(z[2][1]);
    z[2][0] /= scale;
    z[2][1] /= scale;

    h = z[2][0] * z[2][0] + z[2][1] * z[2][1];

    m = z[2][1];

    if (m >= 0.0)
        g1 = -sqrt(h);
    else
        g1 = sqrt(h);

    /* make sure we don't divide by zero */
    if (scale == 0.0)
        z[2][0] = z[2][1] = h = g1 = m = 0.0;

    e[2] = scale * g1;
    h -= m * g1;
    z[2][1] = m - g1;
    z20 = z[2][0];
    z21 = z[2][1];
    z[0][2] = z20 / h;
    z[1][2] = z21 / h;
    z10 = z[1][0];
    p1 = z[0][0] * z20;
    p2 = z10 * z20;
    p1 += z10 * z21;
    p2 += z[1][1] * z21;
    c1 = p1 / h;
    c2 = p2 / h;

    n = c1 * z20;
    n += c2 * z21;

    hh = n / (h + h);

    f1 = z20;
    f2 = z21;

    g1 = c1 - hh * f1;
    g2 = c2 - hh * f2;

    /* make sure we don't divide by zero */
    if (scale == 0.0)
        z[2][0] = z[2][1] = z[0][2] = z[1][2] = f1 = f2 = g1 = g2 = 0.0;

    z[0][0] -= (2.0 * f1 * g1);
    z[1][1] -= (2.0 * f2 * g2);
    z[1][0] -= (f2 * g1 + g2 * f1);

    e[0] = 0.0;
    e[1] = z[1][0];

    eigenval[0] = z[0][0];
    eigenval[1] = z[1][1];
    eigenval[2] = z[2][2];
    z[0][0] = 1.0 - z[2][0] * z[0][2];
    z[1][1] = 1.0 - z[2][1] * z[1][2];
    z[1][0] = -z[2][0] * z[1][2];
    z[0][1] = -z[2][1] * z[0][2];

    z[2][2] = 1.0;
    z[0][2] = z[2][0] = z[1][2] = z[2][1] = 0.0;

    /* //////////////////////////////////////////////////////////////// */
    /* TQLI                                                             */
    /* //////////////////////////////////////////////////////////////// */
    e[0] = e[1];
    e[1] = e[2];
    e[2] = 0.0;

    iter = 0;
    while (1)
    {
        dd = fabs(eigenval[0]) + fabs(eigenval[1]);
        if (fabs(e[0]) + dd == dd)
            break;

        dd = fabs(eigenval[1]) + fabs(eigenval[2]);
        if (fabs(e[1]) + dd == dd)
            break;

        if (iter++ == 300)
        {
            fprintf(stderr,
                    "\n ERROR: Too many iterations in tqli eigen3() in eigen3.c \n");
            exit(EXIT_FAILURE);
        }

        g = (eigenval[1] - eigenval[0]) / (2.0 * e[0]);
        r = pythag(g, 1.0);
        g = eigenval[2] - eigenval[0] + e[0] / (g + SIGN(r, g));
        s = c = 1.0;
        p = 0.0;

        f = s * e[1];
        b = c * e[1];
        e[2] = (r = pythag(f, g));

        if (r == 0.0)
        {
            eigenval[2] -= p;
            e[2] = 0.0;
            break;
        }

        s = f / r;
        c = g / r;
        g = eigenval[2] - p;
        r = (eigenval[1] - g) * s + 2.0 * c * b;
        eigenval[2] = g + (p = s * r);
        g = c * r - b;

        f1 = z[0][2];
        f2 = z[1][2];
        f3 = z[2][2];

        z01 = z[0][1];
        z11 = z[1][1];
        z21 = z[2][1];

        z[0][2] = s * z01 + c * f1;
        z[1][2] = s * z11 + c * f2;
        z[2][2] = s * z21 + c * f3;
        z[0][1] = c * z01 - s * f1;
        z[1][1] = c * z11 - s * f2;
        z[2][1] = c * z21 - s * f3;

        e0 = e[0];
        f = s * e0;
        b = c * e0;
        e[1] = (r = pythag(f, g));

        if (r == 0.0)
        {
            eigenval[1] -= p;
            e[2] = 0.0;
            break;
        }

        s = f / r;
        c = g / r;
        g = eigenval[1] - p;
        r = (eigenval[0] - g) * s + 2.0 * c * b;
        eigenval[1] = g + (p = s * r);
        g = c * r - b;

        f1 = z[0][1];
        z00 = z[0][0];
        z[0][1] = s * z00 + c * f1;
        z[0][0] = c * z00 - s * f1;
        f2 = z[1][1];
        z10 = z[1][0];
        z[1][1] = s * z10 + c * f2;
        z[1][0] = c * z10 - s * f2;
        f3 = z[2][1];
        z20 = z[2][0];
        z[2][1] = s * z20 + c * f3;
        z[2][0] = c * z20 - s * f3;

        if (r == 0.0)
            continue;

        eigenval[0] -= p;
        e[0] = g;
        e[2] = 0.0;
    }
    /* /////////////////////////////////////////////////////////////////// */

    iter = 0;
    while (1)
    {
        dd = fabs(eigenval[1]) + fabs(eigenval[2]);
        if (fabs(e[1]) + dd == dd)
            break;

        if (iter++ == 300)
        {
            fprintf(stderr,
                    "\n ERROR: Too many iterations in tqli eigen3() in eigen3.c \n");
            exit(EXIT_FAILURE);
        }

        g = (eigenval[2] - eigenval[1]) / (2.0 * e[1]);
        r = pythag(g, 1.0);
        g = eigenval[2] - eigenval[1] + e[1] / (g + SIGN(r, g));
        s = c = 1.0;
        p = 0.0;

        f = s * e[1];
        b = c * e[1];
        e[2] = (r = pythag(f, g));

        if (r == 0.0)
        {
            eigenval[2] -= p;
            e[2] = 0.0;
            break;
        }

        s = f / r;
        c = g / r;
        g = eigenval[2] - p;
        r = (eigenval[1] - g) * s + 2.0 * c * b;
        eigenval[2] = g + (p = s * r);
        g = c * r - b;

        f1 = z[0][2];
        z01 = z[0][1];
        z[0][2] = s * z01 + c * f1;
        z[0][1] = c * z01 - s * f1;

        f2 = z[1][2];
        z11 = z[1][1];
        z[1][2] = s * z11 + c * f2;
        z[1][1] = c * z11 - s * f2;

        f3 = z[2][2];
        z21 = z[2][1];
        z[2][2] = s * z21 + c * f3;
        z[2][1] = c * z21 - s * f3;

        if (r == 0.0)
            continue;

        eigenval[1] -= p;
        e[1] = g;
        e[2] = 0.0;
    }
}


/*
    Householder reduction:
    Reduction to tridiagonal of real, symmetric matrix a with dimension 4.

    z = input matrix -- is replace by orthogonal matrix Q
    of transformation on output
    eigenval = diagonal elements of Q (order 4)

    Tridiagonal QL implicit = tqli():
    returns eigenvalues and eigenvectors of a real, symmetric,
    tridiagonal matrix (such as output by tred2)

    eigenval = holds diagonal elements of tridiagonal matrix, order n;
               returns eigenvalues
    z = tred2() output matrix z (else identity matrix to get eigenvectors)

    z[k] or *evals returns normalized eigenvector for eigenvalue eigenval[k]

    eigen4() uses only lower left half of matrix, whereas jacobi4() needs
    upper right (at least)
*/
void
eigen4(double **Q, double *evals)
{
    double          e[4];

    tred24(Q, evals, e);
    tqli4(evals, e, Q);
}


/* computes only eigenvalues, not the eigenvectors */
void
eigenval4(double **Q, double *evals)
{
    double          e[4];

    tred24vals(Q, evals, e);
    tqli4vals(evals, e, Q);
}


void
tred24(double **a, double *d, double *e)
{
    int             l, k, j, i;
    int             n = 4;
    double          scale, hh, h, g, f;

    for (i = n - 1; i > 0; i--)
    {
        l = i - 1;
        h = scale = 0.0;
        if (l > 0)
        {
            for (k = 0; k < l + 1; k++)
                scale += fabs(a[i][k]);

            if (scale == 0.0)
                e[i] = a[i][l];
            else
            {
                for (k = 0; k < l + 1; k++)
                {
                    a[i][k] /= scale;
                    h += a[i][k] * a[i][k];
                }

                f = a[i][l];
                g = (f >= 0.0 ? -sqrt(h) : sqrt(h));
                e[i] = scale * g;
                h -= f * g;
                a[i][l] = f - g;

                f = 0.0;
                for (j = 0; j < l + 1; j++)
                {
                    a[j][i] = a[i][j] / h;
                    g = 0.0;

                    for (k = 0; k < j + 1; k++)
                        g += a[j][k] * a[i][k];

                    for (k = j + 1; k < l + 1; k++)
                        g += a[k][j] * a[i][k];

                    e[j] = g / h;
                    f += e[j] * a[i][j];
                }

                hh = f / (h + h);
                for (j = 0; j < l + 1; j++)
                {
                    f = a[i][j];
                    e[j] = g = e[j] - hh * f;

                    for (k = 0; k < j + 1; k++)
                        a[j][k] -= (f * e[k] + g * a[i][k]);
                }
            }
        }
        else
            e[i] = a[i][l];

        d[i] = h;
    }

    d[0] = 0.0;
    e[0] = 0.0;

    for (i = 0; i < n; i++)
    {
        l = i;
        if (d[i] != 0.0)
        {
            for (j = 0; j < l; j++)
            {
                g = 0.0;
                for (k = 0; k < l; k++)
                    g += a[i][k] * a[k][j];

                for (k = 0; k < l; k++)
                    a[k][j] -= g * a[k][i];
            }
        }

        d[i] = a[i][i];
        a[i][i] = 1.0;

        for (j = 0; j < l; j++)
            a[j][i] = a[i][j] = 0.0;
    }
}


/* computes only eigenvalues, not the eigenvectors */
void
tred24vals(double **a, double *d, double *e)
{
    int             l, k, j, i;
    int             n = 4;
    double          scale, hh, h, g, f;

    for (i = n - 1; i > 0; i--)
    {
        l = i - 1;
        h = scale = 0.0;
        if (l > 0)
        {
            for (k = 0; k < l + 1; k++)
                scale += fabs(a[i][k]);

            if (scale == 0.0)
                e[i] = a[i][l];
            else
            {
                for (k = 0; k < l + 1; k++)
                {
                    a[i][k] /= scale;
                    h += a[i][k] * a[i][k];
                }

                f = a[i][l];
                g = (f >= 0.0 ? -sqrt(h) : sqrt(h));
                e[i] = scale * g;
                h -= f * g;
                a[i][l] = f - g;

                f = 0.0;
                for (j = 0; j < l + 1; j++)
                {
                    g = 0.0;
                    for (k = 0; k < j + 1; k++)
                        g += a[j][k] * a[i][k];

                    for (k = j + 1; k < l + 1; k++)
                        g += a[k][j] * a[i][k];
                    e[j] = g / h;
                    f += e[j] * a[i][j];
                }

                hh = f / (h + h);
                for (j = 0; j < l + 1; j++)
                {
                    f = a[i][j];
                    e[j] = g = e[j] - hh * f;

                    for (k = 0; k < j + 1; k++)
                        a[j][k] -= (f * e[k] + g * a[i][k]);
                }
            }
        }
        else
            e[i] = a[i][l];
        d[i] = h;
    }

    d[0] = 0.0;
    e[0] = 0.0;

    for (i = 0; i < n; i++)
        d[i] = a[i][i];
}


void
tqli4(double *d, double *e, double **z)
{
    int             m, l, iter, i, k;
    double          s, r, p, g, f, dd, c, b;
    int             n = 4;

    for (i = 1; i < n; i++)
        e[i - 1] = e[i];
    e[n - 1] = 0.0;

    for (l = 0; l < n; l++)
    {
        iter = 0;
        do
        {
            for (m = l; m < n - 1; m++)
            {
                dd = fabs(d[m]) + fabs(d[m + 1]);
                if (fabs(e[m]) + dd == dd)
                    break;
            }
            if (m != l)
            {
                if (iter++ == 300)
                {
                    fprintf(stderr,
                            "\n ERROR: Too many iterations in tqli4() from eigen4.c \n");
                    exit(EXIT_FAILURE);
                }

                g = (d[l + 1] - d[l]) / (2.0 * e[l]);
                r = pythag(g, 1.0);
                g = d[m] - d[l] + e[l] / (g + SIGN(r, g));
                s = c = 1.0;
                p = 0.0;

                for (i = m - 1; i >= l; i--)
                {
                    f = s * e[i];
                    b = c * e[i];
                    e[i + 1] = (r = pythag(f, g));

                    if (r == 0.0)
                    {
                        d[i + 1] -= p;
                        e[m] = 0.0;
                        break;
                    }

                    s = f / r;
                    c = g / r;
                    g = d[i + 1] - p;
                    r = (d[i] - g) * s + 2.0 * c * b;
                    d[i + 1] = g + (p = s * r);
                    g = c * r - b;

                    for (k = 0; k < n; k++)
                    {
                        f = z[k][i + 1];
                        z[k][i + 1] = s * z[k][i] + c * f;
                        z[k][i] = c * z[k][i] - s * f;
                    }
                }

                if (r == 0.0 && i >= l)
                    continue;

                d[l] -= p;
                e[l] = g;
                e[m] = 0.0;
            }
        } while (m != l);
    }
}


/* computes only eigenvalues, not the eigenvectors */
void
tqli4vals(double *d, double *e, double **z)
{
    int             m, l, iter, i;
    double          s, r, p, g, f, dd, c, b;
    int             n = 4;

    for (i = 1; i < n; i++)
        e[i - 1] = e[i];
    e[n - 1] = 0.0;

    for (l = 0; l < n; l++)
    {
        iter = 0;
        do
        {
            for (m = l; m < n - 1; m++)
            {
                dd = fabs(d[m]) + fabs(d[m + 1]);
                if (fabs(e[m]) + dd == dd)
                    break;
            }
            if (m != l)
            {
                if (iter++ == 300)
                {
                    fprintf(stderr,
                            "\n ERROR: Too many iterations in tqli() from eigen4.c \n");
                    exit(EXIT_FAILURE);
                }

                g = (d[l + 1] - d[l]) / (2.0 * e[l]);
                r = pythag(g, 1.0);
                g = d[m] - d[l] + e[l] / (g + SIGN(r, g));
                s = c = 1.0;
                p = 0.0;

                for (i = m - 1; i >= l; i--)
                {
                    f = s * e[i];
                    b = c * e[i];
                    e[i + 1] = (r = pythag(f, g));

                    if (r == 0.0)
                    {
                        d[i + 1] -= p;
                        e[m] = 0.0;
                        break;
                    }

                    s = f / r;
                    c = g / r;
                    g = d[i + 1] - p;
                    r = (d[i] - g) * s + 2.0 * c * b;
                    d[i + 1] = g + (p = s * r);
                    g = c * r - b;
                }

                if (r == 0.0 && i >= l)
                    continue;

                d[l] -= p;
                e[l] = g;
                e[m] = 0.0;
            }
        } while (m != l);
    }
}


/* Computes all eigenvalues and eigenvectors of a real symmetric matrix
   a[n][n].  On output, elements of a above the diagonal are
   destroyed (i.e. The matrix a gets trashed.  If you need to keep the
   values of a, call jacobi with a copy of a).  d[n] returns the
   eigenvalues of a.  v[n][n] is a matrix whose columns contain,
   on output, the normalized eigenvectors of a.

   eigen4() uses only lower left half of matrix, whereas jacobi4() needs
   upper right (at least)

   Loosely adapted from Numerical Recipes in C.
*/
void
jacobi4(double **a, double *d, double **v)
{
    int             i, j, k, m, nrot;
    double          theta, tau, t, sm, s, h, g, c;
    double          b[4], z[4];
    int             n = 4;

    for (i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
            v[i][j] = 0.0;
        v[i][i] = 1.0;
    }

    for (i = 0; i < n; i++)
    {
        b[i] = d[i] = a[i][i];
        z[i] = 0.0;
    }

    nrot = 0;

    for (i = 0; i < 60; i++)
    {
        sm = 0.0;
        for (j = 0; j < 3; j++)
        {
            for (k = j + 1; k < 4; k++)
                sm += fabs(a[j][k]);
        }

        if (sm == 0.0)
            return;

        for (j = 0; j < 3; j++)
        {
            for (k = j + 1; k < 4; k++)
            {
                if (fabs(a[j][k]) > 0.0)
                {
                    h = d[k] - d[j];
                    g = 100.0 * fabs(a[j][k]);

                    if ((fabs(h) + g) == fabs(h))
                    {
                        t = (a[j][k]) / h;
                    }
                    else
                    {
                        theta = 0.5 * h / (a[j][k]);
                        t = 1.0 / (fabs(theta) + sqrt(1.0 + theta * theta));
                        if (theta < 0.0)
                            t = -t;
                    }

                    c = 1.0 / sqrt(1 + t * t);
                    s = t * c;
                    tau = s / (1.0 + c);
                    h = t * a[j][k];
                    z[j] -= h;
                    z[k] += h;
                    d[j] -= h;
                    d[k] += h;
                    a[j][k] = 0.0;

                    for (m = 0; m < j; m++)
                        rotate(a, s, tau, m, j, m, k);
                    for (m = j + 1; m < k; m++)
                        rotate(a, s, tau, j, m, m, k);
                    for (m = k + 1; m < n; m++)
                        rotate(a, s, tau, j, m, k, m);
                    for (m = 0; m < n; m++)
                        rotate(v, s, tau, m, j, m, k);

                    ++nrot;
                }
            }
        }

        for (j = 0; j < 4; j++)
        {
            b[j] += z[j];
            d[j] = b[j];
            z[j] = 0.0;
        }
    }

    printf("\n  ERROR: Too many iterations (more than %d) in routine jacobi() \n", nrot);
    exit(EXIT_FAILURE);
}

#define ROTATE(a, i, j, k, l) \
{\
    g = a[i][j];\
    h = a[k][l];\
    a[i][j] -= s * (h + g * theta);\
    a[k][l] += s * (g - h * theta);\
}


/* Symmetric Schur decomposition for a Jacobi 2x2 sub-matrix.
   Adapted from Golub and Van Loan _Matrix Computations_ 3rd 1996 Johns Hopkins.
   Algorithm 8.4.2, Section 8.4 "Jacobi Methods", pp. 426-428.
   Eigenvalue accumulation based on _Numerical Recipes_ 2nd 1992
   jacobi() Section 11.1, pp. 462-469.
*/
void
SymSchur2(double **a, const int j, const int k,
          double *z, double *d, double *s, double *theta)
{
    double              ajk = a[j][k];
    double              h = d[k] - d[j];
    double              tau = 0.5 * h / ajk;
    double              c, t;

    if (tau >= 0.0)
        t = 1.0 / (tau + sqrt(1.0 + tau * tau));
    else
        t = -1.0 / (-tau + sqrt(1.0 + tau * tau));

    c = 1.0 / sqrt(1.0 + t * t);
    *s = t * c;
    *theta = *s / (1.0 + c);
    h = t * ajk;
    z[j] -= h;
    z[k] += h;
    d[j] -= h;
    d[k] += h;
    a[j][k] = 0.0;
}


/* Computes all eigenvalues and eigenvectors of a real symmetric matrix
   a[3][3] using the classical Jacobi algorithm which chooses the largest
   off-diagonal element to annihilate. For 3x3 this is more efficient
   than the cyclic Jacobi.
   On output, elements of a above the diagonal are destroyed
   (i.e. The matrix a gets trashed.  If you need to keep the
   values of a, call jacobi with a copy of a).
   d[3] returns the eigenvalues of a.
   v[3][3] is a matrix whose rows contain,
   on output, the normalized eigenvectors of a.

   Adapted from Golub and Van Loan _Matrix Computations_ 3rd 1996 Johns Hopkins.
   Algorithms 8.4.1 and 8.4.2, Section 8.4 "Jacobi Methods", pp. 426-429.

   Eigenvalue accumulation based on _Numerical Recipes_ 2nd 1992
   jacobi() Section 11.1, pp. 462-469.

   Puts eigenvectors in rows of v (unlike NR routine where in columns)
*/
int
jacobi3(double **a, double *d, double **v, double tol)
{
    int             i, j, k, m, nrot, maxj, maxk;
    double          theta, s, ajk, eps, offA;
    double          b[3], z[3], largest;
/*     double          g; */

    /* V = I_3 */
    for (i = 0; i < 3; i++)
    {
        for (j = 0; j < 3; j++)
            v[i][j] = 0.0;
        v[i][i] = 1.0;
        b[i] = d[i] = a[i][i];
        z[i] = 0.0;
    }

    eps = tol * tol * (b[0] + b[1] + b[2]);

    /* calc Frobenius norm^2 of off-diagonal elements */
    offA = 0.0;
    for (j = 0; j < 2; j++)
        for (k = j+1; k < 3; k++)
            offA += a[j][k] * a[j][k];

    nrot = 0;
    while (offA > eps)
    {
        maxj = 0;
        maxk = 1;
        largest = -DBL_MAX;
        for (j = 0; j < 3; ++j)
        {
            for (k = j+1; k < 3; ++k)
            {
                if (largest < fabs(a[j][k]) && j != k)
                {
                    largest = fabs(a[j][k]);
                    maxj = j;
                    maxk = k;
                }
            }
        }

        j = maxj;
        k = maxk;
        ajk = a[j][k];
        if (ajk * ajk > eps)
        {
            /* 2x2 symmetric Schur decomposition */
            SymSchur2(a, j, k, z, d, &s, &theta);

            /* update the A matrix by Jacobi/Givens rotation: A = J' A J */
            for (m = 0; m < j; m++)
                rotate(a, s, theta, m, j, m, k);

            for (m = j + 1; m < k; m++)
                rotate(a, s, theta, j, m, m, k);

            for (m = k + 1; m < 3; m++)
                rotate(a, s, theta, j, m, k, m);

            for (m = 0; m < 3; m++)
                rotate(v, s, theta, j, m, k, m); /* to put evecs in cols, m,j,m,k */

            ++nrot;
        }

        for (j = 0; j < 3; j++)
        {
            b[j] += z[j];
            d[j] = b[j];
            z[j] = 0.0;
        }

        /* calc norm of off-diagonal elements */
        offA = 0.0;
        for (j = 0; j < 2; j++)
            for (k = j+1; k < 3; k++)
                offA += a[j][k] * a[j][k];
    }

    return(nrot);
}


/* Computes all eigenvalues and eigenvectors of a real symmetric matrix
   a[3][3] using the cyclic Jacobi algorithm which marches through the
   off-diagonal elements and annihilates them. For 3x3 nearly as efficient
   as the cyclic Jacobi.
   On output, elements of a above the diagonal are destroyed
   (i.e. The matrix a gets trashed.  If you need to keep the
   values of a, call jacobi with a copy of a).
   d[3] returns the eigenvalues of a.
   v[3][3] is a matrix whose rows contain,
   on output, the normalized eigenvectors of a.

   Adapted from Golub and Van Loan _Matrix Computations_ 3rd 1996 Johns Hopkins.
   Algorithms 8.4.1 and 8.4.3, Section 8.4 "Jacobi Methods", pp. 426-430.

   Eigenvalue accumulation based on _Numerical Recipes_ 2nd 1992
   jacobi() Section 11.1, pp. 462-469.

   Puts eigenvectors in rows of v (unlike NR routine where in columns)
*/
int
jacobi3_cyc(double **a, double *d, double **v, double tol)
{
    int             i, j, k, m, nrot;
    double          tau, theta, t, s, h, c, ajk, eps, offA;
    double          b[3], z[3];
/*     double          g; */

    /* V = I_3 */
    for (i = 0; i < 3; i++)
    {
        for (j = 0; j < 3; j++)
            v[i][j] = 0.0;
        v[i][i] = 1.0;
        b[i] = d[i] = a[i][i];
        z[i] = 0.0;
    }

    eps = tol * tol * (b[0] + b[1] + b[2]);

    /* calc Frobenius norm^2 of off-diagonal elements */
    offA = 0.0;
    for (j = 0; j < 2; j++)
        for (k = j+1; k < 3; k++)
            offA += a[j][k] * a[j][k];

    nrot = 0;
    while (offA > eps)
    {
        for (j = 0; j < 2; j++)
        {
            for (k = j+1; k < 3; k++)
            {
                ajk = a[j][k];
                if (ajk * ajk > eps)
                {
                    /* 2x2 symmetric Schur decomposition */
                    h = d[k] - d[j];
                    tau = 0.5 * h / ajk;

                    if (tau >= 0.0)
                        t = 1.0 / (tau + sqrt(1.0 + tau * tau));
                    else
                        t = -1.0 / (-tau + sqrt(1.0 + tau * tau));

                    c = 1.0 / sqrt(1.0 + t * t);
                    s = t * c;
                    theta = s / (1.0 + c);
                    h = t * ajk;
                    z[j] -= h;
                    z[k] += h;
                    d[j] -= h;
                    d[k] += h;
                    a[j][k] = 0.0;

                    /* update the A matrix by Jacobi/Givens rotation: A = J' A J */
                    for (m = 0; m < j; m++)
                        rotate(a, s, theta, m, j, m, k);

                    for (m = j + 1; m < k; m++)
                        rotate(a, s, theta, j, m, m, k);

                    for (m = k + 1; m < 3; m++)
                        rotate(a, s, theta, j, m, k, m);

                    for (m = 0; m < 3; m++)
                        rotate(v, s, theta, j, m, k, m); /* to put evecs in cols, m,j,m,k */

                    ++nrot;
                }
            }
        }

        for (j = 0; j < 3; j++)
        {
            b[j] += z[j];
            d[j] = b[j];
            z[j] = 0.0;
        }

        /* calc norm of off-diagonal elements */
        offA = 0.0;
        for (j = 0; j < 2; j++)
            for (k = j+1; k < 3; k++)
                offA += a[j][k] * a[j][k];
    }

    return(nrot);
}


void
rotate(double **a, double s, double theta,
       int i, int j, int k, int l)
{
    double          g = a[i][j];
    double          h = a[k][l];

    a[i][j] -= s * (h + g * theta);
    a[k][l] += s * (g - h * theta);
}


/* Calculates the Moore-Penrose pseudoinverse of a symmetric matrix
   (if necessary). Returns the condition number of the matrix.
   */
double
InvSymEigenOp(double **invmat, const double **mat, int n,
              double *evals, double **evecs, const double tol)
{
    double                   cond;
    int                      i, j, k;
    double                 **tmpmat = MatAlloc(n, n);

    memcpy(tmpmat[0], mat[0], n * n * sizeof(double));

    EigenGSLDest(tmpmat, n, evals, evecs, 0);

    cond = evals[n-1] / evals[0];

    for (i = 0; i < n; ++i)
    {
        if (evals[i] > tol)
            evals[i] = 1.0 / evals[i];
        else
            evals[i] = 0.0;
    }

    memset(invmat[0], 0, n * n * sizeof(double));

    for (i = 0; i < n; ++i)
        for (j = 0; j <= i; ++j)
            for (k = 0; k < n; ++k)
                invmat[i][j] += (evecs[i][k] * evals[k] * evecs[j][k]);

    for (i = 0; i < n; ++i)
        for (j = i + 1; j < n; ++j)
            invmat[i][j] = invmat[j][i];

    MatDestroy(&tmpmat);

    return(cond);
}



/* Calculates A = LDL^t, where L is a matrix of right eigenvectors and D is
   a diagonal matrix of eigenvalues, in one fell swoop. Except here the
   eigenvalues are delivered as a 1 x n vector. */
/* This function is consistent with eigensym() above - 2006-05-10 */
void
EigenReconSym(double **mat, const double **evecs, const double *evals, const int n)
{
    int             i, j, k;

    /* (i x k)(k x j) = (i x j) */
    for (i = 0; i < n; ++i)
    {
        for (j = 0; j < n; ++j)
        {
            mat[i][j] = 0.0;
            for (k = 0; k < n; ++k)
                mat[i][j] += (evecs[i][k] * evals[k] * evecs[j][k]);
        }
    }
}
