/* +---------------------------------------------------------------------------+
   |          The Mobile Robot Programming Toolkit (MRPT) C++ library          |
   |                                                                           |
   |                   http://mrpt.sourceforge.net/                            |
   |                                                                           |
   |   Copyright (C) 2005-2009  University of Malaga                           |
   |                                                                           |
   |    This software was written by the Machine Perception and Intelligent    |
   |      Robotics Lab, University of Malaga (Spain).                          |
   |    Contact: Jose-Luis Blanco  <jlblanco@ctima.uma.es>                     |
   |                                                                           |
   |  This file is part of the MRPT project.                                   |
   |                                                                           |
   |     MRPT is free software: you can redistribute it and/or modify          |
   |     it under the terms of the GNU General Public License as published by  |
   |     the Free Software Foundation, either version 3 of the License, or     |
   |     (at your option) any later version.                                   |
   |                                                                           |
   |   MRPT is distributed in the hope that it will be useful,                 |
   |     but WITHOUT ANY WARRANTY; without even the implied warranty of        |
   |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         |
   |     GNU General Public License for more details.                          |
   |                                                                           |
   |     You should have received a copy of the GNU General Public License     |
   |     along with MRPT.  If not, see <http://www.gnu.org/licenses/>.         |
   |                                                                           |
   +---------------------------------------------------------------------------+ */

#include <mrpt/precomp_core.h>  // Only for precomp. headers, include all libmrpt-core headers.

#include <mrpt/math/CMatrixTemplateNumeric.h>
#include <mrpt/math/CVectorTemplate.h>

using std::string;
using std::cout;
using std::endl;
using namespace mrpt;
using namespace mrpt::math;
using namespace mrpt::utils;


/** Constructor: builds a matrix with the requested dimensions.
  *  The arguments are forwarded unchanged to the CMatrixTemplate<T> base-class constructor.
  * \param row Number of rows.
  * \param col Number of columns.
  */
template <class T>
CMatrixTemplateNumeric<T>::CMatrixTemplateNumeric(size_t row, size_t col)
	: CMatrixTemplate<T>(row, col)
{
	// Nothing else to do here.
}

/** Assignment operator: resizes this matrix to the size of "m" and copies every element.
  *  Self-assignment is detected and is a no-op.
  * \param m The matrix to copy from.
  * \return A reference to this object.
  */
template <class T>
CMatrixTemplateNumeric<T>& CMatrixTemplateNumeric<T>::operator = (const CMatrixTemplateNumeric<T>& m)
{
	// Copying a matrix onto itself would only re-allocate and re-copy the same data.
	if (this == &m)
		return *this;

	CMatrixTemplate<T>::realloc( m.getRowCount(), m.getColCount() );

	const size_t nRows = CMatrixTemplate<T>::getRowCount();
	const size_t nCols = CMatrixTemplate<T>::getColCount();
	for (size_t r=0; r < nRows; r++)
		for (size_t c=0; c < nCols; c++)
			CMatrixTemplate<T>::m_Val[r][c] = m.m_Val[r][c];

	return *this;
}


/*---------------------------------------------------------------
						setSize
	Changes the dimensions of the matrix to (row x col) by calling
	CMatrixTemplate<T>::realloc() with its third argument set to true.
 ---------------------------------------------------------------*/
template <class T>
void CMatrixTemplateNumeric<T>::setSize(size_t row, size_t col)
{
	// Third argument of realloc(); its exact meaning is defined by the base class.
	const bool thirdReallocArg = true;
	CMatrixTemplate<T>::realloc( row, col, thirdReallocArg );
}

/*---------------------------------------------------------------
						resize
	Changes the dimensions of the matrix to (row x col). It performs
	exactly the same realloc() call as setSize().
 ---------------------------------------------------------------*/
template <class T>
void CMatrixTemplateNumeric<T>::resize(size_t row, size_t col)
{
	typedef CMatrixTemplate<T> Base;
	Base::realloc( row, col, true );
}

/*---------------------------------------------------------------
						pivot
	Partial pivoting on column "row": among rows row..m_Rows-1, the
	one whose entry in that column has the largest non-zero absolute
	value is swapped (by exchanging row pointers) with row "row".

	Returns: -1 if no non-zero pivot exists in that column,
	          0 if the best row already was "row" (nothing swapped),
	          the index of the row that was swapped in otherwise.
 ---------------------------------------------------------------*/
template <class T>
int CMatrixTemplateNumeric<T>::pivot(size_t row)
{
	typedef CMatrixTemplate<T> Base;

	size_t bestRow = row;
	double bestAbs = -1;	// Smaller than any absolute value, so the first non-zero entry always wins.

	for (size_t r=row; r < Base::m_Rows; r++)
	{
		const double candidate = fabs( Base::m_Val[r][row] );
		if (candidate != 0 && candidate > bestAbs)
		{
			bestAbs = candidate;
			bestRow = r;
		}
	}

	// Every candidate was zero: there is no usable pivot in this column.
	if (Base::m_Val[bestRow][row] == T(0))
		return -1;

	// The pivot is already in place.
	if (bestRow == row)
		return 0;

	// Exchange the two rows by swapping their row pointers.
	T* const pivotRowPtr = Base::m_Val[bestRow];
	Base::m_Val[bestRow] = Base::m_Val[row];
	Base::m_Val[row]     = pivotRowPtr;

	return static_cast<int>( bestRow );
}



/**
	Householder reduction of a real, symmetric matrix a[1..n][1..n] to tridiagonal form.

	All arrays are indexed from 1: this routine only touches a[1..n][1..n], d[1..n] and e[1..n],
	so element 0 of every array (and of every row of a) is never accessed.

	On output, a is replaced by the orthogonal matrix Q effecting the transformation. d[1..n]
	returns the diagonal elements of the tridiagonal matrix, and e[1..n] the off-diagonal
	elements, with e[1]=0. Several statements, as noted in comments, can be omitted if only
	eigenvalues are to be found, in which case a contains no useful information on output.
	Otherwise they are to be included.

	\param a  In: the symmetric matrix. Out: the orthogonal matrix Q.
	\param nn The dimension n of the matrix (converted to int internally).
	\param d  Out: diagonal elements of the tridiagonal matrix.
	\param e  Out: off-diagonal elements of the tridiagonal matrix.
*/
template <class T>
void  tred2(T **a, size_t nn, T d[], T e[])
{
	int		l,k,j,i;
	int     n = static_cast<int> (nn);
	T		scale,hh,h,g,f;

	// Process the rows from the last one down to the second one.
	for (i=n;i>=2;i--)
	{
		l=i-1;
		h=scale=0.0;
		if (l > 1)
		{
			// Sum of absolute values of the row elements left of the diagonal.
			for (k=1;k<=l;k++)
			scale += fabs(a[i][k]);
			if (scale == 0)				// Skip transformation.
				e[i]=a[i][l];
			else
			{
				for (k=1;k<=l;k++)
				{
					a[i][k] /= scale;		// Use scaled a's for transformation.
					h += a[i][k]*a[i][k];	// Form sigma in h.
				}
				f=a[i][l];
				g=(f >= 0 ? -::sqrt(h) : (::sqrt(h)));	// g takes the sign opposite to f.
				e[i]=scale*g;
				h -= f*g;						// Now h is equation (11.2.4).
				a[i][l]=f-g;					// Store u in the ith row of a.
				f=0;
				for (j=1;j<=l;j++)
				{
					a[j][i]=a[i][j]/h;			// Store u/H in ith column of a.
					g=0.0;						// Form an element of A . u in g.
					for (k=1;k<=j;k++)
						g += a[j][k]*a[i][k];
					for (k=j+1;k<=l;k++)
						g += a[k][j]*a[i][k];
					e[j]=g/h;					// Form element of p in temporarily unused element of e.
					f += e[j]*a[i][j];
				}
				hh=f/(h+h);						// Form K, equation (11.2.11).
				for (j=1;j<=l;j++)
				{								// Form q and store in e overwriting p.
					f=a[i][j];
					e[j]=g=e[j]-hh*f;
					for (k=1;k<=j;k++)			// Reduce a, equation (11.2.13).
						a[j][k] -= (f*e[k]+g*a[i][k]);
				}
			}
		} else
			e[i]=a[i][l];
		d[i]=h;		// Temporarily keeps h; the second loop below tests it and then overwrites d[i].
	}

	/* Next statement can be omitted if eigenvectors not wanted */
	d[1]=0;
	e[1]=0;

	/* Contents of this loop can be omitted if eigenvectors not
	wanted except for statement d[i]=a[i][i]; */
	for (i=1;i<=n;i++)	// Begin accumulation of transformation matrices
	{
		l= i-1;
		if (d[i]) // This block skipped when i=1.
			for (j=1;j<=l;j++)
			{
				g=0.0;
				for (k=1;k<=l;k++) // Use u and u/H stored in a to form P.Q.
					g += a[i][k]*a[k][j];
				for (k=1;k<=l;k++)
					a[k][j] -= g*a[k][i];
			}
		d[i]=a[i][i];			// This statement remains.
		a[i][i]=1;				// Reset row and column of a to identity
		for (j=1;j<=l;j++)
			a[j][i]=a[i][j]= 0; // matrix for next iteration.
	}
}


/** QL algorithm with implicit shifts, to determine the eigenvalues and eigenvectors of a real, symmetric,
	tridiagonal matrix, or of a real, symmetric matrix previously reduced by tred2 (section 11.2). On
	input, d[1..n] contains the diagonal elements of the tridiagonal matrix. On output, it returns
	the eigenvalues. The vector e[1..n] inputs the subdiagonal elements of the tridiagonal matrix,
	with e[1] arbitrary. On output e is destroyed. When finding only the eigenvalues, several lines
	may be omitted, as noted in the comments. If the eigenvectors of a tridiagonal matrix are desired,
	the matrix z[1..n][1..n] is input as the identity matrix. If the eigenvectors of a matrix
	that has been reduced by tred2 are required, then z is input as the matrix output by tred2.
	In either case, the kth column of z returns the normalized eigenvector corresponding to d[k].

	All arrays are indexed from 1 (element 0 of d and of each row of z is not accessed by this routine).

	\param d  In: diagonal elements. Out: eigenvalues.
	\param e  In: subdiagonal elements. Destroyed on output.
	\param nn The dimension n of the matrix (converted to int internally).
	\param z  In: identity matrix, or the output of tred2. Out: eigenvectors as columns.

	An exception is raised through THROW_EXCEPTION if one eigenvalue needs more than 60 iterations.
*/
template <class T>
void  tqli(T d[], T e[], size_t nn, T **z)
{
	int			m,l,iter,i,k;
	int 		n = static_cast<int> (nn);
	T			s,r,p,g,f,dd,c,b;
	//T volatile	temp;
	const T     EPS= std::numeric_limits<T>::epsilon();	// Relative threshold for "negligible" subdiagonal elements.

	for (i=2;i<=n;i++)
		e[i-1]=e[i];				// Convenient to renumber the elements of e.

	e[n]=0.0;
	for (l=1;l<=n;l++)
	{
		iter=0;
		do
		{
			for (m=l;m<=(n-1);m++)	// Look for a single small subdiagonal element to split the matrix.
			{
				dd=static_cast<T>(( fabs(d[m])+fabs(d[m+1]) ) );
				// ----------------------------------------------------------------------
				// Fix added 14/DEC/2006: Avoid a convergence problem due to compiler
				//  optimization. See http://www.nr.com/forum/showthread.php?p=1803
				// ----------------------------------------------------------------------
				// Fixed again 25/JAN/2008: Using EPS is a better approach!
				// ----------------------------------------------------------------------
				//temp = fabs(e[m])+dd;
				//if (fabs(e[m])+dd == dd) break;
				if (fabs(e[m]) <= EPS*dd)  break;
			}
			if (m != l)
			{
				// Give up after 60 iterations for the same eigenvalue.
				if (iter++ == 60) THROW_EXCEPTION("tqli: Too many iterations in tqli!")

				g=static_cast<T>(( (d[l+1]-d[l])/(2.0*e[l])));			// Form shift.
				r=pythag(g,static_cast<T>(1.0));
				g=d[m]-d[l]+e[l]/(g+SIGN(r,g));		// This is dm - ks.
				s = c = 1;
				p = 0;
				for (i=m-1;i>=l;i--)				// A plane rotation as in the original QL, followed by Givens rotations to restore tridiagonal form.
				{
					f=s*e[i];
					b=c*e[i];
					e[i+1]=(r=pythag(f,g));
					if (r == 0.0)						// Recover from underflow.
					{
						d[i+1] -= p;
						e[m]=0.0;
						break;
					}
					s=f/r;
					c=g/r;
					g=d[i+1]-p;
					r=(d[i]-g)*s+2.0f*c*b;
					d[i+1]=g+(p=s*r);
					g=c*r-b;
					/* Next loop can be omitted if eigenvectors not wanted*/
					for (k=1;k<=n;k++)  // Form eigenvectors.
					{
						f=z[k][i+1];
						z[k][i+1]=s*z[k][i]+c*f;
						z[k][i]=c*z[k][i]-s*f;
					}
				}

				// The inner loop was left through the underflow "break": retry this eigenvalue
				//  ("continue" jumps to the "while (m != l)" test of the do-while).
				if (r == 0.0 && i >= l) continue;
				d[l] -= p;
				e[l]=g;
				e[m]=0.0;
			}
		} while (m != l);	// m == l means no splitting point beyond l was found: d[l] has converged.
	}
}

/** Auxiliary function (internal use only).
  *  Computes sqrt(a^2 + b^2) by factoring out the larger of |a| and |b|, so that the
  *  squared quantity is a ratio not greater than 1 (this avoids squaring a and b directly).
  *
  *  The intermediate values are plain local variables (they used to be function-level
  *  statics shared by every call, which made the function non-reentrant and unsafe to
  *  call from several threads at once).
  *
  * \param a First operand.
  * \param b Second operand.
  * \return sqrt(a*a + b*b), or 0 when both operands are zero.
  */
template <class T>
T  pythag(T a, T b)
{
	const T absA = fabs(a);
	const T absB = fabs(b);

	if (absA > absB)
	{
		const T ratio = absB/absA;
		return static_cast<T>( absA*(::sqrt(1.0+ratio*ratio)) );
	}

	// Here |b| >= |a| (or a NaN is involved): guard against the 0/0 division.
	if (absB)
	{
		const T ratio = absA/absB;
		return static_cast<T>( absB*(::sqrt(1.0+ratio*ratio)) );
	}

	return static_cast<T>(0);
}

/** Auxiliary function (internal use only).
  *  Returns the magnitude of "a" carrying the sign of "b".
  * \param a Value whose absolute value is used.
  * \param b Value whose sign is used; b >= 0 (zero included) selects the positive result.
  * \return |a| if b >= 0, and -|a| otherwise.
  */
template <class T>
T  SIGN(T a,T b)
{
	if (b >= 0)
		return static_cast<T>( fabs(a) );

	return static_cast<T>( -fabs(a) );
}

/** SVD-Decomposition.
 *   Extracted from "Numerical recipes in C++" eBook at http://www.nr.com<br>
 *
 *   Unlike tred2/tqli, every array here is indexed from 0.
 *
 *  \param a In: the m x n matrix to decompose. Out: replaced by the matrix U.
 *  \param m Number of rows of a. Must be >= n, otherwise an exception is raised.
 *  \param n Number of columns of a.
 *  \param w Out: the n singular values (the diagonal of W).
 *  \param v Out: the n x n matrix V (not transposed).
 *
 *  An exception is raised through THROW_EXCEPTION if m < n, or if a singular value has not
 *   converged after 30 iterations.
  */
template <class T>
void svdcmp(T* a[], int m, int n, T w[], T* v[])
{
	// Given a matrix a[m][n], this routine computes its singular value
	// decomposition, A = U*W*V'.  The matrix U replaces a on output.
	// The diagonal matrix of singular values W is output as a vector w[n].
	// The matrix V  is output as v[n][n].
	// m must be greater or equal to n;  if it is smaller, then a should be
	// filled up to square with zero rows.

	int		flag, i, its, j, jj, k, l, nm;
	double c, f, h, s, x, y, z;		// Intermediate values are kept in double whatever T is.
	double anorm = 0.0, g = 0.0, scale = 0.0;

	if (m < n) THROW_EXCEPTION("svdcmp(): Matrix is not augmented with extra rows of zeros");
	std::vector<double> rv1(n);		// Work vector with one entry per column.
	const T     EPS= std::numeric_limits<T>::epsilon();	// Relative threshold used by the splitting test below.


	// Householder reduction to bidiagonal form.
	l = 0;    // added by T. Wang to avoid warning in g++
	nm = 0;   // added by T. Wang to avoid warning in g++
	for (i = 0; i < n; i++)
	{
		l = i + 1;
		rv1[i] = scale*g;
		g = s = scale = 0.0;
		// Transformation built from column i (rows i..m-1):
		if (i < m) {
			for (k = i; k < m; k++) scale += fabs(a[k][i]);
				if (scale) {
				for (k = i; k < m; k++) {
					a[k][i] /= static_cast<T>(scale);
					s += a[k][i]*a[k][i];
				}
				f = a[i][i];
				g = -SIGN(static_cast<T>(::sqrt(s)),static_cast<T>(f));
				h = f*g - s;
				a[i][i] = static_cast<T>((f - g));
				if (i != n - 1) {
					for (j = l; j < n; j++) {
						for (s  = 0.0, k = i; k < m; k++) s += a[k][i]*a[k][j];
						f = s/h;
						for ( k = i; k < m; k++) a[k][j] += static_cast<T>((f*a[k][i]));
					}
				}
				for (k = i; k < m; k++) a[k][i] *= static_cast<T>(scale);
				}	// end of "if (scale)"
			}	// end of "if (i < m)"
			w[i] = static_cast<T>((scale*g));
			g = s= scale = 0.0;
			// Transformation built from row i (columns l..n-1):
			if (i < m && i != n - 1) {
				for (k = l; k < n; k++)  scale += fabs(a[i][k]);
				if (scale) {
				for (k = l; k < n; k++) {
					a[i][k] /= static_cast<T>(scale);
					s += a[i][k]*a[i][k];
				}
				f = a[i][l];
				g = -SIGN(static_cast<T>(::sqrt(s)), static_cast<T>(f));
				h = f*g - s;
				a[i][l] = static_cast<T>((f - g));
				for (k = l; k < n; k++)  rv1[k] = a[i][k]/h;
				if (i != m - 1) {
					for (j = l; j < m; j++) {
						for (s = 0.0, k = l; k < n; k++) s += a[j][k]*a[i][k];
						for (k = l; k < n; k++) a[j][k] += static_cast<T>((s*rv1[k]));
					}
				}
				for (k = l; k < n; k++) a[i][k] *= static_cast<T>(scale);
				}	// end of "if (scale)"
			}	// end of "if (i < m && i != n - 1)"
			// anorm keeps the largest |w[i]| + |rv1[i]| seen so far; it is the scale of the convergence tests.
			anorm = std::max(static_cast<T>(anorm), static_cast<T>(fabs(w[i]) + fabs(rv1[i])));
		}	// end of the Householder "for (i ...)" loop
			/* Accumulation of right-hand transformations.  */
		for (i = n - 1; 0 <= i; i--) {
			if (i < n - 1) {
				if (g) {
				for (j = l; j < n; j++)  v[j][i] = static_cast<T>(((a[i][j]/a[i][l])/g));
					/* Double division (above) to avoid possible underflow. */
				for (j = l; j < n; j++) {
					for (s = 0.0, k = l; k < n; k++) s += a[i][k]*v[k][j];
					for (k = l; k < n; k++)  v[k][j] += static_cast<T>((s*v[k][i]));
				}
				}	// end of "if (g)"
				for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
	}	// end of "if (i < n - 1)"
			v[i][i] = 1.0;
			g = rv1[i];
			l = i;
		}
			/* Accumulation of left-hand transformations.   */
		for (i = n - 1; 0 <= i; i--) {
			l = i + 1;
			g = w[i];
			if (i < n - 1) for (j = l; j < n; j++) a[i][j] = 0.0;
			if (g) {
				g = 1.0/g;
				if (i != n - 1) {
				for (j = l; j < n; j++) {
					for (s = 0.0, k = l; k < m; k++) s += a[k][i]*a[k][j];
					f = (s/a[i][i])*g;
					for (k = i; k < m; k++) a[k][j] += static_cast<T>((f*a[k][i]));
				}
				}
				for (j = i; j < m; j++)  a[j][i] *= static_cast<T>(g);
			}
			else {
				for (j = i; j < m; j++) a[j][i] = 0.0;
			}
			a[i][i] += 1.0;   // ++a[i][i]
		}
		/* Diagonalization of the bidiagonal form.  */
		for (k = n - 1; 0 <= k; k--) {        /* Loop over singular values. */
			for (its = 0; its < 30; its++) {    /* Loop over allowed iterations.*/
				flag = 1;
				for (l = k; 0 <= l; l--) {     // Test for splitting:
				nm = l - 1;                 // Note that rv1[0] is always zero

				//if (fabs(rv1[l]) + anorm == anorm) {
				if (fabs(rv1[l]) <= EPS*anorm)
				{
					flag = 0;
					break;
				}
				// Only reached when the test above failed; the note above implies that is with l >= 1, i.e. nm >= 0.
				if ( fabs(w[nm]) + anorm == anorm) break;
				}
				if (flag) {
				c = 0.0;                       /* Cancellation of rv1[l], if l>0:*/
				s = 1.0;
				for (i = l; i <= k; i++) {
				f = s*rv1[i];
				if (fabs(f) + anorm != anorm) {
					g = w[i];
					h = pythag(static_cast<T>(f), static_cast<T>(g));
					w[i] = static_cast<T>(h);
					h = static_cast<T>((1.0/h));
					c = static_cast<T>((g*h));
					s = (-f*h);
					// Apply the rotation to columns nm and i of a.
					for (j = 0; j < m; j++) {
						y = a[j][nm];
						z = a[j][i];
						a[j][nm] = static_cast<T>((y*c + z*s));
						a[j][i]  = static_cast<T>((z*c - y*s));
					}
				}
				}
			}	// end of "if (flag)"
			z = w[k];
			if (l == k) {       /* Convergence.  */
				if (z < 0.0) {        /* Singular value is made non-negative. */
				w[k] = static_cast<T>(-z);
				for (j = 0; j < n; j++) v[j][k] = (-v[j][k]);
				}
				break;
			}
			// Last allowed iteration reached without convergence.
			if (its == 29)
			THROW_EXCEPTION("svdcmp(): Not convergence in 30 SVDCMP iterations!");

			x = w[l];               /* Shift from bottom 2-by-2 minor. */
			nm = k - 1;
			y = w[nm];
			g = rv1[nm];
			h = rv1[k];
			f = ((y - z)*(y + z) + (g - h)*(g + h))/(2.0*h*y);
			g = pythag(static_cast<T>(f), static_cast<T>(1.0));
			f = ((x - z)*(x + z) + h*((y/(f + SIGN(static_cast<T>(g), static_cast<T>(f)))) - h))/x;
				/* Next QR transformation:    */
			c = s = 1.0;
			for (j = l; j <= nm; j++) {
				i = j + 1;
				g = rv1[i];
				y = w[i];
				h = s*g;
				g = c*g;
				z = pythag(static_cast<T>(f), static_cast<T>(h));
				rv1[j] = z;
				c = f/z;
				s = h/z;
				f = x*c + g*s;
				g = g*c - x*s;
				h = y*s;
				y = y*c;
				// Rotate columns j and i of v.
				for (jj = 0; jj < n;  jj++) {
				x = v[jj][j];
				z = v[jj][i];
				v[jj][j] = static_cast<T>(x*c + z*s);
				v[jj][i] = static_cast<T>(z*c - x*s);
				}
				z = pythag(static_cast<T>(f), static_cast<T>(h));
				w[j] = static_cast<T>(z);        /* Rotation can be arbitrary if z = 0.*/
				if (z) {
				z = 1.0/z;
				c = f*z;
				s = h*z;
				}
				f = (c*g) + (s*y);
				x = (c*y) - (s*g);
				// Rotate columns j and i of a.
				for (jj = 0; jj < m; jj++) {
				y = a[jj][j];
				z = a[jj][i];
				a[jj][j] = static_cast<T>(y*c + z*s);
				a[jj][i] = static_cast<T>(z*c - y*s);
				}
			}
			rv1[l] = 0.0;
			rv1[k] = f;
			w[k] = static_cast<T>(x);
		}	// end of the "for (its ...)" loop
	}	// end of the "for (k ...)" loop
}


/** Computes the eigenvalues/eigenvector decomposition of a symmetric matrix.
 *    The decomposition is: M = Z · D · Z<sup>T</sup>, where columns in Z are the
 *	  eigenvectors and the diagonal matrix D contains the eigenvalues
 *    as diagonal elements, sorted in <i>ascending</i> order.
 *    Note that the sorting is done on the signed eigenvalues, but the <b>absolute value</b>
 *    of each eigenvalue is what gets stored in D.
 *    The algorithm is taken from "Numerical recipes in C", freely available online.
 *
 *  \param Z Out: n x n matrix with the eigenvectors as columns.
 *  \param D Out: n x n diagonal matrix with the eigenvalues.
 *
 *  An exception is raised if the matrix is not square or not exactly symmetric, or if the
 *   underlying algorithm fails. For an empty (0x0) matrix, Z and D are just set to 0x0.
 *
 *  All the work buffers are std::vector's, so they are released also when an exception
 *   is thrown half-way through the computation.
 */
template <class T>
void CMatrixTemplateNumeric<T>::eigenVectors(CMatrixTemplateNumeric<T> &Z, CMatrixTemplateNumeric<T> &D) const
{
	if (CMatrixTemplate<T>::m_Rows != CMatrixTemplate<T>::m_Cols)
		THROW_EXCEPTION( "eigenVectors: Only for square matrixes!");

	const size_t					n = CMatrixTemplate<T>::m_Cols;

	// Nothing to decompose. (tred2 unconditionally writes d[1] and e[1], which do not
	//  exist for n=0, so it must not be called in that case.)
	if (!n)
	{
		Z.setSize(0,0);
		D.setSize(0,0);
		return;
	}

	CMatrixTemplateNumeric<T>		save( *this );	// Backup, used to restore the contents and in the error report.
	size_t							i,j;

	MRPT_TRY_START;

	// Algorithm from "Numerical recipes in C"
	// -----------------------------------------------

	// Check for symmetry
	// --------------------------------------
	for (i=0;i<n;i++)
		for (j=i;j<n;j++)
			if (CMatrixTemplate<T>::m_Val[i][j]!=CMatrixTemplate<T>::m_Val[j][i])
			{
				THROW_EXCEPTION(format("eigenVectors: The matrix is not symmetric! m(%lu,%lu)=%.16e != m(%lu,%lu)=%.16e\n",
					static_cast<unsigned long>(i),static_cast<unsigned long>(j), static_cast<double> ( CMatrixTemplate<T>::m_Val[i][j] ),
					static_cast<unsigned long>(j),static_cast<unsigned long>(i), static_cast<double> ( CMatrixTemplate<T>::m_Val[j][i]) ) )
			}

	// Copy the matrix content to "a":
	//  tred2/tqli use 1-based indices, so every buffer has one extra (unused) leading entry.
	// --------------------------------------
	const size_t		stride = n+1;
	std::vector<T>		cells( stride*stride );	// Contiguous storage for the (n+1)x(n+1) work matrix.
	std::vector<T*>		a( stride );			// Row pointers into "cells".
	std::vector<T>		d( stride );			// Diagonal / eigenvalues.
	std::vector<T>		e( stride );			// Off-diagonal work vector.

	for (i=0;i<stride;i++)
		a[i] = &cells[ i*stride ];

	for (i=1;i<=n;i++)
		for (j=1;j<=n;j++)
			a[i][j] = CMatrixTemplate<T>::m_Val[i-1][j-1];

	// Algorithm
	// --------------------------------------
	tred2( &a[0], n, &d[0], &e[0] );

	tqli( &d[0], &e[0], n, &a[0] );

	// In "d" are the eigenvalues
	// In "a" are the eigenvectors as columns:

	// SORT: Build a list of the n index in
	//   ascending order of eigenvalue:
	// --------------------------------------
	std::vector<unsigned int>	indxs( stride );
	std::vector<bool>			already( stride, false );

	for (i=1;i<=n;i++)
	{
		// Selection of the smallest eigenvalue not picked yet:
		size_t		minIndx = std::numeric_limits<size_t>::max();
		for (j=1;j<=n;j++)
			if (!already[j])
			{
				if (minIndx==std::numeric_limits<size_t>::max())		minIndx = j;
				else
					if (d[j]<d[minIndx])	minIndx = j;
			}

		// The i'th sorted element:
		indxs[i] = static_cast<unsigned int> ( minIndx );
		already[minIndx] = true;
	}

	for (i=1;i<=n;i++)
		ASSERT_(already[i]);

	// Copy results to matrices classes
	// --------------------------------------
	Z.setSize(n,n);
	D.setSize(n,n);
	for (i=1;i<=n;i++)
		for (j=1;j<=n;j++)
		{
			Z(i-1,j-1) = a[i][indxs[j]];
			if (i==j)
			{
				// The absolute value of the eigenvalue is stored:
				if (d[indxs[j]]<0)
						D(i-1,i-1) = -d[indxs[j]];
				else	D(i-1,i-1) = d[indxs[j]];
			}
			else		D(i-1,j-1) = 0;

			// Debug: check the two elements just written. (Checking D(i-1,i-1) here would
			//  read a diagonal element before it has been assigned whenever j<i.)
			ASSERT_( !(system::isNaN( Z(i-1,j-1) )));
			ASSERT_( !(system::isNaN( D(i-1,j-1) )));
		}

	// Restore contents (it has to be symmetric):
	for (i=0;i<n;i++)
		for (j=i;j<n;j++)
			CMatrixTemplate<T>::m_Val[i][j] = CMatrixTemplate<T>::m_Val[j][i] = save.get_unsafe(i,j);

	MRPT_TRY_END_WITH_CLEAN_UP( \
		std::cout << "[eigenVectors] The matrix leading to exception is:" << std::endl << save << std::endl; \
		for (i=0;i<n;i++) \
			for (j=i;j<n;j++)  \
				CMatrixTemplate<T>::m_Val[i][j] = CMatrixTemplate<T>::m_Val[j][i] = save.get_unsafe(i,j); \
		);

}

/** Estimates the eigenvector associated with the largest eigenvalue by means of the
  *  iterative power method: starting with a column vector of ones, the vector is repeatedly
  *  multiplied by this matrix and normalized to unit length.
  *
  *  The loop always runs at least once, and stops as soon as "maxIterations" iterations have
  *  been done or the Euclidean distance between two consecutive estimates is no longer
  *  above "resolution".
  *
  * \param resolution Threshold on the change between consecutive estimates.
  * \param maxIterations Maximum number of iterations.
  * \param out_Iterations If not NULL, it receives the number of iterations executed.
  * \param out_estimatedResolution If not NULL, it receives the last change between estimates.
  * \return The estimated eigenvector, as a column matrix with as many rows as this matrix.
  */
template <class T>
CMatrixTemplateNumeric<T>  CMatrixTemplateNumeric<T>::largestEigenvector(
	T			resolution,
	size_t			maxIterations ,
	int			*out_Iterations,
	float		*out_estimatedResolution ) const
{
	const size_t				dim = CMatrixTemplate<T>::m_Rows;
	size_t						nIters = 0;
	CMatrixTemplateNumeric<T>	estimate, next;		// Current and candidate vectors
	T							delta;

	// Initial guess: all ones.
	estimate.ones(dim,1);

	for (;;)
	{
		next = (*this) * estimate;

		// Scale the candidate to unit Euclidean norm:
		T sqNorm = 0;
		for (size_t r=0;r<dim;r++)
			sqNorm+= mrpt::utils::square(next(r,0));
		next *= static_cast<T>((1.0/::sqrt(sqNorm)));

		// Euclidean distance to the previous estimate:
		delta = 0;
		for (size_t r=0;r<dim;r++)
			delta+=static_cast<T>(( mrpt::utils::square(fabs(next(r,0)-estimate(r,0)))));
		delta=static_cast<T>(::sqrt(delta));

		// Accept the candidate:
		estimate = next;
		nIters++;

		// Done when out of iterations or when the change is not above the threshold:
		if ( nIters>=maxIterations || !(delta>resolution) )
			break;
	}

	if (out_Iterations)
		*out_Iterations=static_cast<int>(nIters);
	if (out_estimatedResolution)
		*out_estimatedResolution=delta;

	return estimate;
}


/** Save matrix to a text file, compatible with MATLAB text format.
	*  One text line is written per matrix row, with the elements separated by one blank space.
	* \param file The name of the file to write (opened in "wt" mode, so it is overwritten).
	* \param fileFormat MATRIX_FORMAT_ENG=engineering format '%.16e', MATRIX_FORMAT_FIXED=floating number format '%.16f', MATRIX_FORMAT_INT=integers '%i'
	* \param appendMRPTHeader If true, a comment line ('%') with the MRPT version and the current date is written before the data.
	* \param userHeader If not empty, this text is written verbatim at the very beginning of the file.
	*
	*  An exception is raised if the file cannot be opened or if "fileFormat" is not one of
	*   the values above (in that case the file is closed before raising the exception).
	* \sa loadFromTextFile, DEBUG_SAVE_MATRIX
	*/
template <class T>
void  CMatrixTemplateNumeric<T>::saveToTextFile(
	const std::string &file,
	TMatrixTextFileFormat fileFormat,
	bool    appendMRPTHeader,
	const std::string &userHeader ) const
{
	MRPT_TRY_START;

	FILE	*f=os::fopen(file.c_str(),"wt");
	if (!f)
		THROW_EXCEPTION_CUSTOM_MSG1("saveToTextFile: Error opening file '%s' for writing a matrix as text.", file.c_str());

	if (!userHeader.empty())
		fprintf(f,"%s",userHeader.c_str() );

	if (appendMRPTHeader)
		fprintf(f,"%% File generated with MRPT %s at %s\n%%-----------------------------------------------------------------\n",
			mrpt::system::MRPT_getVersion().c_str(),
			mrpt::system::dateTimeLocalToString( mrpt::system::now() ).c_str() );

	for (size_t i=0; i < CMatrixTemplate<T>::m_Rows; i++)
	{
		for (size_t j=0; j < CMatrixTemplate<T>::m_Cols; j++)
		{
			switch(fileFormat)
			{
			case MATRIX_FORMAT_ENG: os::fprintf(f,"%.16e",static_cast<double>(this->operator() (i,j)) ); break;
			case MATRIX_FORMAT_FIXED: os::fprintf(f,"%.16f",static_cast<double>(this->operator() (i,j)) ); break;
			case MATRIX_FORMAT_INT: os::fprintf(f,"%i",static_cast<int>(this->operator() (i,j)) ); break;
			default:
				// Close the file first: otherwise the handle would be leaked by the exception.
				os::fclose(f);
				THROW_EXCEPTION("Unsupported value for the parameter 'fileFormat'!");
			}
			// Separating blank space (not after the last column)
			if (j<(CMatrixTemplate<T>::m_Cols-1)) os::fprintf(f," ");
		}
		os::fprintf(f,"\n");
	}
	os::fclose(f);

	MRPT_TRY_END;
}

/** Load matrix from a text file, compatible with MATLAB text format.
  *  Lines starting with '%' or '#' are interpreted as comments and ignored, and so are
  *  empty lines and lines containing only white-space (e.g. the lone '\r' left by files
  *  with Windows line endings).
  *
  *  Each remaining line is one row of the matrix: numbers are parsed with strtod() until
  *  the end of the line or the first text that is not a number. All rows must have the
  *  same number of elements.
  *
  * \param file The name of the text file to read.
  *
  *  An exception is raised if the file cannot be opened, if the rows do not have all the
  *   same number of elements, or if no data could be loaded.
  * \sa saveToTextFile
  */
template <class T>
void  CMatrixTemplateNumeric<T>::loadFromTextFile(const std::string &file)
{
	std::ifstream	f(file.c_str());
	if (f.fail()) THROW_EXCEPTION_CUSTOM_MSG1("loadFromTextFile: can't open file:'%s'",file.c_str());

	std::string		str;
	std::vector<double>	fil(512);	// Buffer for the numbers of the current row; it grows on demand.

	const char	*ptr;
	char		*ptrEnd;
	size_t	i,j;
	size_t	nCols = std::numeric_limits<size_t>::max();	// "max" means: the first row has not been read yet.
	size_t	nRows = 0;

	CMatrixTemplate<T>::realloc(0,0);

	// Looping on getline() itself (instead of on eof()) also ends the loop if the
	//  stream enters a failed state before reaching the end of the file.
	while ( std::getline(f,str) )
	{
		// Skip empty lines and comments:
		if (str.empty() || str[0]=='#' || str[0]=='%')
			continue;

		// Parse row to floats:
		ptr = str.c_str();

		ptrEnd = NULL;
		i=0;

		// Process each number in this row:
		while ( ptr[0] && ptr!=ptrEnd )
		{
			// Find next number: (non white-space character):
			while (ptr[0] && (ptr[0]==' ' || ptr[0]=='\t' || ptr[0]=='\r' || ptr[0]=='\n'))
				ptr++;

			if (fil.size()<=i)	fil.resize(fil.size()+512);

			// Convert to "double":
			fil[i] = strtod(ptr,&ptrEnd);

			// A valid conversion has been done?
			if (ptr!=ptrEnd)
			{
				i++;	// Yes
				ptr = ptrEnd;
				ptrEnd = NULL;
			}
		} // end while processing this row

		// A line made only of white-space is not a row of the matrix: ignore it.
		if (!i && !ptr[0])
			continue;

		if (nCols==std::numeric_limits<size_t>::max())
		{
			// First row: it defines the number of columns.
			nCols = i;
			CMatrixTemplate<T>::realloc(nCols,nCols);
		}
		else
		{
			// Same elements count in each row?
			if (CMatrixTemplate<T>::getColCount()!=i )
				THROW_EXCEPTION("The matrix in the text file must have the same number of elements in each row!");
		}

		// Copy row to matrix:
		for (j=0;j<nCols;j++)
			CMatrixTemplate<T>::m_Val[nRows][j] = static_cast<T>(fil[j]);

		// Make sure there is always room for the next row:
		nRows++;
		if (nRows >= CMatrixTemplate<T>::getRowCount() )
			CMatrixTemplate<T>::realloc( nRows+10, nCols );

	} // end while there are lines to read

	// Trim the matrix to the number of rows actually read:
	if (nRows && nCols)
		CMatrixTemplate<T>::realloc( nRows, nCols );

	// Report error as exception
	if ( !CMatrixTemplate<T>::getRowCount() || !CMatrixTemplate<T>::getColCount() )
		THROW_EXCEPTION("loadFromTextFile: Error loading from text file");
}

/** Computes the Laplacian of this square matrix (named W below): LAPLACIAN = D - W, where the
  *  "degree" D(i,i) is the sum of the elements in the i'th column of W.
  *  The off-diagonal elements are taken from the upper triangle of W only, and are
  *  mirrored into the lower triangle of the result.
  * \param ret Out: the Laplacian matrix, with the same size as this matrix.
  *
  *  An exception is raised if this matrix is not square.
  */
template <class T>
void CMatrixTemplateNumeric<T>::laplacian( CMatrixTemplateNumeric<T> &ret ) const
{
	typedef CMatrixTemplate<T> Base;

	if ( Base::m_Rows != Base::m_Cols ) THROW_EXCEPTION( "laplacian: Defined for square matrixes only!");

	const size_t dim = Base::m_Rows;

	// Degree of each node = sum of its column:
	// -------------------------------------
	std::vector<T> degree(dim);
	for (size_t col=0;col<dim;col++)
	{
		degree[col] = 0;
		for (size_t row=0;row<dim;row++)
			degree[col] += Base::m_Val[row][col];
	}

	// LAPLACIAN = D - W
	// -----------------------------------
	ret.realloc(dim,dim);

	for (size_t r=0;r<dim;r++)
	{
		// Diagonal element:
		ret(r,r) = degree[r] - Base::m_Val[r][r];

		// Off-diagonal elements, filled symmetrically from the upper triangle:
		for (size_t c=r+1;c<dim;c++)
		{
			ret(c,r) = -Base::m_Val[r][c];
			ret(r,c) = ret(c,r);
		}
	}
}

/** Computes the SVD (Singular Value Decomposition) of the matrix.
  *  If "this" matrix is named A with dimensions M x N, this method computes: <br>
  *			A = U * W * V' <br>
  * <br>
  *  , where U is a M x N column orthogonal matrix, W is a diagonal matrix
  *  containing the singular values, and V is a NxN matrix. <br>
  * This method returns the U matrix, the N elements in the diagonal of W as a vector,
  *  and the matrix V, NOT TRANSPOSED.
  *
  * \param U Out: the M x N matrix U.
  * \param W Out: the N singular values.
  * \param V Out: the N x N matrix V (not transposed).
  *
  *  The computation is done by svdcmp(), which raises an exception if M < N or if the
  *   algorithm does not converge. All the work buffers are std::vector's, so nothing is
  *   leaked in those cases.
  */
template <class T>
void  CMatrixTemplateNumeric<T>::svd(CMatrixTemplateNumeric<T> &U, std::vector<T> &W,CMatrixTemplateNumeric<T> &V) const
{
	// Algorithm from "Numerical recipes in C"
	//
	//  a <-- this,
	//    execute svdcmp
	//  U <-- a
	//  W <-- w
	//  V <-- v
	// -----------------------------------------------

	size_t			i,j;
	const size_t	m = CMatrixTemplate<T>::getRowCount();
	const size_t	n = CMatrixTemplate<T>::getColCount();

	// Work buffers: contiguous storage plus row pointers, as svdcmp() expects "T**".
	//  One extra element is always allocated so that taking the address of the first
	//  element is valid even for empty matrices.
	// --------------------------------------
	std::vector<T>	aCells( m*n + 1 ), vCells( n*n + 1 );
	std::vector<T*>	a( m + 1 ), v( n + 1 );
	std::vector<T>	w( n + 1 );

	for (i=0;i<m;i++)	a[i] = &aCells[ i*n ];
	for (i=0;i<n;i++)	v[i] = &vCells[ i*n ];

	// Copy the matrix content to "a":
	// --------------------------------------
	for (i=0;i<m;i++)
		for (j=0;j<n;j++)
			a[i][j] = (*this)(i,j);

	// Algorithm
	// --------------------------------------
	svdcmp( &a[0], static_cast<int>(m), static_cast<int>(n), &w[0], &v[0] );

	// Copy results to matrices classes
	// --------------------------------------
	U.setSize( m,n );
	W.resize( n );
	V.setSize(n,n);

	for (i=0;i<m;i++)
		for (j=0;j<n;j++)
			U(i,j)= a[i][j];

	for (i=0;i<n;i++)
		W[i] = w[i];

	for (i=0;i<n;i++)
		for (j=0;j<n;j++)
			V(i,j)=v[i][j];
}


/** Multiply 2 matrices and save the result in "this" object: this = m1 * m2.
  *  Any of the operands can be "this" object itself: in that case the product is first
  *  computed into a temporary buffer.
  * \param m1 The left operand.
  * \param m2 The right operand.
  *
  *  The sizes of the operands are checked (raising an exception if they are inconsistent)
  *   only if _DEBUG or MRPT_ALWAYS_CHECKS_DEBUG_MATRICES is set.
*/
template <class T>
void CMatrixTemplateNumeric<T>::multiply(const CMatrixTemplateNumeric<T>& m1, const CMatrixTemplateNumeric<T>& m2)
{
	MRPT_TRY_START

	const size_t M1R = m1.getRowCount();
	const size_t M1C = m1.getColCount();
	const size_t M2C = m2.getColCount();

#if defined(_DEBUG) || (MRPT_ALWAYS_CHECKS_DEBUG_MATRICES)
	if (m1.getColCount() != m2.getRowCount())
		THROW_EXCEPTION( "multiply: Inconsistent matrix sizes in multiplication!");
#endif
	// If one of the matrices is me, make a copy:
	if (&m1==this || &m2==this)
	{
		// Save result in a temporal buffer (row-major). A std::vector is used so that
		//  the memory is released even if setSize() below throws.
		std::vector<T>  temp( M1R*M2C );
		size_t          idx = 0;

		for (size_t i=0; i < M1R; i++)
		{
			for (size_t j=0; j < M2C; j++)
			{
				T accum = 0;
				for (size_t k=0; k < M1C; k++)
					accum += m1.get_unsafe(i,k) * m2.get_unsafe(k,j);
				temp[idx++] = accum;
			}
		}

		// Copy from temp:
		setSize(M1R,M2C);
		idx = 0;
		for (size_t i=0; i < M1R; i++)
			for (size_t j=0; j < M2C; j++)
				set_unsafe(i,j,  temp[idx++] );
	}
	else
	{
		// Work directly over the data:
		setSize( M1R,M2C );

		for (size_t i=0; i < M1R; i++)
		{
			for (size_t j=0; j < M2C; j++)
			{
				T accum = 0;
				for (size_t k=0; k < M1C; k++)
					accum += m1.get_unsafe(i,k) * m2.get_unsafe(k,j);
				set_unsafe(i,j,accum);
			}
		}

	}

	MRPT_TRY_END
}

/** Multiply one matrix by a column vector and save the result in "this" object, which
  *  becomes a column matrix with as many rows as m1: this = m1 * m2.
  *  m1 can be "this" object itself: in that case the product is first computed into a
  *  temporary buffer.
  * \param m1 The matrix (left operand).
  * \param m2 The vector (right operand), read through m2[k] for k = 0 .. columns of m1 - 1.
  *
  *  The sizes of the operands are checked (raising an exception if they are inconsistent)
  *   only if _DEBUG or MRPT_ALWAYS_CHECKS_DEBUG_MATRICES is set.
*/
template <class T>
void CMatrixTemplateNumeric<T>::multiply(const CMatrixTemplateNumeric<T>& m1, const CVectorTemplate<T>& m2)
{
	MRPT_TRY_START

	const size_t M1R = m1.getRowCount();
	const size_t M1C = m1.getColCount();

#if defined(_DEBUG) || (MRPT_ALWAYS_CHECKS_DEBUG_MATRICES)
	if (m1.getColCount() != m2.size())
		THROW_EXCEPTION( "multiply: Inconsistent matrix sizes in multiplication!");
#endif
	// If the matrix is me, make a copy:
	if (&m1==this)
	{
		// Save result in a temporal buffer. A std::vector is used so that the memory
		//  is released even if setSize() below throws.
		std::vector<T>  temp( M1R );

		for (size_t i=0; i < M1R; i++)
		{
			T accum = 0;
			for (size_t k=0; k < M1C; k++)
				accum += m1.get_unsafe(i,k) * m2[k];
			temp[i] = accum;
		}

		// Copy from temp:
		setSize(M1R,1);
		for (size_t i=0; i < M1R; i++)
			set_unsafe(i,0,  temp[i] );
	}
	else
	{
		// Work directly over the data:
		setSize( M1R,1 );

		for (size_t i=0; i < M1R; i++)
		{
			T accum = 0;
			for (size_t k=0; k < M1C; k++)
				accum += m1.get_unsafe(i,k) * m2[k];
			set_unsafe(i,0,accum);
		}
	}

	MRPT_TRY_END
}

/** Makes this = M1 * M2^T
  *  Any of the operands can be "this" object itself: in that case the product is first
  *  computed into a temporary buffer.
  * \param m1 The left operand.
  * \param m2 The right operand, which is used transposed (it is never explicitly transposed).
  *
  *  The sizes of the operands are checked (both must have the same number of columns,
  *   raising an exception otherwise) only if _DEBUG or MRPT_ALWAYS_CHECKS_DEBUG_MATRICES is set.
*/
template <class T>
void CMatrixTemplateNumeric<T>::multiply_ABt(const CMatrixTemplateNumeric<T>& m1, const CMatrixTemplateNumeric<T>& m2)
{
	MRPT_TRY_START

	const size_t M1R = m1.getRowCount();
	const size_t M1C = m1.getColCount();
	const size_t M2C = m2.getRowCount(); // Columns of M2^T = rows of M2

#if defined(_DEBUG) || (MRPT_ALWAYS_CHECKS_DEBUG_MATRICES)
	if (m1.getColCount() != m2.getColCount())
		THROW_EXCEPTION( "multiply: Inconsistent matrix sizes in multiplication!");
#endif
	// If one of the matrices is me, make a copy:
	if (&m1==this || &m2==this)
	{
		// Save result in a temporal buffer (row-major). A std::vector is used so that
		//  the memory is released even if setSize() below throws.
		std::vector<T>  temp( M1R*M2C );
		size_t          idx = 0;

		for (size_t i=0; i < M1R; i++)
		{
			for (size_t j=0; j < M2C; j++)
			{
				T accum = 0;
				for (size_t k=0; k < M1C; k++)
					accum += m1.get_unsafe(i,k) * m2.get_unsafe(j,k); // (j,k) instead of (k,j): M2 transposed
				temp[idx++] = accum;
			}
		}

		// Copy from temp:
		setSize(M1R,M2C);
		idx = 0;
		for (size_t i=0; i < M1R; i++)
			for (size_t j=0; j < M2C; j++)
				set_unsafe(i,j,  temp[idx++] );
	}
	else
	{
		// Work directly over the data:
		setSize( M1R,M2C );

		for (size_t i=0; i < M1R; i++)
		{
			for (size_t j=0; j < M2C; j++)
			{
				T accum = 0;
				for (size_t k=0; k < M1C; k++)
					accum += m1.get_unsafe(i,k) * m2.get_unsafe(j,k); // (j,k) instead of (k,j): M2 transposed
				set_unsafe(i,j,accum);
			}
		}
	}

	MRPT_TRY_END
}


/** Makes this = M1 * M1^T, taking into account that the result is symmetric and only half the computations must be done.
  *  Only the elements (i,j) with j>=i are computed; each one is then stored at both (i,j) and (j,i).
  *  m1 can be "this" object itself: in that case the product is first computed into a
  *  temporary buffer.
  * \param m1 The matrix to be multiplied by its own transpose.
  */
template <class T>
void CMatrixTemplateNumeric<T>::multiply_AAt( const CMatrixTemplateNumeric<T>& m1 )
{
	MRPT_TRY_START

	const size_t M1R = m1.getRowCount();
	const size_t M1C = m1.getColCount();
	const size_t M2C = m1.getRowCount();	// The result is square: M1R x M1R

	// If m1 is me, make a copy:
	if (&m1==this)
	{
		// Save the upper triangle (diagonal included) of the result in a temporal buffer:
		//  that is M1R*(M1R+1)/2 elements. A std::vector is used so that the memory is
		//  released even if setSize() below throws.
		std::vector<T>  temp( (M1R*(M1R+1))/2 );
		size_t          idx = 0;

		for (size_t i=0; i < M1R; i++)
		{
			for (size_t j=i; j < M2C; j++)
			{
				T accum = 0;
				for (size_t k=0; k < M1C; k++)
					accum += m1.get_unsafe(i,k) * m1.get_unsafe(j,k);
				temp[idx++] = accum;
			}
		}

		// Copy from temp, mirroring each element:
		setSize(M1R,M2C);
		idx = 0;
		for (size_t i=0; i < M1R; i++)
			for (size_t j=i; j < M2C; j++)
			{
				set_unsafe(i,j,  temp[idx] );
				set_unsafe(j,i,  temp[idx] );
				idx++;
			}
	}
	else
	{
		// Work directly over the data:
		setSize( M1R,M2C );

		for (size_t i=0; i < M1R; i++)
		{
			for (size_t j=i; j < M2C; j++)
			{
				T accum = 0;
				for (size_t k=0; k < M1C; k++)
					accum += m1.get_unsafe(i,k) * m1.get_unsafe(j,k);
				set_unsafe(i,j,accum);
				set_unsafe(j,i,accum);
			}
		}
	}

	MRPT_TRY_END
}


/** Replaces, in place, every element by its square root.
  *  Elements that are not strictly positive are set to zero.
  * \return A reference to THIS object.
  */
template <class T>
CMatrixTemplateNumeric<T>&  CMatrixTemplateNumeric<T>::Sqrt()
{
	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
		{
			T &elem = rowData[c];
			if (!(elem>0))
				elem = 0;
			else
				elem = static_cast<T>( ::sqrt(elem) );
		}
	}
	return *this;
}

/** Replaces, in place, every element by its absolute value (via ::fabs).
  * \return A reference to THIS object.
  */
template <class T>
CMatrixTemplateNumeric<T>&  CMatrixTemplateNumeric<T>::Abs()
{
	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
		{
			T &elem = rowData[c];
			elem = static_cast<T>( ::fabs(elem) );
		}
	}
	return *this;
}


/** Replaces, in place, every element by the result of square() applied to it.
  * \return A reference to THIS object.
  */
template <class T>
CMatrixTemplateNumeric<T>&  CMatrixTemplateNumeric<T>::Square()
{
	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
		{
			T &elem = rowData[c];
			elem = square(elem);
		}
	}
	return *this;
}

/** Unary plus: returns a copy of this matrix, unchanged.
*/
template <class T>
CMatrixTemplateNumeric<T> CMatrixTemplateNumeric<T>::operator + ()
{
	CMatrixTemplateNumeric<T>	copyOfMe(*this);
	return copyOfMe;
}

/** Unary minus: returns a new matrix of the same size whose elements are the negated elements of this one.
*/
template <class T>
CMatrixTemplateNumeric<T>  CMatrixTemplateNumeric<T>::operator - ()
{
	CMatrixTemplateNumeric<T>	negated(CMatrixTemplate<T>::m_Rows,CMatrixTemplate<T>::m_Cols);

	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		const T *src = CMatrixTemplate<T>::m_Val[r];
		T       *dst = negated.m_Val[r];
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
			dst[c] = - src[c];
	}

	return negated;
}

/** Element-wise addition of another matrix into this one (this = this + m).
  *  Sizes are only verified in debug builds (or when MRPT_ALWAYS_CHECKS_DEBUG_MATRICES is set).
  * \return A reference to THIS object.
  */
template<class T>
CMatrixTemplateNumeric<T>&  CMatrixTemplateNumeric<T>::operator += (const CMatrixTemplateNumeric<T>& m)
{
#if defined(_DEBUG) || (MRPT_ALWAYS_CHECKS_DEBUG_MATRICES)
	if (!(CMatrixTemplate<T>::m_Rows == m.m_Rows && CMatrixTemplate<T>::m_Cols == m.m_Cols))
		THROW_EXCEPTION( "operator+= : Inconsistent matrix sizes in addition!");
#endif

	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		T *dst = CMatrixTemplate<T>::m_Val[r];
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
			dst[c] += m.m_Val[r][c];
	}
	return *this;
}

/** Adds to this matrix the transpose of m (this = this + m^T).
  *  Sizes are only verified in debug builds (or when MRPT_ALWAYS_CHECKS_DEBUG_MATRICES is set).
  * \return A reference to THIS object.
  */
template <class T>
CMatrixTemplateNumeric<T>&  CMatrixTemplateNumeric<T>::addAt(const CMatrixTemplateNumeric<T>& m)
{
#if defined(_DEBUG) || (MRPT_ALWAYS_CHECKS_DEBUG_MATRICES)
	if (!(CMatrixTemplate<T>::m_Rows == m.m_Cols && CMatrixTemplate<T>::m_Cols == m.m_Rows))
		THROW_EXCEPTION( "Inconsistent matrix sizes in addition!");
#endif

	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		T *dst = CMatrixTemplate<T>::m_Val[r];
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
			dst[c] += m.m_Val[c][r];	// Transposed access into m
	}
	return *this;
}

/** Add to this matrix A and its transpose (this = this + A + At).
  *  A + At is symmetric, so the sum m(i,j)+m(j,i) is computed once per pair and added to both
  *  (i,j) and (j,i) of this matrix. Both matrices must be square and of the same size (only
  *  verified in debug builds or when MRPT_ALWAYS_CHECKS_DEBUG_MATRICES is set).
  *  It is safe to pass this same object as "m".
  * \return A reference to THIS object.
  */
template <class T>
CMatrixTemplateNumeric<T>&  CMatrixTemplateNumeric<T>::addAAt(const CMatrixTemplateNumeric<T>& m)
{
#if defined(_DEBUG) || (MRPT_ALWAYS_CHECKS_DEBUG_MATRICES)
	if (CMatrixTemplate<T>::m_Rows != CMatrixTemplate<T>::m_Cols || m.m_Cols!=m.m_Rows || CMatrixTemplate<T>::m_Cols != m.m_Rows)
		THROW_EXCEPTION( "Inconsistent matrix sizes in addition!");
#endif
	const size_t N = CMatrixTemplate<T>::m_Rows;

	for (size_t i=0; i < N; i++)
		for (size_t j=i; j < N; j++)
		{
			// Read both operands BEFORE writing, so the result is also right when &m==this:
			const T sum = m.m_Val[j][i] + m.m_Val[i][j];

			CMatrixTemplate<T>::m_Val[i][j] += sum;
			// Mirror into the lower triangle (the diagonal must be updated only once):
			if (i!=j)
				CMatrixTemplate<T>::m_Val[j][i] += sum;
		}
	return *this;
}


/** Element-wise subtraction of another matrix from this one (this = this - m).
  *  Sizes are only verified in debug builds (or when MRPT_ALWAYS_CHECKS_DEBUG_MATRICES is set).
  * \return A reference to THIS object.
  */
template <class T>
CMatrixTemplateNumeric<T>&  CMatrixTemplateNumeric<T>::operator -= (const CMatrixTemplateNumeric<T>& m)
{
#if defined(_DEBUG) || (MRPT_ALWAYS_CHECKS_DEBUG_MATRICES)
	if (CMatrixTemplate<T>::m_Rows != m.m_Rows || CMatrixTemplate<T>::m_Cols != m.m_Cols)
		THROW_EXCEPTION( "operator-= : Inconsistent matrix sizes in subtraction!");
#endif
	for (size_t i=0; i < CMatrixTemplate<T>::m_Rows; i++)
		for (size_t j=0; j < CMatrixTemplate<T>::m_Cols; j++)
			CMatrixTemplate<T>::m_Val[i][j] -= m.m_Val[i][j];
	return *this;
}


/** Multiplies, in place, every element of the matrix by the scalar "c".
  * \return A reference to THIS object.
  */
template <class T>
CMatrixTemplateNumeric<T>&  CMatrixTemplateNumeric<T>::operator *= (const T& c)
{
	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t k=0; k < CMatrixTemplate<T>::m_Cols; k++)
			rowData[k] *= c;
	}
	return *this;
}

/** Matrix product and assignment: this = this * m.
  *  The product is built in a temporary matrix which is then assigned to this object.
  *  Sizes are only verified in debug builds (or when MRPT_ALWAYS_CHECKS_DEBUG_MATRICES is set).
  * \return A reference to THIS object.
  */
template <class T>
CMatrixTemplateNumeric<T>&  CMatrixTemplateNumeric<T>::operator *= (const CMatrixTemplateNumeric<T>& m)
{
#if defined(_DEBUG) || (MRPT_ALWAYS_CHECKS_DEBUG_MATRICES)
	if (m.m_Rows != CMatrixTemplate<T>::m_Cols)
		THROW_EXCEPTION( "operator*= : Inconsistent matrix sizes in multiplication!");
#endif
	CMatrixTemplateNumeric<T>	product(CMatrixTemplate<T>::m_Rows,m.m_Cols);

	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		const T *lhsRow = CMatrixTemplate<T>::m_Val[r];
		for (size_t c=0; c < m.m_Cols; c++)
		{
			T &cell = product.m_Val[r][c];
			cell = 0;
			for (size_t k=0; k < CMatrixTemplate<T>::m_Cols; k++)
				cell += lhsRow[k] * m.m_Val[k][c];
		}
	}

	*this = product;
	return *this;
}




/** Computes this = M1^T * M1. Since the result is symmetric, only the entries with column >= row
  *  are actually computed; each one is then stored at both (i,j) and (j,i).
  *  M1 may be this very object, in which case a scratch buffer is used.
  */
template <class T>
void CMatrixTemplateNumeric<T>::multiply_AtA( const CMatrixTemplateNumeric<T>& m1 )
{
	MRPT_TRY_START

	const size_t nOutRows = m1.getColCount();
	const size_t nInner   = m1.getRowCount();
	const size_t nOutCols = m1.getColCount();

	if (this!=&m1)
	{
		// No aliasing: write the result straight into this matrix.
		setSize( nOutRows,nOutCols );

		for (size_t r=0; r < nOutRows; r++)
		{
			for (size_t c=r; c < nOutCols; c++)
			{
				T dot = 0;
				for (size_t k=0; k < nInner; k++)
					dot += m1.get_unsafe(k,r) * m1.get_unsafe(k,c);
				set_unsafe(r,c,dot);
				set_unsafe(c,r,dot);
			}
		}
	}
	else
	{
		// m1 is me: store the computed half (row by row) in a scratch buffer first.
		T  *scratch = new T[nOutRows*nOutCols];
		size_t  idx = 0;

		for (size_t r=0; r < nOutRows; r++)
		{
			for (size_t c=r; c < nOutCols; c++)
			{
				T dot = 0;
				for (size_t k=0; k < nInner; k++)
					dot += m1.get_unsafe(k,r) * m1.get_unsafe(k,c);
				scratch[idx++] = dot;
			}
		}

		// Write back, mirroring every value across the diagonal:
		setSize(nOutRows,nOutCols);
		idx = 0;
		for (size_t r=0; r < nOutRows; r++)
		{
			for (size_t c=r; c < nOutCols; c++)
			{
				const T &val = scratch[idx++];
				set_unsafe(r,c, val );
				set_unsafe(c,r, val );
			}
		}
		delete[] scratch;
	}

	MRPT_TRY_END
}

/** Computes the vector out_v = this * a.
  *  If this matrix is NxM, "a" must have exactly M elements (asserted) and "out_v" is resized to N.
  */
template <class T>
void CMatrixTemplateNumeric<T>::multiply_Ab( const std::vector<T>& a, std::vector<T>& out_v )
{
	MRPT_TRY_START

	const size_t nRows = CMatrixTemplate<T>::getRowCount();
	const size_t nCols = CMatrixTemplate<T>::getColCount();

	ASSERT_( a.size() == nCols )
	out_v.resize(nRows);

	for (size_t r=0; r < nRows; r++)
	{
		// Dot product of the r'th row with "a":
		T  dot = 0;
		for (size_t c=0; c < nCols; c++)
			dot += a[c] * CMatrixTemplate<T>::get_unsafe(r,c);

		out_v[r] = dot;
	}

	MRPT_TRY_END
}

/** Computes the vector out_v = this^T * a.
  *  If this matrix is NxM, "a" must have exactly N elements (asserted) and "out_v" is resized to M.
  */
template <class T>
void CMatrixTemplateNumeric<T>::multiply_Atb( const std::vector<T>& a, std::vector<T>& out_v )
{
	MRPT_TRY_START

	const size_t nRows = CMatrixTemplate<T>::getRowCount();
	const size_t nCols = CMatrixTemplate<T>::getColCount();

	ASSERT_( a.size() == nRows )
	out_v.resize(nCols);

	for (size_t c=0; c < nCols; c++)
	{
		// Dot product of the c'th column with "a":
		T  dot = 0;
		for (size_t r=0; r < nRows; r++)
			dot += a[r] * CMatrixTemplate<T>::get_unsafe(r,c);

		out_v[c] = dot;
	}

	MRPT_TRY_END
}



/** Multiply 2 matrices and save the result in "this" object, for the cases in which we know in advance that the result will be a symmetrical matrix (DO NOT USE OTHERWISE!!).
  *  Only the entries with column >= row are computed; each value is then stored at both (i,j) and (j,i).
  *  Either operand may be this very object, in which case a scratch buffer is used.
  *  Sizes are only verified in debug builds (or when MRPT_ALWAYS_CHECKS_DEBUG_MATRICES is set).
*/
template <class T>
void CMatrixTemplateNumeric<T>::multiply_result_is_symmetric(const CMatrixTemplateNumeric<T>& m1, const CMatrixTemplateNumeric<T>& m2)
{
	MRPT_TRY_START

	const size_t M1R = m1.getRowCount();
	const size_t M1C = m1.getColCount();
	const size_t M2C = m2.getColCount();

#if defined(_DEBUG) || (MRPT_ALWAYS_CHECKS_DEBUG_MATRICES)
	if (m1.getColCount() != m2.getRowCount())
		THROW_EXCEPTION( "multiply: Inconsistent matrix sizes in multiplication!");
	// A symmetric result is necessarily square; otherwise the mirrored writes below would be out of range:
	if (M1R != M2C)
		THROW_EXCEPTION( "multiply_result_is_symmetric: The result would not be a square matrix!");
#endif
	// If one of the matrices is me, make a copy:
	if (&m1==this || &m2==this)
	{
		// Save the computed half of the result (row by row, columns j>=i) in a temporary buffer:
		T  *temp= new T[M1R*M2C];

		T  *ptr = temp;
		size_t i;
		for (i=0; i < M1R; i++)
		{
			for (size_t j=i; j < M2C; j++)
			{
				T accum = 0;
				for (size_t k=0; k < M1C; k++)
					accum += m1.get_unsafe(i,k) * m2.get_unsafe(k,j);
				*(ptr++) = accum;
			}
		}

		// Copy from temp. The loop bounds MUST match those used while filling the buffer (j up to M2C):
		setSize(M1R,M2C);
		ptr = temp;
		for (i=0; i < M1R; i++)
			for (size_t j=i; j < M2C; j++)
			{
				set_unsafe(i,j,  *ptr );
				set_unsafe(j,i,  *ptr );
				ptr++;
			}

		delete[] temp;
	}
	else
	{
		// Work directly over the data:
		setSize( M1R,M2C );

		for (size_t i=0; i < M1R; i++)
		{
			for (size_t j=i; j < M2C; j++)
			{
				T accum = 0;
				for (size_t k=0; k < M1C; k++)
					accum += m1.get_unsafe(i,k) * m2.get_unsafe(k,j);
				// Store the computed upper-triangle value and mirror it into the lower triangle:
				set_unsafe(i,j,accum);
				set_unsafe(j,i,accum);
			}
		}
	}

	MRPT_TRY_END
}


/** Matrix multiplication of this matrix with a submatrix of 'A', saving the result in a third matrix.
  *   OUT = THIS * A
  *  If this matrix is NxK, the submatrix of A used is the KxA_col_count block whose top-left corner
  *  is at (A_rows_offset, A_cols_offset), and outResult is resized to N x A_col_count.
  *  The block bounds are only verified in debug builds (or when MRPT_ALWAYS_CHECKS_DEBUG_MATRICES is set).
  */
template <class T>
void CMatrixTemplateNumeric<T>::multiplySubMatrix (
	const CMatrixTemplateNumeric<T> &A,
	CMatrixTemplateNumeric<T>       &outResult,
	const size_t                    &A_cols_offset,
	const size_t                    &A_rows_offset,
	const size_t                    &A_col_count )
{
	MRPT_TRY_START

	// The output will be NxM:
	size_t  N = CMatrixTemplate<T>::m_Rows;
	size_t  M = A_col_count;

#if defined(_DEBUG) || (MRPT_ALWAYS_CHECKS_DEBUG_MATRICES)
	ASSERT_( A.m_Cols >= A_col_count + A_cols_offset );
	// The inner index "k" below walks over the COLUMNS of this matrix, so that is the number of rows needed from A:
	ASSERT_( A.m_Rows >= CMatrixTemplate<T>::m_Cols + A_rows_offset );
#endif
	outResult.setSize(N,M);

	for (size_t i=0; i < N; i++)
	{
		for (size_t j=0; j < M; j++)
		{
			T tmp = 0;
			for (size_t k=0; k < CMatrixTemplate<T>::m_Cols; k++)
				tmp += CMatrixTemplate<T>::m_Val[i][k] * A.m_Val[k+A_rows_offset][j+A_cols_offset];
			outResult.m_Val[i][j] = tmp;
		}
	}
	MRPT_TRY_END
}

/** Element-by-element division of this matrix by "m".
  *  Elements whose divisor in "m" is zero are left untouched.
  *  Sizes are only verified in debug builds (or when MRPT_ALWAYS_CHECKS_DEBUG_MATRICES is set).
  * \return A reference to THIS object.
  */
template <class T>
CMatrixTemplateNumeric<T>&  CMatrixTemplateNumeric<T>::operator /= (const CMatrixTemplateNumeric<T>& m)
{
#if defined(_DEBUG) || (MRPT_ALWAYS_CHECKS_DEBUG_MATRICES)
	if (!(CMatrixTemplate<T>::m_Cols == m.m_Cols && CMatrixTemplate<T>::m_Rows == m.m_Rows))
		THROW_EXCEPTION( "operator/= : Matrixes are not of the same dimensions!");
#endif
	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		for (size_t c=0; c < m.m_Cols; c++)
		{
			// Skip null divisors:
			if (m.m_Val[r][c]==0)
				continue;
			CMatrixTemplate<T>::m_Val[r][c] /= m.m_Val[r][c];
		}
	}
	return *this;
}


/** Divides, in place, every element of the matrix by the scalar "c".
  *  An exception is raised if "c" is zero.
  * \return A reference to THIS object.
  */
template <class T>
CMatrixTemplateNumeric<T>& CMatrixTemplateNumeric<T>::operator /= (const T& c)
{
	MRPT_TRY_START;
	if (c==0)
		THROW_EXCEPTION("Aborted! Trying to divide by zero!");

	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t k=0; k < CMatrixTemplate<T>::m_Cols; k++)
			rowData[k] /= c;
	}
	return *this;

	MRPT_TRY_END;
}

/** Adds, in place, the scalar "c" to every element of the matrix.
  * \return A reference to THIS object.
  */
template <class T>
CMatrixTemplateNumeric<T>& CMatrixTemplateNumeric<T>::operator += (const T& c)
{
	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t k=0; k < CMatrixTemplate<T>::m_Cols; k++)
			rowData[k] += c;
	}
	return *this;
}

/** Subtracts, in place, the scalar "c" from every element of the matrix.
  * \return A reference to THIS object.
  */
template <class T>
CMatrixTemplateNumeric<T>& CMatrixTemplateNumeric<T>::operator -= (const T& c)
{
	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t k=0; k < CMatrixTemplate<T>::m_Cols; k++)
			rowData[k] -= c;
	}
	return *this;
}

/** Matrix power and assignment: this matrix is multiplied by a copy of its original value (pow-1) times.
  *  For "pow" equal to 0 or 1 no product is performed and the matrix is left unchanged.
  * \return A reference to THIS object.
  */
template <class T>
CMatrixTemplateNumeric<T>& CMatrixTemplateNumeric<T>:: operator ^= (const unsigned int& pow)
{
	// Keep the original matrix as the constant right-hand factor:
	const CMatrixTemplateNumeric<T> base(*this);

	for (unsigned int done=1; done < pow; done++)
		*this = (*this) * base;

	return *this;
}

/** Raises, in place, each element of the matrix to the power "s" (element-wise; this is different from the ^ operator).
	*/
template <class T>
void CMatrixTemplateNumeric<T>::scalarPow(T s)
{
	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
		{
			T &elem = rowData[c];
			elem = pow(elem,s);
		}
	}
}

/** Resizes the matrix to row x col and then sets all its elements to zero.
*/
template <class T>
void CMatrixTemplateNumeric<T>::zeros(const size_t& row, const size_t& col)
{
	this->setSize(row,col);
	this->zeros();
}

/** Sets all the elements of the matrix to zero, keeping its current size.
*/
template <class T>
void CMatrixTemplateNumeric<T>::zeros()
{
	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
			rowData[c] = 0;
	}
}

/** Resizes the matrix to row x col and then sets all its elements to one.
*/
template <class T>
void CMatrixTemplateNumeric<T>::ones(const size_t& row, const size_t& col)
{
	this->setSize(row,col);
	this->ones();
}

/** Sets all the elements of the matrix to one, keeping its current size.
*/
template <class T>
void CMatrixTemplateNumeric<T>::ones()
{
	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
			rowData[c] = 1;
	}
}

/** Resizes the matrix to row x row and then turns it into a unit (identity) matrix.
*/
template <class T>
void CMatrixTemplateNumeric<T>::unit (const size_t& row)
{
	this->setSize(row,row);
	this->unit();
}

/** Turns the matrix into a unit matrix, keeping its current size: ones where the row and column
  *  indexes are equal, zeros everywhere else.
*/
template <class T>
void CMatrixTemplateNumeric<T>::unit()
{
	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
		{
			if (c==r)
				rowData[c] = 1;
			else
				rowData[c] = 0;
		}
	}
}

/** Solves the linear equations system  THIS * X = v  and returns X.
  *  This matrix must be square and "v" must have as many rows as this matrix; each column of "v" is
  *  an independent right-hand side. An exception is raised on inconsistent sizes, or when pivot()
  *  reports -1 for some column (treated here as a singular matrix). This matrix is not modified:
  *  all the work is done on an augmented copy.
*/
template <class T>
CMatrixTemplateNumeric<T> CMatrixTemplateNumeric<T>::solve (const CMatrixTemplateNumeric<T>& v)
{
	size_t	i,j,k;
	T				a1;

	if (!(CMatrixTemplate<T>::m_Rows == CMatrixTemplate<T>::m_Cols && CMatrixTemplate<T>::m_Cols == v.m_Rows))
		THROW_EXCEPTION( "solve:Inconsistent matrices!");

	// Build the augmented matrix [ THIS | v ]:
	CMatrixTemplateNumeric<T>	temp(CMatrixTemplate<T>::m_Rows,CMatrixTemplate<T>::m_Cols+v.m_Cols);
	for (i=0; i < CMatrixTemplate<T>::m_Rows; i++)
	{
		for (j=0; j < CMatrixTemplate<T>::m_Cols; j++)
			temp.m_Val[i][j] = CMatrixTemplate<T>::m_Val[i][j];
		for (k=0; k < v.m_Cols; k++)
			temp.m_Val[i][CMatrixTemplate<T>::m_Cols+k] = v.m_Val[i][k];
	}
	// Forward elimination, one column at a time:
	for (k=0; k < CMatrixTemplate<T>::m_Rows; k++)
	{
		// NOTE(review): pivot() is defined elsewhere; presumably it swaps rows of "temp" to bring a usable
		//  pivot to row k and returns -1 if there is none -- confirm against its implementation.
		int indx = temp.pivot(k);
		if (indx == -1)
		{
			std::cout << "[solve] Matrix that leaded to error is:" << std::endl << (*this) << std::endl;
			THROW_EXCEPTION( "solve: Singular matrix!");
		}

		// Normalize the pivot row so the diagonal element becomes 1:
		a1 = temp.m_Val[k][k];
		for (j=k; j < temp.m_Cols; j++)
			temp.m_Val[k][j] /= a1;

		// Cancel column k in all the rows below the pivot row:
		for (i=k+1; i < CMatrixTemplate<T>::m_Rows; i++)
		{
			a1 = temp.m_Val[i][k];
			for (j=k; j < temp.m_Cols; j++)
				temp.m_Val[i][j] -= a1 * temp.m_Val[k][j];
		}
	}

	// Back substitution, from the last row up, for each right-hand side column:
	CMatrixTemplateNumeric<T>	s(v.m_Rows,v.m_Cols);
	for (k=0; k < v.m_Cols; k++)
		for (int m=int(CMatrixTemplate<T>::m_Rows)-1; m >= 0; m--)
		{
			s.m_Val[m][k] = temp.m_Val[m][CMatrixTemplate<T>::m_Cols+k];
			for (j=m+1; j < CMatrixTemplate<T>::m_Cols; j++)
				s.m_Val[m][k] -= temp.m_Val[m][j] * s.m_Val[j][k];
		}
	return s;
}

/** Computes the adjoint of the matrix: element (j,i) of the returned matrix is the cofactor (i,j) of this one.
  *  An exception is raised if the matrix is not square.
*/
template <class T>
CMatrixTemplateNumeric<T> CMatrixTemplateNumeric<T>:: adj() const
{
	if (!(CMatrixTemplate<T>::m_Rows == CMatrixTemplate<T>::m_Cols))
		THROW_EXCEPTION( "adj: Adjoin of a non-square matrix.");

	CMatrixTemplateNumeric<T>	adjoint(CMatrixTemplate<T>::m_Rows,CMatrixTemplate<T>::m_Cols);

	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
		{
			// Note the transposed destination:
			adjoint.m_Val[c][r] = cofact(r,c);
		}
	}
	return adjoint;
}

/** Computes the inverse of the matrix.
*	Returns the inverse without modifying the original matrix: the elimination runs on a copy of this
*	matrix ("temp2") while the same row operations are applied to a unit matrix ("temp"), which is
*	returned. An exception is raised if the matrix is not square, or when pivot() reports -1
*	(treated here as a singular matrix); in the latter case the matrix is also dumped to std::cerr
*	and to the file 'err_mat.txt'.
*/
template <class T>
CMatrixTemplateNumeric<T> CMatrixTemplateNumeric<T>:: inv() const
{
	size_t	i,j,k;
	T				a1,a2,*rowptr;

	if (CMatrixTemplate<T>::m_Rows != CMatrixTemplate<T>::m_Cols)
		THROW_EXCEPTION( "operator!: Inversion of a non-square matrix");

	CMatrixTemplateNumeric<T>	temp(CMatrixTemplate<T>::m_Rows,CMatrixTemplate<T>::m_Cols);
	CMatrixTemplateNumeric<T>	temp2( *this );

	temp.unit();
	for (k=0; k < CMatrixTemplate<T>::m_Rows; k++)
	{
		// NOTE(review): pivot() is defined elsewhere; presumably it swaps rows of "temp2" and returns the
		//  index of the row swapped with row k (0 if no swap, -1 if no valid pivot) -- confirm.
		int indx = temp2.pivot(k);
		if (indx == -1)
		{
			std::cerr << "[inv] Matrix that leaded to error is (also in 'err_mat.txt'):" << std::endl << (*this) << std::endl;
			saveToTextFile("err_mat.txt");
			THROW_EXCEPTION( "operator!: Inversion of a singular matrix");
		}

		if (indx != 0)
		{
			// Swap rows k and indx of "temp" by exchanging their row pointers:
			rowptr = temp.m_Val[k];
			temp.m_Val[k] = temp.m_Val[indx];
			temp.m_Val[indx] = rowptr;
		}
		// Normalize row k of both matrices by the pivot element:
		a1 = temp2.m_Val[k][k];
		for (j=0; j < temp2.m_Rows; j++)
		{
			temp2.m_Val[k][j] /= a1;
			temp.m_Val[k][j] /= a1;
		}
		// Cancel column k in every other row (above and below the pivot row):
		for (i=0; i < temp2.m_Rows; i++)
		{
			if (i != k)
			{
				a2 = temp2.m_Val[i][k];
				for (j=0; j < temp2.m_Rows; j++)
				{
					temp2.m_Val[i][j] -= a2 * temp2.m_Val[k][j];
					temp.m_Val[i][j] -= a2 * temp.m_Val[k][j];
				}
			}
		}
	}
	return temp;
}

/** Computes the Inverse of matrix, DESTROYING the current matrix and returning the inverse in an user-supplied matrix.
*  The elimination is run in place over this matrix, while the same row operations are applied to
*  "out_inv", which is first resized to the size of this matrix and set to the unit matrix.
*  An exception is raised if the matrix is not square, or when pivot() reports -1 (treated here as a
*  singular matrix).
*  By AJOGD/JLBC
*/
template <class T>
void CMatrixTemplateNumeric<T>::inv_fast( CMatrixTemplateNumeric<T> &out_inv )
{
	size_t	i,j,k;
	T				a1,a2,*rowptr;

	if (CMatrixTemplate<T>::m_Rows != CMatrixTemplate<T>::m_Cols)
		THROW_EXCEPTION( "operator!: Inversion of a non-square matrix");

	out_inv.setSize(CMatrixTemplate<T>::m_Rows,CMatrixTemplate<T>::m_Cols);
	out_inv.unit();
	for (k=0; k < CMatrixTemplate<T>::m_Rows; k++)
	{
		// NOTE(review): pivot() is defined elsewhere; presumably it swaps rows of this matrix and returns the
		//  index of the row swapped with row k (0 if no swap, -1 if no valid pivot) -- confirm.
		int indx = (*this).pivot(k);
		if (indx == -1)
		{
			std::cerr << "[inv] Matrix that leaded to error is:" << std::endl << (*this) << std::endl;
			THROW_EXCEPTION( "operator!: Inversion of a singular matrix");
		}

		if (indx != 0)
		{
			// Swap rows k and indx of "out_inv" by exchanging their row pointers:
			rowptr = out_inv.m_Val[k];
			out_inv.m_Val[k] = out_inv.m_Val[indx];
			out_inv.m_Val[indx] = rowptr;
		}
		// Normalize row k of both matrices by the pivot element:
		a1 = CMatrixTemplate<T>::m_Val[k][k];
		for (j=0; j < CMatrixTemplate<T>::m_Rows; j++)
		{
			CMatrixTemplate<T>::m_Val[k][j] /= a1;
			out_inv.m_Val[k][j] /= a1;
		}
		// Cancel column k in every other row (above and below the pivot row):
		for (i=0; i < CMatrixTemplate<T>::m_Rows; i++)
		{
			if (i != k)
			{
				a2 = CMatrixTemplate<T>::m_Val[i][k];
				for (j=0; j < CMatrixTemplate<T>::m_Rows; j++)
				{
					CMatrixTemplate<T>::m_Val[i][j] -= a2 * CMatrixTemplate<T>::m_Val[k][j];
					out_inv.m_Val[i][j] -= a2 * out_inv.m_Val[k][j];
				}
			}
		}
	}
}

/** Computes the determinant of the matrix by elimination over a working copy (this matrix is not modified).
  *  The determinant is accumulated as the product of the pivot elements, changing its sign every time
  *  pivot() returns a non-zero index; zero is returned as soon as pivot() returns -1.
  *  An exception is raised if the matrix is not square.
*/
template <class T>
T CMatrixTemplateNumeric<T>::det() const
{
	if (!(CMatrixTemplate<T>::m_Rows == CMatrixTemplate<T>::m_Cols))
		THROW_EXCEPTION( "det: Determinant a non-square matrix!");

	const size_t  N = CMatrixTemplate<T>::m_Rows;
	T  result = T(1);

	CMatrixTemplateNumeric<T>	work(*this);

	for (size_t col=0; col < N; col++)
	{
		const int	swapIdx = work.pivot(col);
		if (swapIdx == -1)
			return 0;
		if (swapIdx != 0)
			result = - result;

		// The pivot element is not modified by the elimination below, so it can be read once:
		const T  diag = work.m_Val[col][col];
		result = result * diag;

		for (size_t r=col+1; r < N; r++)
		{
			const T  factor = work.m_Val[r][col] / diag;
			for (size_t c=col+1; c < N; c++)
				work.m_Val[r][c] -= factor * work.m_Val[col][c];
		}
	}
	return result;
}

/** Computes the norm of the matrix: the square root of the sum of the squares of all its elements.
*/
template <class T>
T CMatrixTemplateNumeric<T>::norm() const
{
	T	sumSq = 0;

	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		const T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
			sumSq += rowData[c] * rowData[c];
	}

	sumSq = ::sqrt( sumSq );
	return sumSq;
}

/** Computes the cofactor (row,col): the determinant of the matrix obtained by removing the given
  *  row and column, with its sign changed when (row+col) is odd.
  *  An exception is raised if the matrix is not square or if any index is out of range
  *  (valid indexes go from 0 to N-1).
*/
template <class T>
T CMatrixTemplateNumeric<T>::cofact (size_t row, size_t col) const
{
	size_t i,i1,j,j1;

	if (CMatrixTemplate<T>::m_Rows != CMatrixTemplate<T>::m_Cols)
		THROW_EXCEPTION( "cofact: Cofactor of a non-square matrix!");

	// Indexes are 0-based, so "row==m_Rows" is already out of range. Letting it pass would remove no
	//  row at all and overflow the (N-1)x(N-1) minor below. This also rejects any index on an empty matrix.
	if (row >= CMatrixTemplate<T>::m_Rows || col >= CMatrixTemplate<T>::m_Cols)
		THROW_EXCEPTION( "cofact: Index out of range!");

	CMatrixTemplateNumeric<T> temp (CMatrixTemplate<T>::m_Rows-1,CMatrixTemplate<T>::m_Cols-1);

	// Copy all the elements but those in the removed row & column; (i1,j1) index the minor:
	for (i=i1=0; i < CMatrixTemplate<T>::m_Rows; i++)
	{
		if (i == row)
			continue;
		for (j=j1=0; j < CMatrixTemplate<T>::m_Cols; j++)
		{
			if (j == col)
				continue;
			temp.m_Val[i1][j1] = CMatrixTemplate<T>::m_Val[i][j];
			j1++;
		}
		i1++;
	}
	T	cof = temp.det();
	if ((row+col)%2 == 1)
		cof = -cof;

	return cof;
}

/** Computes the condition number of the matrix as norm(THIS) * norm(inverse of THIS).
*/
template <class T>
T CMatrixTemplateNumeric<T>::cond()
{
	const CMatrixTemplateNumeric<T> inverse( inv() );
	const T  normOfInverse = inverse.norm();
	return norm() * normOfInverse;
}

/** Returns true if the matrix is square and its determinant is exactly zero.
  *  Non-square matrices always yield false.
  */
template <class T>
bool CMatrixTemplateNumeric<T>::isSingular() const
{
	const bool isSquare = (CMatrixTemplate<T>::m_Rows == CMatrixTemplate<T>::m_Cols);
	if (!isSquare)
		return false;

	const T  d = det();
	return d == T(0);
}

/** Returns true if the matrix is square and all the elements out of the diagonal are exactly zero.
  */
template <class T>
bool CMatrixTemplateNumeric<T>::isDiagonal() const
{
	if (!(CMatrixTemplate<T>::m_Rows == CMatrixTemplate<T>::m_Cols))
		return false;

	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		const T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
		{
			if (r == c)
				continue;	// Diagonal elements may hold any value
			if (rowData[c] != T(0))
				return false;
		}
	}
	return true;
}

/** Returns true if the matrix is diagonal and all the elements in its diagonal are equal to each other.
  *  An empty matrix has no element (0,0) to compare against, hence it is reported as NOT scalar.
  */
template <class T>
bool CMatrixTemplateNumeric<T>::isScalar() const
{
	if (!isDiagonal())
		return false;

	// isDiagonal() also accepts an empty (0x0) matrix: avoid reading m_Val[0][0] in that case.
	if (CMatrixTemplate<T>::m_Rows == 0)
		return false;

	T	v = CMatrixTemplate<T>::m_Val[0][0];
	for (size_t i=1; i < CMatrixTemplate<T>::m_Rows; i++)
		if (CMatrixTemplate<T>::m_Val[i][i] != v)
			return false;
	return true;
}

/** Returns true if isScalar() holds and the element (0,0) is exactly one.
  */
template <class T>
bool CMatrixTemplateNumeric<T>::isUnit() const
{
	if (!isScalar())
		return false;

	return CMatrixTemplate<T>::m_Val[0][0] == T(1);
}

/** Returns true if all the elements of the matrix are exactly zero (also for a matrix without elements).
  */
template <class T>
bool CMatrixTemplateNumeric<T>::isNull() const
{
	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		const T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
		{
			if (rowData[c] != T(0))
				return false;
		}
	}
	return true;
}

/** Returns true if the matrix is square and every element (i,j) is exactly equal to the element (j,i).
  */
template <class T>
bool CMatrixTemplateNumeric<T>::isSymmetric() const
{
	if (!(CMatrixTemplate<T>::m_Rows == CMatrixTemplate<T>::m_Cols))
		return false;

	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		const T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
		{
			if (rowData[c] != CMatrixTemplate<T>::m_Val[c][r])
				return false;
		}
	}
	return true;
}

/** Returns true if the matrix is square and every element (i,j) is exactly equal to minus the element (j,i).
  */
template <class T>
bool CMatrixTemplateNumeric<T>::isSkewSymmetric() const
{
	if (!(CMatrixTemplate<T>::m_Rows == CMatrixTemplate<T>::m_Cols))
		return false;

	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		const T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
		{
			if (rowData[c] != -CMatrixTemplate<T>::m_Val[c][r])
				return false;
		}
	}
	return true;
}

/** Returns true if the matrix is square and all the elements strictly below the diagonal are exactly zero.
  */
template <class T>
bool CMatrixTemplateNumeric<T>::isUpperTriangular() const
{
	if (CMatrixTemplate<T>::m_Rows != CMatrixTemplate<T>::m_Cols)
		return false;
	// For each row i, the elements below the diagonal are the columns 0..i-1,
	//  INCLUDING the first sub-diagonal (j=i-1):
	for (size_t i=1; i < CMatrixTemplate<T>::m_Rows; i++)
		for (size_t j=0; j < i; j++)
		if (CMatrixTemplate<T>::m_Val[i][j] != T(0))
			return false;
	return true;
}

/** Returns true if the matrix is square and all the elements strictly above the diagonal are exactly zero.
  */
template <class T>
bool CMatrixTemplateNumeric<T>::isLowerTriangular() const
{
	if (CMatrixTemplate<T>::m_Rows != CMatrixTemplate<T>::m_Cols)
		return false;

	// For each column j, the elements above the diagonal are the rows 0..j-1,
	//  INCLUDING the first super-diagonal (i=j-1):
	for (size_t j=1; j < CMatrixTemplate<T>::m_Cols; j++)
		for (size_t i=0; i < j; i++)
		if (CMatrixTemplate<T>::m_Val[i][j] != T(0))
			return false;

	return true;
}


/** Applies floor() in place to every element of the matrix (round towards minus infinity).
  * (by AJOGD @ JAN-2007)
  */
template <class T>
void CMatrixTemplateNumeric<T>::matrix_floor()
{
	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
		{
			T &elem = rowData[c];
			elem = floor(elem);
		}
	}
}

/** Stores in "out" (resized to the size of this matrix) the result of applying floor() to every
  *  element of this matrix (round towards minus infinity).
  * (by AJOGD @ JAN-2007)
  */
template <class T>
void CMatrixTemplateNumeric<T>::matrix_floor(CMatrixTemplateNumeric<T> &out)
{
	out.setSize(CMatrixTemplate<T>::m_Rows,CMatrixTemplate<T>::m_Cols);

	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
		{
			out(r,c) = floor( CMatrixTemplate<T>::m_Val[r][c] );
		}
	}
}

/** Applies ceil() in place to every element of the matrix (round towards plus infinity).
  * (by AJOGD @ JAN-2007)
  */
template <class T>
void CMatrixTemplateNumeric<T>::matrix_ceil()
{
	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		T *rowData = CMatrixTemplate<T>::m_Val[r];
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
		{
			T &elem = rowData[c];
			elem = ceil(elem);
		}
	}
}

/** Finds the maximum value in the matrix, and returns its position.
  *  On return, "umax" holds the COLUMN index and "vmax" the ROW index of the first element (scanning
  *  row by row) holding the maximum value, which is stored in "max_val".
  *  The matrix must not be empty (asserted).
  * (by AJOGD @ JAN-2007)
  */
template <class T>
void CMatrixTemplateNumeric<T>::find_index_max_value(size_t &umax, size_t &vmax, T &max_val) const
{
	// Element (0,0) is read unconditionally below, so a non-empty matrix is required
	//  (same precondition as in find_index_min_value):
	ASSERT_(CMatrixTemplate<T>::getRowCount()>0 && CMatrixTemplate<T>::getColCount()>0);

	max_val = CMatrixTemplate<T>::get_unsafe(0,0);
	umax = vmax = 0;
	for (size_t i=0;i<CMatrixTemplate<T>::getRowCount();i++)
	{
		for(size_t j=0;j<CMatrixTemplate<T>::getColCount();j++)
		{
			if (max_val<CMatrixTemplate<T>::get_unsafe(i,j))
			{
				max_val=CMatrixTemplate<T>::get_unsafe(i,j);
				umax=j;
				vmax=i;
			}
		}
	}
}

/** Returns the maximum value among the elements in the diagonal of the matrix.
  *  Zero is returned for a matrix without rows or without columns; otherwise the matrix must be square (asserted).
  */
template <class T>
T CMatrixTemplateNumeric<T>::maximumDiagonal() const
{
	const size_t nRows = CMatrixTemplate<T>::getRowCount();
	const size_t nCols = CMatrixTemplate<T>::getColCount();

	if (nRows==0 || nCols==0)
		return static_cast<T>(0);

	ASSERT_( CMatrixTemplate<T>::getRowCount() == CMatrixTemplate<T>::getColCount() );

	T  best = (*this)(0,0);
	for (size_t d=0; d < nRows; d++)
	{
		if (best < CMatrixTemplate<T>::get_unsafe(d,d))
			best = CMatrixTemplate<T>::get_unsafe(d,d);
	}

	return best;
}

/** Returns the maximum value among all the elements of the matrix, or zero for a matrix without rows or without columns.
  */
template <class T>
T CMatrixTemplateNumeric<T>::maximum() const
{
	const size_t nRows = CMatrixTemplate<T>::getRowCount();
	const size_t nCols = CMatrixTemplate<T>::getColCount();

	if (nRows==0 || nCols==0)
		return static_cast<T>(0);

	T  best = (*this)(0,0);
	for (size_t r=0; r < nRows; r++)
	{
		for (size_t c=0; c < nCols; c++)
		{
			if (best < CMatrixTemplate<T>::get_unsafe(r,c))
				best = CMatrixTemplate<T>::get_unsafe(r,c);
		}
	}

	return best;
}

/** Returns the minimum value among all the elements of the matrix, or zero for a matrix without rows or without columns.
  */
template <class T>
T CMatrixTemplateNumeric<T>::minimum() const
{
	const size_t nRows = CMatrixTemplate<T>::getRowCount();
	const size_t nCols = CMatrixTemplate<T>::getColCount();

	if (nRows==0 || nCols==0)
		return static_cast<T>(0);

	T  best = (*this)(0,0);
	for (size_t r=0; r < nRows; r++)
	{
		for (size_t c=0; c < nCols; c++)
		{
			if (best > CMatrixTemplate<T>::get_unsafe(r,c))
				best = CMatrixTemplate<T>::get_unsafe(r,c);
		}
	}
	return best;
}

/** Finds the minimum value in the matrix, and returns its position.
  *  On return, "umin" holds the COLUMN index and "vmin" the ROW index of the first element (scanning
  *  row by row) holding the minimum value, which is stored in "min_val".
  *  The matrix must not be empty (asserted).
  * (by AJOGD @ JAN-2007)
  */
template <class T>
void CMatrixTemplateNumeric<T>::find_index_min_value(size_t  &umin, size_t  &vmin, T &min_val) const
{
	ASSERT_(CMatrixTemplate<T>::getRowCount()>0 && CMatrixTemplate<T>::getColCount()>0);
	min_val = (*this)(0,0);
	// Start at (0,0): the loop below only writes the indexes when a strictly smaller value is found,
	//  so they must be valid as well when the minimum is the very first element.
	umin = vmin = 0;
	for (size_t i=0;i<CMatrixTemplate<T>::getRowCount();i++)
	{
		for(size_t j=0;j<CMatrixTemplate<T>::getColCount();j++)
		{
			if (min_val>(*this)(i,j))
			{
				min_val=(*this)(i,j);
				umin=j;
				vmin=i;
			}
		}
	}
}

/** Force symmetry in the matrix: every element above the diagonal is copied onto its mirrored
  *  position below the diagonal. An exception is raised if the matrix is not square.
  * (by AJOGD @ JAN-2007)
  */
template <class T>
void CMatrixTemplateNumeric<T>::force_symmetry()
{
	if (CMatrixTemplate<T>::m_Rows!=CMatrixTemplate<T>::m_Cols)
		THROW_EXCEPTION("Error in force_symmetry. The matrix is not square");

	// "i+1<m_Rows" rather than "i<m_Rows-1": the latter wraps around in unsigned arithmetic for an
	//  empty matrix, leading to a (practically) endless loop.
	for (size_t i=0;i+1<CMatrixTemplate<T>::m_Rows;i++)
		for(size_t j=i+1;j<CMatrixTemplate<T>::m_Cols;j++)
			CMatrixTemplate<T>::set_unsafe(j,i,  CMatrixTemplate<T>::get_unsafe(i,j) );
}


/** Computes a vector with the mean value of each column in the matrix.
  *  The output vector is resized to the number of columns. The matrix must not be empty (asserted).
  * \sa meanAndStdAll
  */
template <class T>
void CMatrixTemplateNumeric<T>::mean( std::vector<T> &outMeanVector ) const
{
	MRPT_TRY_START;

	const size_t	numCols = CMatrixTemplate<T>::getColCount();
	const size_t	numRows = CMatrixTemplate<T>::getRowCount();
	ASSERT_(numCols!=0);
	ASSERT_(numRows!=0);

	outMeanVector.resize(numCols);

	for (size_t col=0; col < numCols; col++)
	{
		// Sum of the column, then divide by the number of rows:
		T	colSum = 0;
		for (size_t row=0; row < numRows; row++)
			colSum += (*this)(row,col);

		outMeanVector[col] = colSum / numRows;
	}

	MRPT_TRY_END;
}

/** Computes a vector with the mean value of each column in the matrix, and another vector with the
  *  standard deviation of each column. Both output vectors are resized to the number of columns.
  *  The squared deviations are divided by (rows-1), or by the number of rows if there is one row only.
  *  The matrix must not be empty (asserted).
  * \sa mean,meanAndStdAll
  */
template <class T>
void CMatrixTemplateNumeric<T>::meanAndStd(
	std::vector<T> &outMeanVector,
	std::vector<T> &outStdVector ) const
{
	MRPT_TRY_START;

	const size_t	numCols = CMatrixTemplate<T>::getColCount();
	const size_t	numRows = CMatrixTemplate<T>::getRowCount();
	ASSERT_(numCols>=1);
	ASSERT_(numRows>=1);

	// First pass: the mean of each column.
	outMeanVector.resize(numCols);

	for (size_t col=0; col < numCols; col++)
	{
		T	colSum = 0;
		for (size_t row=0; row < numRows; row++)
			colSum += (*this)(row,col);
		outMeanVector[col] = colSum / numRows;
	}

	// Second pass: the standard deviation of each column.
	outStdVector.resize(numCols);
	const size_t	divisor = (numRows>1) ? (numRows-1) : numRows;

	for (size_t col=0; col < numCols; col++)
	{
		const T	colMean = outMeanVector[col];
		T	sqSum = 0;
		for (size_t row=0; row < numRows; row++)
			sqSum += mrpt::utils::square( (*this)(row,col) - colMean );

		outStdVector[col] = ::sqrt(sqSum / divisor);
	}

	MRPT_TRY_END;
}

/** Computes the mean and the standard deviation of all the elements in the matrix as a whole.
  *  The squared deviations are divided by the total number of elements (rows x columns).
  *  The matrix must not be empty (asserted).
  * \sa mean,meanAndStd
  */
template <class T>
void CMatrixTemplateNumeric<T>::meanAndStdAll(
	T &outMean,
	T &outStd )  const
{
	MRPT_TRY_START;

	const size_t	numCols = CMatrixTemplate<T>::getColCount();
	const size_t	numRows = CMatrixTemplate<T>::getRowCount();
	ASSERT_(numCols!=0);
	ASSERT_(numRows!=0);

	// First pass: the mean of all the elements.
	outMean=0;

	for (size_t col=0; col < numCols; col++)
	{
		for (size_t row=0; row < numRows; row++)
			outMean += (*this)(row,col);
	}
	outMean /= numRows*numCols;

	// Second pass: the standard deviation with respect to that mean.
	outStd=0;
	for (size_t col=0; col < numCols; col++)
	{
		for (size_t row=0; row < numRows; row++)
			outStd += mrpt::utils::square( (*this)(row,col) - outMean );
	}
	outStd = ::sqrt(outStd  / (numRows*numCols) );

	MRPT_TRY_END;
}

/** Stores in "aux" all the elements of this matrix as a single column (of size rows*cols x 1),
  *  in row-major order: the first row first, then the second row, and so on.
  */
template <class T>
void CMatrixTemplateNumeric<T>::asCol(CMatrixTemplateNumeric<T>	&aux) const
{
	aux.setSize(CMatrixTemplate<T>::m_Cols*CMatrixTemplate<T>::m_Rows,1);

	size_t  outIdx = 0;	// Always equal to c + r*m_Cols
	for (size_t r=0; r < CMatrixTemplate<T>::m_Rows; r++)
	{
		for (size_t c=0; c < CMatrixTemplate<T>::m_Cols; c++)
		{
			aux(outIdx,0) = CMatrixTemplate<T>::m_Val[r][c];
			outIdx++;
		}
	}
}


/** Copies all the elements of this matrix into a one-row matrix.
  *  The output is resized to 1 x (rows*cols) and filled in row-major order: element (i,j) goes to column j+i*cols.
  * \param aux The output row matrix.
  * \sa asCol
  */
template <class T>
void CMatrixTemplateNumeric<T>::asRow(CMatrixTemplateNumeric<T>	&aux) const
{
	aux.setSize(1,CMatrixTemplate<T>::m_Cols*CMatrixTemplate<T>::m_Rows);

	for (size_t i=0;i<CMatrixTemplate<T>::m_Rows;i++)
		// The inner index runs over the columns of the source row, so it must be bounded by m_Cols.
		for (size_t j=0;j<CMatrixTemplate<T>::m_Cols;j++)
			aux(0,j+i*CMatrixTemplate<T>::m_Cols)=CMatrixTemplate<T>::m_Val[i][j];
}


/** Finds the elements whose values lie a given number of standard deviations above (or below) the mean of the whole matrix.
  *  The mean and standard deviation are those returned by meanAndStdAll. The two output lists hold the "row" and "column"
  *  indexes (i,j) of the elements m[i][j] such as:
  *    m[i][j] > mean(matrix) + stdTimes*std(matrix)
  *  or, if "below" is "true", such as:
  *    m[i][j] < mean(matrix) - stdTimes*std(matrix)
  *  Elements are reported column by column. Both lists are cleared before being filled.
  * \param stdTimes Number of standard deviations defining the threshold.
  * \param rowIndexes Receives the row index of each selected element.
  * \param colIndexes Receives the column index of each selected element (same length as rowIndexes).
  * \param below Selects the lower threshold instead of the upper one.
  * \sa meanAndStdAll
  */
template <class T>
void CMatrixTemplateNumeric<T>::findElementsPassingMahalanobisThreshold(
	double					stdTimes,
	std::vector<size_t>		&rowIndexes,
	std::vector<size_t>		&colIndexes,
	bool					below ) const
{
	MRPT_TRY_START;

	const size_t	numCols = CMatrixTemplate<T>::getColCount();
	const size_t	numRows = CMatrixTemplate<T>::getRowCount();

	rowIndexes.clear();
	colIndexes.clear();

	// Global statistics of the matrix:
	T	avgValue,stdValue;
	meanAndStdAll(avgValue,stdValue);

	// The threshold sits stdTimes deviations away from the mean, on the requested side:
	const int		side = below ? (-1):1;
	const double	thres = avgValue + stdTimes * stdValue * side;

	for (size_t col=0;col<numCols;col++)
	{
		for (size_t row=0;row<numRows;row++)
		{
			const bool	selected = below ?
				( (*this)(row,col) < thres ) :
				( (*this)(row,col) > thres );
			if (!selected) continue;

			rowIndexes.push_back(row);
			colIndexes.push_back(col);
		}
	}

	MRPT_TRY_END;
}

/** Linearly rescales the elements of the matrix so that its minimum value becomes minVal and its maximum value becomes maxVal.
  *  Each element x is replaced by (x - curMin) * (maxVal-minVal)/(curMax-curMin) + minVal, where curMin and curMax are the
  *  current minimum and maximum of the matrix.
  *  If all the elements are equal (null current range) no scaling is possible and every element is set to minVal.
  *  Nothing is done for an empty matrix.
  * \param minVal The desired minimum value after the adjustment.
  * \param maxVal The desired maximum value after the adjustment.
  */
template <class T>
void CMatrixTemplateNumeric<T>::adjustRange( T minVal,T maxVal)
{
	MRPT_TRY_START;
	const size_t	nRows = CMatrixTemplate<T>::getRowCount();
	const size_t	nCols = CMatrixTemplate<T>::getColCount();
	if (!nRows || !nCols) return;

	const T	curMin = minimum();
	const T	curMax = maximum();
	const T	curRan = curMax-curMin;

	// A null range cannot be stretched: keep a unit scale so only the offset is applied.
	const T	scale = (curRan!=0) ? (maxVal-minVal)/curRan : static_cast<T>(1);

	// The offset minVal must be added AFTER scaling; otherwise it would get scaled as well
	// and the output range would not start at minVal.
	for (size_t r=0;r<nRows;r++)
		for (size_t c=0;c<nCols;c++)
			CMatrixTemplate<T>::m_Val[r][c] = (CMatrixTemplate<T>::m_Val[r][c] - curMin)*scale + minVal;

	MRPT_TRY_END;
}

/** Returns the sum of all the elements in the matrix (0 for an empty matrix).
  *  The elements are accumulated column by column.
  * \return The accumulated sum.
  * \sa sum
  */
template <class T>
T CMatrixTemplateNumeric<T>::sumAll() const
{
	MRPT_TRY_START;
	const size_t	numCols = CMatrixTemplate<T>::getColCount();
	const size_t	numRows = CMatrixTemplate<T>::getRowCount();

	T	total = 0;
	for (size_t col=0;col<numCols;col++)
	{
		for (size_t row=0;row<numRows;row++)
		{
			total += CMatrixTemplate<T>::m_Val[row][col];
		}
	}

	return total;
	MRPT_TRY_END;
}

/** Returns the sum of a rectangular part of the matrix, with all limits being inclusive.
  *  Passing std::numeric_limits<size_t>::max() as the last column/row means to sum up to the last column/row of the matrix.
  *  An empty matrix yields 0. An explicit last index must not be smaller than the corresponding first index
  *  and must lie within the matrix (both checked with ASSERT_).
  * \param firstRow Index of the first row to sum.
  * \param firstCol Index of the first column to sum.
  * \param lastRow Index of the last row to sum, or std::numeric_limits<size_t>::max() for the last row of the matrix.
  * \param lastCol Index of the last column to sum, or std::numeric_limits<size_t>::max() for the last column of the matrix.
  * \return The accumulated sum of the selected elements.
  * \sa sumAll
  */
template <class T>
T CMatrixTemplateNumeric<T>::sum(
	size_t firstRow ,
	size_t firstCol ,
	size_t lastRow  ,
	size_t lastCol  ) const
{
	MRPT_TRY_START;
	const size_t	totalCols = CMatrixTemplate<T>::getColCount();
	const size_t	totalRows = CMatrixTemplate<T>::getRowCount();

	if (totalCols==0 || totalRows==0) return 0;

	// Inclusive upper limits, defaulting to the last column/row of the matrix:
	size_t	col_1 = totalCols-1;
	size_t	row_1 = totalRows-1;

	const size_t	UP_TO_THE_END = std::numeric_limits<size_t>::max();

	if (lastCol!=UP_TO_THE_END)
	{
		ASSERT_(lastCol>= firstCol);
		col_1=lastCol;
	}
	if (lastRow!=UP_TO_THE_END)
	{
		ASSERT_(lastRow>= firstRow );
		row_1=lastRow;
	}

	ASSERT_( row_1 < CMatrixTemplate<T>::getRowCount() );
	ASSERT_( col_1 < CMatrixTemplate<T>::getColCount() );

	// Accumulate column by column:
	T	total = 0;
	for (size_t col=firstCol;col<=col_1;col++)
	{
		for (size_t row=firstRow;row<=row_1;row++)
			total += CMatrixTemplate<T>::m_Val[row][col];
	}

	return total;
	MRPT_TRY_END;
}

/** Computes:  R = H * C * H^t , where H is this matrix (N x M) and C is a square M x M matrix which is not required to be symmetric.
  *  Neither C nor R can be this same matrix (checked with ASSERT_).
  * \param C The square matrix in the middle of the product; its size must match the number of columns of this matrix.
  * \param R The N x N output matrix.
  * \param accumOnOutput If false, R is reset with zeros(N,N) before the operation. If true, R is only passed through setSize(N,N) and the product is accumulated on top of it.
  * \param substractInsteadOfSum If true, the product is subtracted from R instead of being added to it.
  * \sa multiplyByMatrixAndByTranspose
  */
template <class T>
void CMatrixTemplateNumeric<T>::multiplyByMatrixAndByTransposeNonSymmetric(
	const CMatrixTemplateNumeric<T>		&C,
	CMatrixTemplateNumeric<T>			&R,
	bool								accumOnOutput,
	bool								substractInsteadOfSum
	) const
{
	MRPT_TRY_START;
	ASSERT_( (C.m_Rows == C.m_Cols) && (C.m_Rows == this->CMatrixTemplate<T>::m_Cols) );
	ASSERT_( &C != this );
	ASSERT_( &R != this );
	const size_t	numRowsH = CMatrixTemplate<T>::m_Rows;
	const size_t	numColsH = CMatrixTemplate<T>::m_Cols;

	if (!accumOnOutput)
			R.zeros(numRowsH,numRowsH);
	else	R.setSize(numRowsH,numRowsH);

	// For each (row,mid) the element (H*C)(row,mid) is computed once and then spread over the
	// whole output row, multiplied by the matching column of H^t.
	// The loop nest is deliberately written twice (add / subtract) to keep the branch out of the inner loops.
	if (!substractInsteadOfSum)
	{
		for (size_t row=0;row<numRowsH;row++)
		{
			for (size_t mid=0;mid<numColsH;mid++)
			{
				T	elemHC = 0;
				for (size_t k=0;k<numColsH;k++)
					elemHC += CMatrixTemplate<T>::m_Val[row][k] * C.m_Val[k][mid];
				for (size_t col=0;col<numRowsH;col++)
					R.m_Val[row][col] += elemHC * CMatrixTemplate<T>::m_Val[col][mid];
			}
		}
	}
	else
	{
		for (size_t row=0;row<numRowsH;row++)
		{
			for (size_t mid=0;mid<numColsH;mid++)
			{
				T	elemHC = 0;
				for (size_t k=0;k<numColsH;k++)
					elemHC += CMatrixTemplate<T>::m_Val[row][k] * C.m_Val[k][mid];
				for (size_t col=0;col<numRowsH;col++)
					R.m_Val[row][col] -= elemHC * CMatrixTemplate<T>::m_Val[col][mid];
			}
		}
	}

	MRPT_TRY_END;
}

/**	Computes the product S = A * B * C, where S is this object.
*  This matrix is first reset to a zero matrix of size rows(A) x cols(C), and the product is then accumulated on it.
*  An exception is raised if the sizes of A, B and C are not compatible.
* \param A Left matrix.
* \param B Middle matrix; its number of rows must equal the number of columns of A.
* \param C Right matrix; its number of rows must equal the number of columns of B.
* \sa multiplyABCt
* By AJOGD
*/
template <class T>
void CMatrixTemplateNumeric<T>::multiplyABC(
	const CMatrixTemplateNumeric<T>		&A,
	const CMatrixTemplateNumeric<T>		&B,
	const CMatrixTemplateNumeric<T>		&C)
{
	MRPT_TRY_START;
	const bool	sizesMatch = (A.m_Cols == B.m_Rows) && (B.m_Cols == C.m_Rows);
	if (!sizesMatch)
		THROW_EXCEPTION("Wrong Matrix sizes");

	this->zeros(A.m_Rows,C.m_Cols);

	for (size_t row=0;row<A.m_Rows;row++)
	{
		for (size_t mid=0;mid<B.m_Cols;mid++)
		{
			// Element (row,mid) of the partial product A*B:
			T	elemAB = 0;
			for (size_t k=0;k<A.m_Cols;k++)
				elemAB += A.m_Val[row][k] * B.m_Val[k][mid];

			// ...whose contribution is spread over the whole output row:
			for (size_t col=0;col<C.m_Cols;col++)
				CMatrixTemplate<T>::m_Val[row][col] += elemAB * C.m_Val[mid][col];
		}
	}

	MRPT_TRY_END;
}

/**	Computes the product S = A * B * C^t, where S is this object.
*  This matrix is first reset to a zero matrix of size rows(A) x rows(C), and the product is then accumulated on it.
*  An exception is raised if the sizes of A, B and C are not compatible.
* \param A Left matrix.
* \param B Middle matrix; its number of rows must equal the number of columns of A.
* \param C Right matrix, used transposed; its number of columns must equal the number of columns of B.
* \sa multiplyABC
*/
template <class T>
void CMatrixTemplateNumeric<T>::multiplyABCt(
	const CMatrixTemplateNumeric<T>		&A,
	const CMatrixTemplateNumeric<T>		&B,
	const CMatrixTemplateNumeric<T>		&C)
{
	MRPT_TRY_START;
	const bool	sizesMatch = (A.m_Cols == B.m_Rows) && (B.m_Cols == C.m_Cols);
	if (!sizesMatch)
		THROW_EXCEPTION("Wrong Matrix sizes");

	zeros(A.m_Rows,C.m_Rows);

	for (size_t row=0;row<A.m_Rows;row++)
	{
		for (size_t mid=0;mid<B.m_Cols;mid++)
		{
			// Element (row,mid) of the partial product A*B:
			T	elemAB = 0;
			for (size_t k=0;k<A.m_Cols;k++)
				elemAB += A.m_Val[row][k] * B.m_Val[k][mid];

			// C is read with swapped indexes, i.e. as its transpose:
			for (size_t col=0;col<C.m_Rows;col++)
				CMatrixTemplate<T>::m_Val[row][col] += elemAB * C.m_Val[col][mid];
		}
	}

	MRPT_TRY_END;
}

/** Executes the operation \f$ \mathbf{R} = \mathbf{H} \mathbf{C} \mathbf{H}^t \f$, where 'this' matrix is \f$ \mathbf{H} \f$ and \f$ \mathbf{C} \f$ is expected to be symmetric.
  *  If 'this' matrix is \f$ N \times M \f$, then \f$ \mathbf{C} \f$ must be \f$ M \times M \f$, and the result matrix \f$ R \f$ will be \f$ N \times N \f$.
  *  Only the elements (i,j) with j>=i are actually computed, and each value is then copied to (j,i). Therefore the output is always symmetric,
  *  whereas executing:
  \code
	 R = H * C * (~H);
  \endcode
  *  may lead to non-symmetric matrices due to numerical rounding errors.
  *
  *  Two implementations are used depending on the problem size: if N*M>100 the product H*C is first stored in a temporary matrix;
  *  otherwise everything is computed in place, which is faster for small matrices.
  *
  *  Neither C nor R can be this same matrix (checked with ASSERT_).
  *
  * \param C The square matrix in the middle of the product.
  * \param R The N x N output matrix.
  * \param allowSubMatrixMultiplication If true, C may be larger than M x M and only the M x M block of C starting at row and column 'subMatrixOffset' is used. This is useful in some SLAM problems. If false, C must be exactly M x M.
  * \param subMatrixOffset Index (first=0) of the first row and column of the block of C to use. It is only taken into account if allowSubMatrixMultiplication=true.
  * \param accumResultInOutput If false, R is reset with zeros(N,N) before the operation. If true, R must already be N x N (an exception is raised otherwise) and the product is added to the current contents of its elements (i,j) with j>=i; the elements (j,i) are overwritten with the same result.
  *
  * \sa multiplyByMatrixAndByTransposeScalar
  */
template <class T>
void CMatrixTemplateNumeric<T>::multiplyByMatrixAndByTranspose(
	const CMatrixTemplateNumeric<T>		&C,
	CMatrixTemplateNumeric<T>			&R,
	bool                                 allowSubMatrixMultiplication,
	size_t                               subMatrixOffset,
	bool                                 accumResultInOutput ) const
{
	MRPT_TRY_START;

	// Offset of the block of C to be used (0 = the whole matrix):
	size_t  C_start = allowSubMatrixMultiplication ? subMatrixOffset : 0;

	ASSERT_( C.m_Rows == C.m_Cols);

	if (allowSubMatrixMultiplication)
	{
		ASSERT_( C_start + this->CMatrixTemplate<T>::m_Cols <= C.m_Rows  );
	}
	else
	{
		ASSERT_( C.m_Rows == this->CMatrixTemplate<T>::m_Cols );
	}

	ASSERT_( &C != this );
	ASSERT_( &R != this );
	const size_t	N = CMatrixTemplate<T>::m_Rows;
	const size_t	M = CMatrixTemplate<T>::m_Cols;

	if ( !accumResultInOutput )
	{
		R.zeros(N,N);
	}
	else
	{
		ASSERT_( R.getColCount()==N && R.getRowCount()==N );
	}

	if (N*M>100)
	{
		// Step 1: HC = this * C (restricted to the selected block of C)
		CMatrixTemplateNumeric<T>	HC(N,M);

		for (size_t row=0;row<N;row++)
		{
			for (size_t col=0;col<M;col++)
			{
				T	dotProd = 0;
				for (size_t k=0;k<M;k++)
					dotProd += CMatrixTemplate<T>::m_Val[row][k]*C.m_Val[k+C_start][col+C_start];
				HC.m_Val[row][col] = dotProd;
			}
		}

		// Step 2: R = HC * (~this), computing the upper triangle only and mirroring it
		for (size_t row=0;row<N;row++)
		{
			for (size_t col=row;col<N;col++)
			{
				T	result = R.m_Val[row][col];
				for (size_t k=0;k<M;k++)
					result += HC.m_Val[row][k] * CMatrixTemplate<T>::m_Val[col][k];
				R.m_Val[col][row] = result;
				R.m_Val[row][col] = result;
			}
		}
	}
	else
	{
		// Small matrices: avoiding the temporary matrix is faster here
		for (size_t row=0;row<N;row++)
		{
			for (size_t col=row;col<N;col++)
			{
				T	result = R.m_Val[row][col];
				for (size_t mid=0;mid<M;mid++)
				{
					// Element (row,mid) of this * C:
					T	elemHC = 0;
					for (size_t k=0;k<M;k++)
						elemHC += CMatrixTemplate<T>::m_Val[row][k] * C.m_Val[k+C_start][mid+C_start];
					result += elemHC * CMatrixTemplate<T>::m_Val[col][mid];
				}
				R.m_Val[col][row] = result;
				R.m_Val[row][col] = result;
			}
		}
	}

	MRPT_TRY_END;
}

/** A special case of multiplyByMatrixAndByTranspose for a scalar result (that is, a 1x1 matrix): the value is returned directly, avoiding the construction of a 1x1 matrix.
  *  This matrix (H) must be a \f$ 1 \times N \f$ or a \f$ N \times 1 \f$ matrix and C must be \f$ N \times N \f$, or an exception will be raised.
  *  For a one-row H the returned value is:
  \code
	 return = ( H * C * (~H) ) (0,0);
  \endcode
  *  For a one-column H it is assumed that the user means (~H) * C * H instead.
  *  C cannot be this same matrix (checked with ASSERT_).
  * \param C The square matrix in the middle of the product.
  * \return The scalar result of the product.
  * \sa multiplyByMatrixAndByTranspose
  */
template <class T>
T CMatrixTemplateNumeric<T>::multiplyByMatrixAndByTransposeScalar(
	const CMatrixTemplateNumeric<T>		&C ) const
{
	MRPT_TRY_START;
	ASSERT_( (C.m_Rows == C.m_Cols) );
	ASSERT_( &C != this );
	const size_t	N = CMatrixTemplate<T>::m_Rows;
	const size_t	M = CMatrixTemplate<T>::m_Cols;

	if (N==1)
	{
		// Row vector: this is 1xM, so C must be MxM
		ASSERT_( C.m_Rows == M );

		T	result = 0;
		for (size_t col=0;col<M;col++)
		{
			T	elemHC = 0;
			for (size_t k=0;k<M;k++)
				elemHC += CMatrixTemplate<T>::m_Val[0][k] * C.m_Val[k][col];
			result += elemHC * CMatrixTemplate<T>::m_Val[0][col];
		}
		return result;
	}

	if (M==1)
	{
		// Column vector: this is Nx1, so C must be NxN.
		// The operation performed is (~H)*C*H instead of H*C*(~H):
		ASSERT_( C.m_Rows == N );

		T	result = 0;
		for (size_t col=0;col<N;col++)
		{
			T	elemHC = 0;
			for (size_t k=0;k<N;k++)
				elemHC += CMatrixTemplate<T>::m_Val[k][0] * C.m_Val[k][col];
			result += elemHC * CMatrixTemplate<T>::m_Val[col][0];
		}
		return result;
	}

	THROW_EXCEPTION("This matrix must be a one-column or a one-row matrix!");

	MRPT_TRY_END;
}

/** Computes the rank of the matrix by elimination over a working copy (this matrix is not modified).
  *  At each step a pivot is looked for in the first column of the remaining matrix: the element (0,0) is used unless its
  *  absolute value is below eps, in which case the first row whose first element is above eps (in absolute value) is taken instead.
  *  If there is no valid pivot, the column is discarded. Otherwise the pivot row is used to update the rest of rows,
  *  the rank is increased by one, and both the first column and the pivot row are removed.
  * \param eps Threshold on the absolute value used to decide whether an element can be a pivot.
  * \return The number of pivots found.
  */
template<class T>
size_t CMatrixTemplateNumeric<T>::rank(T eps) const	{
	CMatrixTemplateNumeric<T> work;
	work=*this;
	size_t numPivots=0;
	while (work.m_Rows>0&&work.m_Cols>0)	{
		// Pivot selection within the first column:
		size_t pivotRow=0;
		if (fabs(work(0,0))<eps)	{
			for (size_t r=1;r<work.m_Rows;r++)	{
				if (fabs(work(r,0))>eps)	{
					pivotRow=r;
					break;
				}
			}
			if (pivotRow==0)	{
				// No usable pivot: this column does not add to the rank.
				work.deleteColumn(0);
				continue;
			}
		}
		// Subtract the scaled pivot row from every other row (columns 1 and beyond only,
		// since the first column is removed right afterwards):
		const T pivotValue=work(pivotRow,0);
		for (size_t r=0;r<work.m_Rows;r++)	{
			if (r!=pivotRow)	{
				const T factor=work(r,0)/pivotValue;
				for (size_t c=1;c<work.m_Cols;c++) work(r,c)-=factor*work(pivotRow,c);
			}
		}
		numPivots++;
		work.deleteColumn(0);
		work.deleteRow(pivotRow);
	}
	return numPivots;
}


// Explicit template instantiations of CMatrixTemplateNumeric for the supported element types:
//  "double" and "float" are always instantiated; "long double" only if HAVE_LONG_DOUBLE is defined.
template class MRPTDLLIMPEXP CMatrixTemplateNumeric<double>;
template class MRPTDLLIMPEXP CMatrixTemplateNumeric<float>;

#ifdef HAVE_LONG_DOUBLE
template class MRPTDLLIMPEXP CMatrixTemplateNumeric<long double>;
#endif
