///
/// This file is part of Rheolef.
///
/// Copyright (C) 2000-2009 Pierre Saramito <Pierre.Saramito@imag.fr>
///
/// Rheolef is free software; you can redistribute it and/or modify
/// it under the terms of the GNU General Public License as published by
/// the Free Software Foundation; either version 2 of the License, or
/// (at your option) any later version.
///
/// Rheolef is distributed in the hope that it will be useful,
/// but WITHOUT ANY WARRANTY; without even the implied warranty of
/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
/// GNU General Public License for more details.
///
/// You should have received a copy of the GNU General Public License
/// along with Rheolef; if not, write to the Free Software
/// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
///
/// =========================================================================
#include "basis_fem_RTk.h"
#include "equispaced.icc"
#include "warburton.icc"
#include "vdm.icc"
#include "arma2csr.icc"
#include "reference_element_aux.icc"

namespace rheolef {
using namespace std;

// =========================================================================
// basis members
// =========================================================================
template<class T>
basis_fem_RTk<T>::~basis_fem_RTk()
{
}
template<class T>
basis_fem_RTk<T>::basis_fem_RTk (
  std::string              name, 
  const basis_option& sopt) 
  : basis_rep<T> (name,sopt),
    _b_pre_kp1(),
    _quad(),
    _bar_c(),
    _bkm1_node_internal_d(),
    _loc_ndof_per_sid()
{
  size_type degree = 0;
  if ((name.length()) >= 2 && (name[0] == 'R') && (name[1] == 'T')) {
    // TODO: check also that name fits "RTk" where is an k integer
    degree = atoi(name.c_str()+2);
  } else if (name.length() > 0) { // missing "RT" !
    error_macro ("invalid polynomial name `"<<name<<"' for the RTk polynomial set");
  } else {
    // empty name : default cstor
    degree = 0;
  }
  // requieres a hierarchical pre basis => Dubiner or monomial
  // - monomial is ill conditionned, but easier to decompose
  // - dubiner: TODO project the tilde_psi basis on it
  // note: the internal dof basis is independent 
  _b_pre_kp1 = basis_raw_basic<T> ("M"+itos(degree+1));
}
template<class T>
void
basis_fem_RTk<T>::_initialize (reference_element hat_K) const
{
  if (base::_hat_node [hat_K.variant()].size () != 0) return; // already initialized
  if (hat_K.dimension() == 0) return; // nothing to do

  // abbrevs
  std::vector<point_basic<T> >& hat_node = base::_hat_node [hat_K.variant()];
  size_type d = hat_K.dimension();
  size_type k = degree();
  size_type variant = hat_K.variant();

  // --------------------------------------------------------------------------
  // 1. nodes for dofs : interpolate v.n on sides + internal quadrature nodes
  // --------------------------------------------------------------------------
  // 1.1. compute nnod
  size_type loc_nnod_sid_tot = 0;
  reference_element hat_sid [5];
  size_type loc_inod_sid_start [5];
  size_type degree_sid [5];
  for (size_type loc_isid = 0, loc_nsid = hat_K.n_subgeo(d-1); loc_isid < loc_nsid; ++loc_isid) {
    size_type loc_nsidvert = hat_K.subgeo_size (d-1, loc_isid);
    switch (loc_nsidvert) {
      case 1: loc_nnod_sid_tot += 1; // 0d, vertex
              hat_sid[loc_nsidvert] = reference_element::p;
  	      degree_sid [loc_nsidvert] = 0;
              loc_inod_sid_start[loc_nsidvert] = 0;
              break;
      case 2: loc_nnod_sid_tot += k+1; // 1d, edge
              hat_sid[loc_nsidvert] = reference_element::e;
  	      degree_sid [loc_nsidvert] = k+2;
              loc_inod_sid_start[loc_nsidvert] = 2;
              break;
      case 3: loc_nnod_sid_tot += (k+1)*(k+2)/2; // 2d, triangle
              hat_sid[loc_nsidvert] = reference_element::t;
  	      degree_sid [loc_nsidvert] = k+3;
              loc_inod_sid_start[loc_nsidvert] = 3*degree_sid[loc_nsidvert];
              break;
      case 4: loc_nnod_sid_tot += sqr(k+1); // 2d, quadrangle
              hat_sid[loc_nsidvert] = reference_element::q;
  	      degree_sid[loc_nsidvert] = k+2;
              loc_inod_sid_start[loc_nsidvert] = 4*degree_sid[loc_nsidvert];
              break;
      default: error_macro ("unexpected side size: " << loc_nsidvert);
    }
  }
  size_type loc_nnod_quad = 0;
  if (k > 0) {
    quadrature_option qopt;
    qopt.set_family (quadrature_option::gauss);
    switch (variant) {
      case reference_element::e:
      case reference_element::t:
      case reference_element::T:
        qopt.set_order  (2*(k-1)); // P_{k-1} internal modes for simplicial elts
        break;
      case reference_element::q:
      case reference_element::P:
      case reference_element::H:
        qopt.set_order  (2*k); // P_{k-1,k,k} internal modes for non-simplicial elts 
        break;
      default: error_macro ("unexpected element variant `"<<variant<<"'");
    }
    _quad = quadrature<T> (qopt);
    loc_nnod_quad = _quad.size(hat_K);
  }
  hat_node.resize (loc_nnod_sid_tot + loc_nnod_quad);
  // 1.2. set inod pointers:
  base::_first_inod [variant].fill (0);
  base::_first_inod [variant][d]   = loc_nnod_sid_tot;
  base::_first_inod [variant][d+1] = hat_node.size();

  // 1.3. insert nodes
  // 1.3.1. nodes on sides
  size_type loc_inod = 0;
  for (size_type loc_isid = 0, loc_nsid = hat_K.n_subgeo(d-1); loc_isid < loc_nsid; ++loc_isid) {
    size_type loc_nsidvert = hat_K.subgeo_size (d-1, loc_isid);
    std::vector<point_basic<T> >  hat_node_sid;
    switch (base::_sopt.get_node()) {
      case basis_option::warburton:
            pointset_lagrange_warburton  (hat_sid[loc_nsidvert], degree_sid[loc_nsidvert], hat_node_sid); break;
      case basis_option::equispaced:
            pointset_lagrange_equispaced (hat_sid[loc_nsidvert], degree_sid[loc_nsidvert], hat_node_sid); break;
      default:
            error_macro ("unsupported node set: " << base::_sopt.get_node_name());
    }
    point loc_vertex [4]; // restricted to point_basic<Float> in reference_element::vertex()
    for (size_type loc_jsidvert = 0; loc_jsidvert < loc_nsidvert; ++loc_jsidvert) {
      size_type loc_jvertex = hat_K.subgeo_local_vertex (d-1, loc_isid, loc_jsidvert);
      loc_vertex[loc_jsidvert] = hat_K.vertex (loc_jvertex); // TODO: avoid copy, since hat_K.vertex() is a ref
    }
    // skip boundary nodes on the side hat_sid: start at loc_inod_sid_start
    for (size_type loc_inod_sid = loc_inod_sid_start[loc_nsidvert], loc_nnod_sid = hat_node_sid.size(); loc_inod_sid < loc_nnod_sid; ++loc_inod_sid) {
      check_macro (loc_inod < hat_node.size(), "invalid loc_inod");
      T xi0 = hat_node_sid [loc_inod_sid][0],
        xi1 = hat_node_sid [loc_inod_sid][1];
      if (loc_nsidvert == 4) { // map from [-1,1]^2 to [0,1]^2
        xi0 = (1+xi0)/2;
        xi1 = (1+xi1)/2;
      }
      for (size_type mu = 0; mu < d; ++mu) {
        hat_node[loc_inod][mu] = loc_vertex [0][mu];
        if (d >= 2) {
          hat_node[loc_inod][mu] += xi0*(loc_vertex[1][mu] - loc_vertex[0][mu]);
        }
        if (d == 3) {
          hat_node[loc_inod][mu] += xi1*(loc_vertex[loc_nsidvert-1][mu] - loc_vertex[0][mu]);
        }
      }	
      ++loc_inod;
    }
  }
  // 1.3.2. insert internal quadrature nodes for integrating exactly p(k-1)*q(k-1) in hat_K, when k>0
  if (k > 0)  {
    for (typename quadrature<T>::const_iterator iter_q = _quad.begin(hat_K),
	  last_q = _quad.end(hat_K); iter_q != last_q; iter_q++) {	
      hat_node[loc_inod] = (*iter_q).x;
      ++loc_inod;
    }
  }
  size_type loc_nnod = hat_node.size();

  // --------------------------------------------------------------------------
  // 2. build a transformation tilde_C for evaluating polynomials from (b_pre_{k+1})^d
  //    from a pre-basis:
  //    tilde_psi = {(p,0), (0,p), p in Bkm1} cup {(x0*p,x1*p), p in bar_Bk}
  // --------------------------------------------------------------------------
  // 2.1. compute ndof and dof pointers:
  base::_first_idof [variant].fill (0);
  switch (variant) {
    case reference_element::p:
      base::_first_idof [variant][1] = 1;
      break;
    case reference_element::e:
      base::_first_idof [variant][1] = 2;
      base::_first_idof [variant][2] = k+2;
      break;
    case reference_element::t:
      base::_first_idof [variant][2] = 3*(k+1);
      base::_first_idof [variant][3] = (k+1)*(k+3);
      break;
    case reference_element::q:
      base::_first_idof [variant][2] = 4*(k+1);
      base::_first_idof [variant][3] = 2*(k+1)*(k+2); // Rav-&981, p. 23
      break;
    case reference_element::T:
      base::_first_idof [variant][3] = 4*(k+1)*(k+2)/2;
      base::_first_idof [variant][4] = 3*k*(k+1)*(k+2)/6 + base::_first_idof [variant][3];
      break;
    case reference_element::P:
      base::_first_idof [variant][3] = 2*(k+1)*(k+2)/2 + 3*sqr(k+1);
      base::_first_idof [variant][4] = sqr(k)*(k+1)/2 + base::_first_idof [variant][3];
      break;       
    case reference_element::H:
      base::_first_idof [variant][3] = 6*sqr(k+1);
      base::_first_idof [variant][4] = 3*k*sqr(k+1) + base::_first_idof [variant][3];
      break;       
    default: error_macro ("unexpected element variant `"<<variant<<"'");
  }
  // 2.2. compute ndof per side, for each side (prism have different sides)
  size_type dim_Pkm1 = (k == 0) ? 0 : reference_element::n_node(hat_K.variant(), k-1);
  size_type dim_Pk   =  reference_element::n_node (hat_K.variant(), k);
  size_type dim_Pkp1 = _b_pre_kp1.size(hat_K);
  std::array<size_type,6>& loc_ndof_per_sid = _loc_ndof_per_sid [hat_K.variant()];
  loc_ndof_per_sid.fill(0);
  size_type loc_ndof_sid_tot = 0;
  for (size_type loc_isid = 0, loc_nsid = hat_K.n_subgeo(d-1); loc_isid < loc_nsid; ++loc_isid) {
    size_type loc_nsidvert = hat_K.subgeo_size (d-1, loc_isid);
    size_type sid_variant = 0;
    switch (loc_nsidvert) {
      case 1: sid_variant = reference_element::p; break;
      case 2: sid_variant = reference_element::e; break;
      case 3: sid_variant = reference_element::t; break;
      case 4: sid_variant = reference_element::q; break;
      default: error_macro ("unexpected side with "<<loc_nsidvert<<" vertices");
    }
    loc_ndof_per_sid [loc_isid] = reference_element::n_node(sid_variant, k);
    loc_ndof_sid_tot += loc_ndof_per_sid [loc_isid];
  }
  size_type loc_ndof = base::_first_idof [variant][d+1];

#ifdef TO_CLEAN
  // check size
  warning_macro ("first_inod(hat_K,d)  ="<<base::first_inod(hat_K,d));
  warning_macro ("first_inod(hat_K,d+1)="<<base::first_inod(hat_K,d+1));
  warning_macro ("dim(P"<<int(k-1)<<")="<<dim_Pkm1);
  warning_macro ("dim(P"<<k<<")="<<dim_Pk);
  warning_macro ("dim(P"<<k+1<<")="<<dim_Pkp1);
  warning_macro ("loc_ndof_sid_tot="<<loc_ndof_sid_tot);
  warning_macro ("size(hat_K)="<<base::size(hat_K));
  warning_macro ("first_idof(hat_K,d-1)="<<base::first_idof(hat_K,d-1));
  warning_macro ("first_idof(hat_K,d)  ="<<base::first_idof(hat_K,d));
  warning_macro ("first_idof(hat_K,d+1)="<<base::first_idof(hat_K,d+1));
  warning_macro ("dim(RT"<<k<<")="<<loc_ndof);
  warning_macro ("dim((P"<<k+1<<")^"<<d<<")="<<d*dim_Pkp1);
  warning_macro ("c_tilde(loc_ndof="<<loc_ndof<<",d*dim(Pkp1)="<<d*dim_Pkp1<<")");
#endif // TO_CLEAN

  check_macro (loc_ndof == base::size(hat_K), "invalid ndof="<<loc_ndof<<" != "<< base::size(hat_K));
  //
  // 2.2. decompose the tilde_psi RTk basis on the basis of (P_{k+1})^d
  //      note: explicit easy decomposition when using monomial basis for (P_{k+1})^d
  arma::Mat<T> tilde_c (loc_ndof, d*dim_Pkp1);
  tilde_c.fill (0);
  // 2.2.1 homogeneous polynoms:
  //     (q,0) and (0,r), q,r in basis(Pk) 
  //     leads to a diag matrix block
  //     since basis(Pk) subset basis(P_{k+1}) and the basis is hierarchical
  trace_macro ("basis(1): (q,0) and (0,r), q,r in basis(Pk)...");
  for (size_type loc_comp_idof = 0; loc_comp_idof < dim_Pk; ++loc_comp_idof) {
    for (size_type mu = 0; mu < d; ++mu) {
      size_type loc_idof  = d*loc_comp_idof + mu;
      size_type loc_jpkp1 = d*loc_comp_idof + mu;
      trace_macro ("loc_idof="<<loc_idof<<", loc_jpkp1="<<loc_jpkp1);
      tilde_c (loc_idof, loc_jpkp1) = 1;
    }
  }
  // 2.2.2 non-homogeneous polynoms
  trace_macro ("basis(2): (x0*p,x1*p), p in basis(P_k)\basis(P_{k-1}) [exactly k degree]");
  std::vector<size_type> inod2ideg_kp1;
  build_inod2ideg (hat_K, k+1, inod2ideg_kp1);
  switch (hat_K.variant()) {
    case reference_element::e: 
    case reference_element::t: 
    case reference_element::T: { 
      // triangle :  (x0*p,x1*p), p in basis(P_k)\basis(P_{k-1}) [exactly k degree] 
      //            =(b_iO,b_i1) in P_{k+1} when b is the hierarchical monomial basis
      //           and i0=ilat2ideg([i+1,k-i  ]), i=0..k
      //               i1=ilat2ideg([i  ,k-i+1])
      std::vector<point_basic<size_type> > power_index;
      make_power_indexes_sorted_by_degrees (hat_K, k, power_index);
      for (size_type loc_ideg = dim_Pkm1, loc_ndeg = power_index.size(); loc_ideg < loc_ndeg; ++loc_ideg) {
        for (size_type mu = 0; mu < d; ++mu) {
          point_basic<size_type> ilat = power_index [loc_ideg];
          size_type loc_idof = d*dim_Pk + (loc_ideg - dim_Pkm1);
          ilat [mu]++; // x_mu*monomial(i,k-i) = x^{i+1}*y^{k-i} when mu=0, etc
          size_type loc_inod_kp1 = ilat2loc_inod (hat_K, k+1, ilat);
          size_type loc_ideg_kp1 = inod2ideg_kp1 [loc_inod_kp1];
          size_type loc_jpkp1d = d*loc_ideg_kp1 + mu;
          trace_macro ("loc_idof="<<loc_idof<<", loc_jpkp1d="<<loc_jpkp1d);
          tilde_c (loc_idof, loc_jpkp1d) = 1;
	}
      }
      break;
    }
    case reference_element::q:
    case reference_element::H: {
      // quadrangle : (x0*p,   0)
      //              (    ,x1*q), p,q in basis(P_k)\basis(P_{k-1}) [exactly k degree] 
      //             =(b_i0,   0) 
      //              (   0,b_i1), b_i0, b_i1 in P_{k+1} when b is the hierarchical monomial basis
      //           and i0=ilat2ideg([i+1,j  ]), i=0..k
      //               i1=ilat2ideg([i  ,j+1])
      size_type sub_variant = (d == 2) ? reference_element::e : reference_element::q;
      reference_element hat_sub (sub_variant); // q=e*e ; H = e*q
      std::vector<point_basic<size_type> > power_index_sub;
      make_power_indexes_sorted_by_degrees (hat_sub, k, power_index_sub);
      for (size_type loc_ideg = 0, loc_ndeg = power_index_sub.size(); loc_ideg < loc_ndeg; ++loc_ideg) {
        for (size_type mu = 0; mu < d; ++mu) {
          point_basic<size_type> ilat_sub = power_index_sub [loc_ideg];
          size_type loc_idof = d*dim_Pk + d*loc_ideg + mu;
          point_basic<size_type> ilat (0,0,0);
          ilat [mu]       = k+1; // (x^{k+1}*p(y), 0) & (0, p(x)*y^{k+1}) with p in Pk(sub) 
          ilat [(mu+1)%d] = ilat_sub[0]; 
          if (d == 3) ilat [(mu+2)%d] = ilat_sub[1]; 
          size_type loc_inod_kp1 = ilat2loc_inod (hat_K, k+1, ilat);
          size_type loc_ideg_kp1 = inod2ideg_kp1 [loc_inod_kp1];
          size_type loc_jpkp1d = d*loc_ideg_kp1 + mu;
          trace_macro ("loc_idof="<<loc_idof<<", loc_jpkp1d="<<loc_jpkp1d);
          tilde_c (loc_idof, loc_jpkp1d) = 1;
        }
      }
      break;
    }
    case reference_element::P:
    default: error_macro ("unexpected element `"<<hat_K.name()<<"'");
  }
#ifdef TO_CLEAN
  cout << setprecision(std::numeric_limits<T>::digits10)
       << "tilde_c=[" << endl << tilde_c <<"]"<<endl
       << "[u,s,vt]=svd(tilde_c)" << endl
       << "id=eye(size(s))" << endl
       << "c=u*id*vt" << endl
    ;
#endif // TO_CLEAN
  // --------------------------------------------------------------------------
  // 3. build a transformation C for evaluating polynomials from (b_pre_{k+1})^d
  //    for a raw-basis:
  //      psi = C*b_pre
  // --------------------------------------------------------------------------
  // get psi raw RTk basis by othogonalizing the tilde_psi RTk basis 
  // note: use SVD
  arma::Mat<T> u  (tilde_c.n_rows, tilde_c.n_rows),
               vt (tilde_c.n_cols, tilde_c.n_cols);
  arma::Col<T> s  (tilde_c.n_rows);
  bool svd_ok = arma::svd (u, s, vt, tilde_c, "dc"); // SVD: tilde_c = u*s1*trans(vt)
  size_type rank_s = 0;
  T eps = std::numeric_limits<T>::epsilon();
  for (size_type loc_idof = 0; loc_idof < s.size(); ++loc_idof) {
    rank_s += (abs(s[loc_idof]) > eps) ? 1 : 0;
  }
  check_macro (rank_s == loc_ndof,
    "invalid polynomial space dimension = " << rank_s << " < loc_ndof = " << loc_ndof);
  arma::Mat<T> id (tilde_c.n_rows, tilde_c.n_cols, arma::fill::eye);
  arma::Mat<T> c = id*trans(vt);

#ifdef TO_CLEAN
  arma::Mat<T> s1 (tilde_c.n_rows, tilde_c.n_cols);
  s1.fill(0);
  for (size_type iloc = 0; iloc < tilde_c.n_rows; iloc++) {
    s1(iloc,iloc) = s(iloc);
  }
  cout << "s1  = ["<< endl << s1 <<"];" << endl
       << "u1  = ["<< endl << u  <<"];"<<endl
       << "vt1 = ["<< endl << vt <<"];"<<endl
       << "id1 = ["<< endl << id <<"];"<<endl
       << "c1  = ["<< endl << c  <<"]"<<endl
       << "err=norm(tilde_c-u*s*vt')"<<endl
       << "err1=norm(tilde_c-u1*s1*vt1')"<<endl
       << "err_c=abs(c-c1)"<<endl
       << "err_svd=max(max(abs(c-c1)))"<<endl
       << "err_u=max(max(abs(u-u1)))"<<endl
       << "err_v=max(max(abs(vt-vt1)))"<<endl
       << "err_s=max(diag(s1)-diag(s))"<<endl
    ;
  T err_svd = arma::norm(tilde_c - u*s1*trans(vt));
  cout << "err_svd = " << err_svd << endl;
#endif // TO_CLEAN

  // -------------------------------------------------------------------------------
  // 4. build a transformation bar_C for evaluating polynomials from (b_pre_{k+1})^d
  //    for the dof-basis:
  //      phi = C*b_pre
  // -------------------------------------------------------------------------------
  // 4.1. defines the quadrature nodes, for integral dofs
  //      and evaluate the basis of P_{k-1} on it
  std::array<arma::Mat<T>,3>& bkm1_node_internal_d = _bkm1_node_internal_d [variant];
  if (k > 0) {
    std::vector<point_basic<T> > hat_node_internal (base::first_inod(hat_K,d+1) - base::first_inod(hat_K,d));
    for (size_type loc_inod_int = 0, loc_nnod_int = hat_node_internal.size(); loc_inod_int < loc_nnod_int; ++loc_inod_int) {
      size_type loc_inod = base::first_inod(hat_K,d) + loc_inod_int;
      hat_node_internal [loc_inod_int] = hat_node [loc_inod];
    }
    string basis_dof_name;
    switch (base::_sopt.get_raw_polynomial()) {
      case basis_option::monomial:  basis_dof_name = "M"; break;
      case basis_option::dubiner:   basis_dof_name = "D"; break;
      case basis_option::bernstein: basis_dof_name = "B"; break;
      default: error_macro ("unsupported raw polynomial basis `"<<base::_sopt.get_raw_polynomial_name()<<"'");
    }
    // bkm1_node_internal(i,j) = bj(xi)
    switch (variant) {
      case reference_element::e: 
      case reference_element::t: 
      case reference_element::T: { 
        // int_K b*v dx with v=[p,0],[0,p] and p in P_{k-1}
        basis_raw_basic<T> b_dof_km1 (basis_dof_name+itos(k-1));
        b_dof_km1.eval (hat_K, hat_node_internal, bkm1_node_internal_d[0]);
        for (size_type mu = 1; mu < d; ++mu) {
          bkm1_node_internal_d [mu] = bkm1_node_internal_d [0];
        }
        break;
      }
      case reference_element::q:
      case reference_element::H: {
        // int_K b*v dx with v=[p,0],[0,q] and p in P_{k-1,k}, q in P_{k,k-1}
        basis_raw_basic<T> b_dof_km1 (basis_dof_name+itos(k-1)),
                           b_dof_k   (basis_dof_name+itos(k));
        std::array<std::vector<point_basic<T> >,3> hat_node_internal_comp;
        std::array<arma::Mat<T>,3>                 bkm1_node_internal_comp,
                                                     bk_node_internal_comp;
        reference_element hat_e (reference_element::e);
        for (size_type mu = 0; mu < d; ++mu) {
          hat_node_internal_comp [mu].resize (hat_node_internal.size());
          for (size_type loc_inod_int = 0, loc_nnod_int = hat_node_internal.size(); loc_inod_int < loc_nnod_int; ++loc_inod_int) {
            // resize from q=[-1,1]^2 to e=[0,1]
            hat_node_internal_comp [mu][loc_inod_int][0] = (1+hat_node_internal [loc_inod_int][mu])/2;
          }
          b_dof_km1.eval (hat_e, hat_node_internal_comp[mu], bkm1_node_internal_comp[mu]);
          b_dof_k  .eval (hat_e, hat_node_internal_comp[mu],   bk_node_internal_comp[mu]);
        }
        size_type loc_ndof_int = bkm1_node_internal_comp[0].n_cols*pow(bk_node_internal_comp[0].n_cols,d-1);
        for (size_type mu = 0; mu < d; ++mu) {
          size_type mu2 = (mu+1)%d;
          size_type mu3 = (mu+2)%d;
          bkm1_node_internal_d [mu].resize (hat_node_internal.size(), loc_ndof_int);
          for (size_type loc_inod_int = 0, loc_nnod_int = hat_node_internal.size(); loc_inod_int < loc_nnod_int; ++loc_inod_int) {
            size_type loc_idof_int = 0;
            if (variant == reference_element::q) {
              for (size_type i = 0; i < bkm1_node_internal_comp [mu].n_cols; ++i) {
              for (size_type j = 0; j <   bk_node_internal_comp [mu].n_cols; ++j) {
                   bkm1_node_internal_d    [mu ] (loc_inod_int,loc_idof_int)
                 = bkm1_node_internal_comp [mu ] (loc_inod_int,i)
                 *   bk_node_internal_comp [mu2] (loc_inod_int,j);
                 ++loc_idof_int;
              }}
            } else {
              for (size_type i = 0; i < bkm1_node_internal_comp [mu].n_cols; ++i) {
              for (size_type j = 0; j <   bk_node_internal_comp [mu].n_cols; ++j) {
              for (size_type k = 0; k <   bk_node_internal_comp [mu].n_cols; ++k) {
                   bkm1_node_internal_d    [mu ] (loc_inod_int,loc_idof_int)
                 = bkm1_node_internal_comp [mu ] (loc_inod_int,i)
                 *   bk_node_internal_comp [mu2] (loc_inod_int,j)
                 *   bk_node_internal_comp [mu3] (loc_inod_int,k);
                 ++loc_idof_int;
              }}}
            }
          }
        }
        break;
      }
      case reference_element::P:
      default: error_macro ("unexpected element `"<<hat_K.name()<<"'");
    }
  }
  // -----------------------------------------
  // 4.2. compute basis of (P_{k+1})^d at all nodes
  // -----------------------------------------
  size_type loc_n_bkp1 = _b_pre_kp1.size (hat_K);
  arma::Mat<T> bkp1_node (loc_nnod, loc_n_bkp1);
  _b_pre_kp1.eval (hat_K, hat_node, bkp1_node);  // bkp1_node(i,j) = bj(xi)
  // vector expansion: bkp1d_node(i,d*j+mu) = [bj(xi),0,0], [0,bj(xi),0], etc
  // -> compute the dofs for all this expansion
  arma::Mat<T> bkp1d_dof (loc_ndof, d*loc_n_bkp1); // dof_i(bjd)
  bkp1d_dof.fill (std::numeric_limits<T>::max());
  std::vector<point_basic<T> > bkp1d_j_node (loc_nnod);
  arma::Col<T>                 bkp1d_j_dof  (loc_ndof);
  for (size_type loc_j_bkp1 = 0; loc_j_bkp1 < loc_n_bkp1; ++loc_j_bkp1) {
    for (size_type mu = 0; mu < d; ++mu) {
      for (size_type loc_inod = 0; loc_inod < loc_nnod; ++loc_inod) {
        bkp1d_j_node [loc_inod]     = point_basic<T>(0,0,0);
        bkp1d_j_node [loc_inod][mu] = bkp1_node(loc_inod,loc_j_bkp1);
      }
      bkp1d_j_dof.fill (std::numeric_limits<T>::max());
      _compute_dofs (hat_K, bkp1d_j_node, bkp1d_j_dof);
      size_type loc_j_bkp1d = d*loc_j_bkp1 + mu;
      check_macro (bkp1d_dof.n_rows == bkp1d_j_dof.size(), "invalid sizes");
      bkp1d_dof (arma::span::all, loc_j_bkp1d) = bkp1d_j_dof;
    }
  }
  // -----------------------------------------
  // 4.3. vdm
  // -----------------------------------------
  // VDM(i,j) = dof_i(phi_j)
  //          = phi_j.n(xi) for side dofs
  //            int phi_j(x)*p(x)*dx, for all p in P(k-1)
  arma::Mat<T>&     vdm = base::_vdm    [hat_K.variant()];
  arma::Mat<T>& inv_vdm = base::_inv_vdm[hat_K.variant()];
  vdm = bkp1d_dof*trans(c); // = trans(c*trans(bkp1d_dof));
  inv_vdm.resize (loc_ndof,loc_ndof);
  arma::Mat<T> identity (loc_ndof,loc_ndof);
  identity.eye();
  bool inv_ok = solve (inv_vdm, vdm, identity);
  check_macro (inv_ok, "unisolvence failed for "
        << base::name() <<"(" << hat_K.name() << ") basis");

  // -----------------------------------------
  // 4.4. final composition matrix: bar_c = trans(inv_vdm)*c
  // -----------------------------------------
  arma::Mat<T>& bar_c = _bar_c [hat_K.variant()];
  bar_c = trans(inv_vdm)*c;
#ifdef TO_CLEAN
  cout << "bkp1d_dof=[" << endl << bkp1d_dof <<"]"<<endl
       << "vdm=[" << endl << vdm <<"]"<<endl
       << "inv_ok=" << inv_ok <<"]"<<endl
       << "det(vdm)=" << arma::det(vdm) <<endl
       << "cond(vdm)=" << arma::cond(vdm) <<endl
       << "bar_c=trans(inv_vdm)*c=[" << endl << bar_c <<"]"<<endl;
#endif // TO_CLEAN
}
// ----------------------------------------------------------------------------
// evaluation of all basis functions at hat_x:
// ----------------------------------------------------------------------------
template<class T>
void
basis_fem_RTk<T>::eval (
    reference_element                 hat_K,
    const point_basic<T>&             hat_x,
    std::vector<value_type>&          value) const
{
  base::_initialize_guard (hat_K);
  size_type d = hat_K.dimension();
  size_type loc_ndof = base::size(hat_K);
  //
  // 1) evaluate the basis of P_{k+1}(hat_K) at hat_x : bkp1(x)
  //
  const arma::Mat<T>& bar_c = _bar_c [hat_K.variant()];
  arma::Col<T>& bkp1 = base::_raw_value [hat_K.variant()];
  _b_pre_kp1.eval (hat_K, hat_x, bkp1);
  value.resize (loc_ndof);
  //
  // 2) evaluate the basis of RTk at hat_x: phi(x)
  //      [phi(x)] = [C2]*[bkp1d(x)]
  //    where bkp1d = basis of (P_{k+1}(hat_K))^d by vectorization
  //
  for (size_type loc_idof = 0; loc_idof < loc_ndof; ++loc_idof) {
    value[loc_idof] = point_basic<T>(0,0,0);
    for (size_type loc_jdof_bkp1 = 0, loc_ndof_bkp1 = bkp1.size(); loc_jdof_bkp1 < loc_ndof_bkp1; ++loc_jdof_bkp1) {
      for (size_type mu = 0; mu < d; ++mu) {
        size_type loc_jdof_bkp1d = d*loc_jdof_bkp1 + mu;
        value[loc_idof][mu] += bar_c(loc_idof,loc_jdof_bkp1d)*bkp1[loc_jdof_bkp1];
      }
    }
  }
}
template<class T>
void
basis_fem_RTk<T>::grad_eval(
    reference_element           hat_K,
    const point_basic<T>&       hat_x,
    std::vector<tensor_basic<T> >& value) const
{
  error_macro ("RT: grad_eval() not yet");
}
// ----------------------------------------------------------------------------
// dofs for a scalar-valued function
// ----------------------------------------------------------------------------
// note: as virtual and template members are not available,
//       the function "f" has been already evaluated on the hat_node[] set
template<class T>
void
basis_fem_RTk<T>::_compute_dofs (
  reference_element              hat_K,
  const std::vector<value_type>& f_xnod,
        arma::Col<T>&            dof) const
{
  base::_initialize_guard (hat_K);
  size_type k = degree();
  size_type d = hat_K.dimension();
  size_type loc_ndof = base::size(hat_K);
  const std::array<size_type,6>& loc_ndof_per_sid = _loc_ndof_per_sid [hat_K.variant()];

  dof.resize (loc_ndof);
  if (d == 0) return;

  // side dofs
  size_type loc_inod = 0;
  size_type loc_idof = 0;
  for (size_type loc_isid = 0, loc_nsid = hat_K.n_subgeo(d-1); loc_isid < loc_nsid; ++loc_isid) {
    point_basic<Float> hat_n;
    hat_K.side_normal (loc_isid, hat_n); // TODO: hat_n double precision -> template T
    for (size_type loc_idof_sid = 0; loc_idof_sid < loc_ndof_per_sid[loc_isid]; ++loc_idof_sid) {
      dof[loc_idof] = dot(f_xnod[loc_inod],hat_n);
      loc_idof++;
      loc_inod++;
    }
  }
  // internal dofs
  if (k == 0) return; // no internal dofs when k==0
  const std::array<arma::Mat<T>,3>& bkm1_node_internal_d = _bkm1_node_internal_d [hat_K.variant()];

  size_type loc_ndof_boundary = loc_idof;
  size_type loc_ndof_int_d    = d*bkm1_node_internal_d[0].n_cols;
  check_macro (loc_ndof == loc_ndof_boundary + loc_ndof_int_d,
	"invalid internal dof count: loc_ndof="<<loc_ndof
	<< ", loc_ndof_boundary="<<loc_ndof_boundary
	<< ", loc_ndof_int_d="<<loc_ndof_int_d);

  size_type loc_inod_int0 = loc_inod;
  for (size_type loc_idof_int = 0, loc_ndof_int = bkm1_node_internal_d[0].n_cols; loc_idof_int < loc_ndof_int; ++loc_idof_int) {
    for (size_type mu = 0; mu < d; ++mu) {
      loc_inod = loc_inod_int0;
      T sum = 0;
      size_type inod_q = 0;
      for (typename quadrature<T>::const_iterator iter_q = _quad.begin(hat_K),
          last_q = _quad.end(hat_K); iter_q != last_q; iter_q++, inod_q++, ++loc_inod) {	
        sum += f_xnod [loc_inod][mu]
              *bkm1_node_internal_d[mu] (inod_q, loc_idof_int)
              *(*iter_q).w;
      }
      check_macro (loc_idof < loc_ndof, "invalid size");
      dof [loc_idof] = sum;
      loc_idof++;
    } 
  }
}
// ----------------------------------------------------------------------------
// instanciation in library
// ----------------------------------------------------------------------------
#define _RHEOLEF_instanciation(T)                                             	\
template class basis_fem_RTk<T>;

_RHEOLEF_instanciation(Float)

}// namespace rheolef
