#include <comp.hpp>

namespace ngcomp
{
  using namespace ngcomp;






  BEMElement :: 
  BEMElement (const FESpace & afes,
	      const FESpace & afes_neumann)
    : fes(afes), fes_neumann(afes_neumann)
  {
    ;
  }


 void BEMElement :: 
 GetDofNrs (ARRAY<int> & dnums) const
 {
   if (dimfem != fes.GetNDof()) 
     const_cast<BEMElement&> (*this).Update ();

   dnums = bem2fem;
 }

  void BEMElement :: Update ()
  {
    int i, j, k;

    const MeshAccess & ma = fes.GetMeshAccess();
    int dimfem = fes.GetNDof();
    int nse = ma.GetNSE();
    
    BitArray bound(dimfem);
    bound.Clear();
    ARRAY<int> dnums;

    for (i = 0; i < nse; i++)
      {
	int index = ma.GetSElIndex (i);
	if (index != 0) continue;
	fes.GetSDofNrs (i, dnums);
	for (j = 0; j < dnums.Size(); j++)
	  bound.Set (dnums[j]);
      }

    bem2fem.SetSize(0);
    for (i = 0; i < dimfem; i++)
      if (bound.Test(i))
	bem2fem.Append (i);

    fem2bem.SetSize(dimfem);
    fem2bem = 0;
    for (i = 0; i < bem2fem.Size(); i++)
      fem2bem[bem2fem[i]] = i;
    
    dimd = bem2fem.Size();
    dimn = fes_neumann.GetNDof();
  }
  


  template <class SCAL>
  S_BEMElement<SCAL> :: 
  S_BEMElement (const FESpace & afes,
		const FESpace & afes_neumann)
    : BEMElement (afes, afes_neumann)
  {
    ;
  }

  template <class SCAL>
  void S_BEMElement<SCAL> ::
  Assemble (FlatMatrix<SCAL> & elmat,
	    LocalHeap & lh) const
  {
    cout << "starting bem assembling" << endl;
    int i;
    const MeshAccess & ma = fes.GetMeshAccess();

    Matrix<SCAL> *singlelayer, *doublelayer, *duality, *hypersing;

    singlelayer = new Matrix<SCAL> (dimn);
    doublelayer = new Matrix<SCAL> (dimn, dimd);
    duality = new Matrix<SCAL> (dimn, dimd);
    hypersing = new Matrix<SCAL> (dimd);

    AssembleIntegralOps (*singlelayer, *doublelayer, *hypersing, *duality, lh);

    cout << "matrix assembling done" << endl;

    /*
    (*testout) << "singlelayer = " << endl << *singlelayer << endl;
    (*testout) << "doublelayer = " << endl << *doublelayer << endl;
    (*testout) << "duality = " << endl << *duality << endl;
    (*testout) << "hypersing = " << endl << *hypersing << endl;
    */

    // Pointcare-Steklov:
    // elmat = hypersing + (double+duality)^T single^{-1}  (double+duality)
    
    Matrix<SCAL> inv(dimn), hh(dimd);
    for (i = 0; i < dimn; i++)
      if ( (*singlelayer)(i,i) == TSCAL(0))
	(*singlelayer)(i,i) = 1e-10;
    
    (*doublelayer) += *duality;
    CalcInverse (*singlelayer, inv);
    *duality = inv * (*doublelayer); 
    hh = Trans (*doublelayer) *  (*duality);
    
    elmat.AssignMemory (dimd, dimd, lh);
    elmat = (*hypersing) + hh;
    
    //    (*testout) << "bem, elmat = " << elmat << endl;
  }


  template <class SCAL>
  void S_LaplaceBEMElement<SCAL>::
  AssembleIntegralOps (FlatMatrix<SCAL> & singlelayer, 
		       FlatMatrix<SCAL> & doublelayer, 
		       FlatMatrix<SCAL> & hypersingular, 
		       FlatMatrix<SCAL> & duality,
		       LocalHeap & lh) const
  {
    int i, j, k, l;

    cout << "assemble laplace" << endl;
    const MeshAccess & ma = this->fes.GetMeshAccess();

    singlelayer = TSCAL(0);
    doublelayer = TSCAL(0);
    duality =     TSCAL(0);
    hypersingular =   TSCAL(0);

    
    ElementTransformation eltransi;
    ElementTransformation eltransj;
    ARRAY<int> dnumsdi, dnumsdj, dnumsni, dnumsnj;
    ARRAY<int> dnumsd;

    for (i = 0; i < ma.GetNSE(); i++)
      for (j = 0; j < ma.GetNSE(); j++)
	{
	  if (ma.GetSElIndex(i) != 0 || ma.GetSElIndex(j) != 0)
	    continue;

	  lh.CleanUp();

	  ma.GetSurfaceElementTransformation (i, eltransi, lh);
	  this->fes.GetSDofNrs (i, dnumsdi);
	  this->fes_neumann.GetSDofNrs (i, dnumsni);

	  ma.GetSurfaceElementTransformation (j, eltransj, lh);
	  this->fes.GetSDofNrs (j, dnumsdj);
	  this->fes_neumann.GetSDofNrs (j, dnumsnj);

	  for (k = 0; k < dnumsdi.Size(); k++)
	    dnumsdi[k] = this->fem2bem[dnumsdi[k]];
	  for (k = 0; k < dnumsdj.Size(); k++)
	    dnumsdj[k] = this->fem2bem[dnumsdj[k]];

	  dnumsd.SetSize(dnumsdi.Size()+dnumsdj.Size());
	  for (k = 0; k < dnumsdi.Size(); k++)
	    dnumsd[k] = dnumsdi[k];
	  for (k = 0; k < dnumsdj.Size(); k++)
	    dnumsd[dnumsdi.Size()+k] = dnumsdj[k];


	  const NodalFiniteElement & fedi = 
	    dynamic_cast<const NodalFiniteElement&> (this->fes.GetSFE(i, lh));
	  const NodalFiniteElement & fedj = 
	    dynamic_cast<const NodalFiniteElement&> (this->fes.GetSFE(j, lh));
	  const NodalFiniteElement & feni = 
	    dynamic_cast<const NodalFiniteElement&> (this->fes_neumann.GetSFE(i, lh));
	  const NodalFiniteElement & fenj = 
	    dynamic_cast<const NodalFiniteElement&> (this->fes_neumann.GetSFE(j, lh));

	  FlatMatrix<SCAL> elsingle, eldouble, elduality, elhyper;

	  
	  AssembleElementMatrix (fedi, fedj, feni, fenj, eltransi, eltransj, 
				 elsingle, eldouble, elduality, elhyper,
				 lh);

	  /*
	  (*testout) << "elsingle = " << elsingle << endl;
	  (*testout) << "eldouble = " << eldouble << endl;
	  (*testout) << "elhyper = " << elhyper << endl;
	  (*testout) << "eldual = " << elduality << endl;
	  */

	  for (k = 0; k < dnumsni.Size(); k++)
	    for (l = 0; l < dnumsnj.Size(); l++)
	      if (dnumsni[k] != -1 && dnumsnj[l] != -1)
		singlelayer(dnumsni[k], dnumsnj[l]) += elsingle(k,l);

	  for (k = 0; k < dnumsni.Size(); k++)
	    for (l = 0; l < dnumsd.Size(); l++)
	      if (dnumsni[k] != -1 && dnumsd[l] != -1)
		doublelayer(dnumsni[k], dnumsd[l]) += eldouble(k,l);

	  for (k = 0; k < dnumsni.Size(); k++)
	    for (l = 0; l < dnumsd.Size(); l++)
	      if (dnumsni[k] != -1 && dnumsd[l] != -1)
		duality(dnumsni[k], dnumsd[l]) += elduality(k,l);

	  for (k = 0; k < dnumsd.Size(); k++)
	    for (l = 0; l < dnumsd.Size(); l++)
	      if (dnumsd[k] != -1 && dnumsd[l] != -1)
		hypersingular(dnumsd[k], dnumsd[l]) += elhyper(k,l);
	}
    

  }



  template <class SCAL>
  void S_LaplaceBEMElement<SCAL>::
  AssembleElementMatrix (const NodalFiniteElement & fed1, 
			 const NodalFiniteElement & fed2, 
			 const NodalFiniteElement & fen1, 
			 const NodalFiniteElement & fen2, 
			 const ElementTransformation & eltrans1, 
			 const ElementTransformation & eltrans2, 
			 FlatMatrix<SCAL> & elsingle, 
			 FlatMatrix<SCAL> & eldouble, 
			 FlatMatrix<SCAL> & elduality, 
			 FlatMatrix<SCAL> & elhyper, 
			 LocalHeap & lh) const
  {
    //    (*testout) << "call element matrix" << endl;
    int order1, order2;

    if (eltrans1.GetElementNr() == eltrans2.GetElementNr())
      {
	order1 = 31;
	order2 = 33;
      }
    else
      {
	order1 = order2 = 5;
      }
    
    int ndofd1, ndofd2, ndofn1, ndofn2;
    int i, j, k, l;
    double det, fac;

    ndofd1 = fed1.GetNDof ();
    ndofd2 = fed2.GetNDof ();
    ndofn1 = fen1.GetNDof ();
    ndofn2 = fen2.GetNDof ();


    elsingle.AssignMemory (ndofn1, ndofn2, lh);
    eldouble.AssignMemory (ndofn1, ndofd1+ndofd2, lh);
    elduality.AssignMemory (ndofn1, ndofd1+ndofd2, lh);
    elhyper.AssignMemory (ndofd1+ndofd2, ndofd1+ndofd2, lh);

    elsingle = 0.0;
    eldouble = 0.0;
    elduality = 0.0;
    elhyper = 0.0;

    const IntegrationRule & ir1 = 
      GetIntegrationRules().SelectIntegrationRule (fed1.ElementType(), order1);
    const IntegrationRule & ir2 = 
      GetIntegrationRules().SelectIntegrationRule (fed2.ElementType(), order2);

    FlatVector<SCAL> shaped(ndofd1+ndofd2,lh);
    
    for (i = 0; i < ir1.GetNIP(); i++)
      for (j = 0; j < ir2.GetNIP(); j++)
	{
	  void * heapp;
	  heapp = lh.GetPointer();
	  
	  SpecificIntegrationPoint<1,2> sip1 (ir1[i], eltrans1, lh);
	  SpecificIntegrationPoint<1,2> sip2 (ir2[j], eltrans2, lh);
	  
	  det = fabs (sip1.GetJacobiDet() * sip2.GetJacobiDet());

	  const FlatVector<> & shapen1 = fen1.GetShape (sip1.IP(), lh);
	  const FlatVector<> & shapen2 = fen2.GetShape (sip2.IP(), lh);
	  const FlatVector<> & shaped1 = fed1.GetShape (sip1.IP(), lh);
	  const FlatVector<> & shaped2 = fed2.GetShape (sip2.IP(), lh);

	  for (k = 0; k < shaped1.Size(); k++)
	    shaped(k) = shaped1(k);
	  for (k = 0; k < shaped2.Size(); k++)
	    shaped(shaped1.Size()+k) = -shaped2(k);

	  double xy[3], n1[3], n2[3];
	  for (k = 0; k < 2; k++)
	    {
	      xy[k] = sip1.GetPoint()(k) - sip2.GetPoint()(k);
              cout << "S_LaplaceBEMElement :: AssembleElementMatrix : WARNING! Normal Vector not calculatet" << endl;
//	      n1[k] = eltrans1.NVMatrix()(k,0);
//	      n2[k] = eltrans2.NVMatrix()(k,0);
	    }

	  double e = Fundamental (xy);
	  double de = DFundamental (xy, n2);
	  double dde = DDFundamental (xy, n1, n2);

	  //	  (*testout) << "e = " << e << ", de = " << de << endl;

	  // single layer integral 
	  fac = e*det*sip1.IP().Weight()*sip2.IP().Weight();
	  for (k = 0; k < shapen1.Size(); k++)
	    for (l = 0; l < shapen2.Size(); l++)
	      elsingle(k,l) += fac * shapen1(k) * shapen2(l);

	  // double layer integral 
	  fac = de*det*sip1.IP().Weight()*sip2.IP().Weight();
	  for (k = 0; k < shapen1.Size(); k++)
	    for (l = 0; l < shaped.Size(); l++)
	      eldouble(k,l) += fac * shapen1(k) * shaped(l);

	  // hyper singular kernel
	  fac = 0.5*dde*det*sip1.IP().Weight()*sip2.IP().Weight();
	  for (k = 0; k < shaped.Size(); k++)
	    for (l = 0; l < shaped.Size(); l++)
	      elhyper(k,l) += fac * shaped(k) * shaped(l);
	}



    // assemble duality product
    if (eltrans1.GetElementNr() == eltrans2.GetElementNr())
      {
	int order = 2;

	int ndofd, ndofn;
	int i, j, k, l;
	double det, fac;
	
	ndofd = fed1.GetNDof ();
	ndofn = fen1.GetNDof ();

	const IntegrationRule & ir = 
	  GetIntegrationRules().SelectIntegrationRule (fed1.ElementType(), order);

	for (i = 0; i < ir.GetNIP(); i++)
	  {
	    void * heapp;
	    heapp = lh.GetPointer();
	    
	    SpecificIntegrationPoint<1,2> sip (ir[i], eltrans1, lh);
	    det = fabs (sip.GetJacobiDet());

	    FlatVector<double> shapen = fen1.GetShape (sip.IP(), lh);
	    FlatVector<double> shaped = fed1.GetShape (sip.IP(), lh);

	    double fac = det*sip.IP().Weight();
	    for (k = 0; k < shapen.Size(); k++)
	      for (l = 0; l < shaped.Size(); l++)
		elduality(k,l) += fac * shapen(k) * shaped(l);
	  }
      }
  }


  template <class SCAL>
  double S_LaplaceBEMElement<SCAL> :: 
  Fundamental (double * xy) const
  {
    double axy = sqrt (xy[0]*xy[0] + xy[1]*xy[1]);
    return -(log (axy) / (2*M_PI));
  }


  template <class SCAL>
  double S_LaplaceBEMElement<SCAL> :: 
  DFundamental (double * xy, double * n) const
  {
    double axy2 = xy[0]*xy[0] + xy[1]*xy[1];
    double nxy = xy[0] * n[0] + xy[1] * n[1];
    return nxy / axy2 / (2*M_PI);
  }

  template <class SCAL>
  double S_LaplaceBEMElement<SCAL> :: 
  DDFundamental (double * xy, double * n1, double * n2) const
  {
    double axy2 = xy[0]*xy[0] + xy[1]*xy[1];
    double n1xy = xy[0] * n1[0] + xy[1] * n1[1];
    double n2xy = xy[0] * n2[0] + xy[1] * n2[1];
    double n1n2 = n1[0] * n2[0] + n1[1] * n2[1];
    
    return (n1n2 / axy2 - n1xy * n2xy / (axy2*axy2)) / (2*M_PI);
  }


  template class S_BEMElement<double>;
  template class S_BEMElement<Complex>;

  template class S_LaplaceBEMElement<double>;
  template class S_LaplaceBEMElement<Complex>;








  const double EulerGamma = 0.57721566490153286060;
  
  double PolyGamma (int n)
  {
    double sum = 0;
    for (int k = 1; k <= n-1; k++)
      sum += 1.0 / k;
    sum -= EulerGamma;
    return sum;
  }
  
  double Fact (int n)
  {
    double f = 1;
    for (int k = 2; k <= n; k++)
      f *= k;
    return f;
  }

  #define STEPS 100

  double BesselOld (int n, double z)
  {
    double sum = 0;
    for (int k = 0; k < STEPS; k++)
      sum += pow (-1.0, k) / Fact(k) / Fact(k+n) * pow (z/2, n+2*k);
    return sum;
  }

  double Bessel (int n, double z)
  {
    double sum = 0;
    double fac = pow (z/2, n) / Fact(n);
    int k = 0;
    do
      {
	sum += fac;
	k++;
	fac *= (-1) * z*z / (4 * k * (k+n));
      }
    while (fabs (fac) > 1e-20);
    return sum;
  }
  

  double NeumannFktOld (int n, double z)
  {
    int k;
    double sum = 2 / M_PI * Bessel (n, z) * log (z/2.0);

    double sum1 = 0;
    for (k = 0; k <= n-1; k++)
      sum1 += Fact (n-k-1) / Fact(k) * pow (z/2, 2 * k - n);

    sum -= sum1 / M_PI;

    double sum2 = 0;
    for (k = 0; k <= STEPS; k++)
      sum2 += pow (-1.0, k) * (PolyGamma(k+1) + PolyGamma (k+n+1)) 
	/ Fact(k) / Fact(k+n) * pow (z/2, 2*k+n);

    sum -= sum2 / M_PI;

    return sum;
  }



  // Bessel funktion of second kind, known as Neumann Fkt, also Weber Fkt
  // Written as Y_n (z)
  double NeumannFktOld1 (int n, double z)
  {
    int k;
    if (z == 0) z = 1e-10;

    double sum = 2 / M_PI * Bessel (n, z) * log (z/2.0);

    double sum1 = 0;
    for (k = 0; k <= n-1; k++)
      sum1 += Fact (n-k-1) / Fact(k) * pow (z/2, 2 * k - n);

    sum -= sum1 / M_PI;

    double sum2 = 0;
    double fac = pow (z/2, n) / Fact(n);
    k = 0;
    do
      {
	sum2 += fac * (PolyGamma(k+1) + PolyGamma(k+n+1));
	k++;
	fac *= (-1) * z*z / (4 * k * (k+n));
      }
    while (fabs (fac) > 1e-20);

    sum -= sum2 / M_PI;

    return sum;
  }



  // Bessel funktion of second kind, known as Neumann Fkt, also Weber Fkt
  // Written as Y_n (z)
  double NeumannFkt (int n, double z)
  {
    int k;
    if (z == 0) z = 1e-10;

    double sum = 2 / M_PI * Bessel (n, z) * log (z/2.0);

    double sum1 = 0;
    for (k = 0; k <= n-1; k++)
      sum1 += Fact (n-k-1) / Fact(k) * pow (z/2, 2 * k - n);

    sum -= sum1 / M_PI;

    double sum2 = 0;
    double fac = pow (z/2, n) / Fact(n);
    k = 0;
    double polygamma_kp1 = PolyGamma(1);
    double polygamma_kpnp1 = PolyGamma(n+1);
    do
      {
	// sum2 += fac * (PolyGamma(k+1) + PolyGamma(k+n+1));
	sum2 += fac * (polygamma_kp1 + polygamma_kpnp1);

	k++;
	polygamma_kp1 += 1.0 / k;
	polygamma_kpnp1 += 1.0 / (k+n);
	fac *= (-1) * z*z / (4 * k * (k+n));
      }
    while (fabs (fac) > 1e-20);

    sum -= sum2 / M_PI;

    return sum;
  }



  Complex GreenReg (double k, double r)
  {
    if (k*r < 20)
      {
	Complex h0 = Complex (Bessel (0, k*r), 
			      NeumannFkt (0, k*r));
	return Complex(0,0.25) * h0 + log(r) / (2*M_PI);
      }
    else
      {
	double kr = k*r;
	double c1 = cos(kr+M_PI/4) / sqrt(kr) / 5;
	double c2 = cos(kr-M_PI/4) / sqrt(kr) / 5;
	return Complex (c1+log(r)/(2*M_PI), c2);
      }
  }
  
  Complex DGreenReg (double k, double r)
  {
    if (k*r < 20)
      {
	Complex h1 = Complex (Bessel (1, k*r) , 
			      NeumannFkt (1, k*r));
	return Complex(0,-0.25 * k) * h1 + 1.0 / r / (2*M_PI);
      }
    else
      {
	double kr = k*r;
	double c1 = k/5 * 
	  ( -sin(kr+M_PI/4) / sqrt(kr) + 0.5 * cos(kr+M_PI/4) * pow (kr, -1.5));
	double c2 = k/5 *
	  ( -sin(kr-M_PI/4) / sqrt(kr) + 0.5 * cos(kr-M_PI/4) * pow (kr, -1.5));
	return Complex (c1+1/r/(2*M_PI), c2);
      }
  }

  Complex DDGreenReg (double k, double r)
  {
    if (k*r < 20)
      {
	Complex h2 = Complex (Bessel (1, k*r) / (k*r) - Bessel (0, k*r),
			      NeumannFkt (1,k*r) / (k*r) - NeumannFkt (0, k*r));
	
	return Complex(0,0.25*k*k) * h2 - 1.0 / (r*r) / (2*M_PI);
      }
    else
      {
	double kr = k*r;
	double c1 = k*k/5 * 
	  ( -cos(kr+M_PI/4) / sqrt(kr) - sin(kr+M_PI/4) * pow(kr,-1.5)
	    -0.75 * cos(kr+M_PI/4) * pow (kr, -2.5));
	double c2 = k/5 *
	  ( -cos(kr-M_PI/4) / sqrt(kr) - sin(kr-M_PI/4) * pow(kr,-1.5)
	    -0.75 * cos(kr-M_PI/4) * pow (kr, -2.5));
	return Complex (c1-1/(r*r*2*M_PI), c2);
      }
  }




  Helmholtz_BEMElement ::
  Helmholtz_BEMElement (const FESpace & afes, 
			const FESpace & afes_neumann,
			double ak0)
    : S_BEMElement<Complex> (afes, afes_neumann), laplace(afes, afes_neumann)
  {
    k0 = ak0;
    cout << "create helmholtz" << endl;

    int i;
    clock_t start, end;
    double val = 0;

    /*
    start = clock();
    for (i = 0; i < 100000; i++)
      val += Bessel (0, 1);
    end = clock();
    cout << "Evaluating Bessel(0,1) takes " << double(end-start)/(CLOCKS_PER_SEC*1e5) << endl;

    start = clock();
    for (i = 0; i < 100000; i++)
      val += Bessel (0, 10);
    end = clock();
    cout << "Evaluating Bessel(0,10) takes " << double(end-start)/(CLOCKS_PER_SEC*1e5) << endl;

    start = clock();
    for (i = 0; i < 100000; i++)
      val += Bessel (0, 100);
    end = clock();
    cout << "Evaluating Bessel(0,100) takes " << double(end-start)/(CLOCKS_PER_SEC*1e5) << endl;


    start = clock();
    for (i = 0; i < 100000; i++)
      val += NeumannFkt (0, 1);
    end = clock();
    cout << "Evaluating NeumannFkt(0,1) takes " << double(end-start)/(CLOCKS_PER_SEC*1e5) << endl;

    start = clock();
    for (i = 0; i < 100000; i++)
      val += NeumannFkt (0, 10);
    end = clock();
    cout << "Evaluating NeumannFkt(0,10) takes " << double(end-start)/(CLOCKS_PER_SEC*1e5) << endl;

    start = clock();
    for (i = 0; i < 100000; i++)
      val += NeumannFkt (0, 100);
    end = clock();
    cout << "Evaluating NeumannFkt(0,100) takes " << double(end-start)/(CLOCKS_PER_SEC*1e5) << endl;





    cout << "val = " << val << endl;
*/








    ofstream out("helmholtz_green.dat");
    ofstream dout("helmholtz_dgreen.dat");
    ofstream ddout("helmholtz_ddgreen.dat");

    ofstream out1("bessel.dat");
    ofstream out2("bessel2.dat");
    double r;
    /*
    for (r = 0.00001; r < 10; r += 0.01)
      {
	//	out1 << r << " " << Bessel(0,r)  << " " << NeumannFkt(0,r) << endl;
	out1 << r << " " << Bessel(0, r) << " " << Bessel(1, r) << " " << Bessel(2, r) << endl;
	//	out1 << r << " " << NeumannFkt(0, r) << " " << NeumannFkt(1, r) << " " << NeumannFkt(2, r) << endl;
	//	out2 << r << " " << DNeumannFktFast(0, r) << " " << DNeumannFktFast(1, r) << " " << DNeumannFktFast(2, r) << endl;
      }
    */    
    for (r = 0.00001; r < 100; r += 0.1)
      {
	/*
	out << r << " "
	    << Bessel (2, r) << " "
	    << Bessel (1, r) / (r) - Bessel (0, r) << " "
	    << Bessel (2, r) - (Bessel (1, r) / (r) - Bessel (0, r)) << " "
	    << endl;
	*/
	/*
	Complex b (Bessel (0, r), NeumannFkt(0,r) );
	Complex e (cos (r), -sin(r));
	Complex prod = b * e;
	out << r << " " << b.real() << " " << b.imag() << " "
	    << e.real() << " " << e.imag() << " "
	    << prod.real() << " " << prod.imag() << endl;
	*/

	Complex g = GreenReg (1, r);
	g -= log(r) / (2*M_PI);

	/*
	double c1 = (cos(r)-sin(r)) / (sqrt(r)+1e-8) / (5 * sqrt(2.0));
	double c2 = (cos(r)+sin(r)) / (sqrt(r)+1e-8) / (5 * sqrt(2.0));
	*/
	double c1 = cos(r+M_PI/4) / (sqrt(r)+1e-8) / 5;
	double c2 = cos(r-M_PI/4) / (sqrt(r)+1e-8) / 5;

	double c3 = -cos(r) / (r*sqrt(r)+0.0001) / (50);
	double c4 = sin(r) / (r*sqrt(r)+0.0001) / (50);

	out << r << " "
	    << g.real() << " " << g.imag() << " ";
	out << c1 << " " << c2 << " "
	    << c3 << " " << c4 
	    << endl;
      }

    for (r = 0.00001; r < 100; r += 0.1)
      {
	Complex g = GreenReg (1, r);
	//	out << r << " " << g.real() << " " << g.imag() << endl;

	Complex dg = DGreenReg (1, r);
	dout << r << " " << dg.real() << " " << dg.imag() << endl;

	Complex ddg = DDGreenReg (1, r);
	ddout << r << " " << ddg.real() << " " << ddg.imag() << endl;
      }
    
  }

  void Helmholtz_BEMElement :: Update ()
  {
    BEMElement::Update();
    laplace.Update();
  }
  
  
  void Helmholtz_BEMElement ::
  AssembleIntegralOps (FlatMatrix<Complex> & singlelayer, 
		       FlatMatrix<Complex> & doublelayer, 
		       FlatMatrix<Complex> & hypersingular, 
		       FlatMatrix<Complex> & duality,
		       LocalHeap & lh) const
  {

    laplace.AssembleIntegralOps (singlelayer, doublelayer, 
				 hypersingular, duality, lh);


    const MeshAccess & ma = fes.GetMeshAccess();

    int i, j, k, l;

    ElementTransformation eltransi;
    ElementTransformation eltransj;
    ARRAY<int> dnumsdi, dnumsdj, dnumsni, dnumsnj;

    for (i = 0; i < ma.GetNSE(); i++)
      {
	cout << i << "/" << ma.GetNSE() << " " << flush;
	for (j = 0; j < ma.GetNSE(); j++)
	  {
	    if (ma.GetSElIndex(i) != 0 || ma.GetSElIndex(j) != 0)
	      continue;
	    
	    lh.CleanUp();
	    
	    ma.GetSurfaceElementTransformation (i, eltransi, lh);
	    fes.GetSDofNrs (i, dnumsdi);
	    fes_neumann.GetSDofNrs (i, dnumsni);
	    
	    ma.GetSurfaceElementTransformation (j, eltransj, lh);
	    fes.GetSDofNrs (j, dnumsdj);
	    fes_neumann.GetSDofNrs (j, dnumsnj);
	    
	    for (k = 0; k < dnumsdi.Size(); k++)
	      dnumsdi[k] = fem2bem[dnumsdi[k]];
	    for (k = 0; k < dnumsdj.Size(); k++)
	      dnumsdj[k] = fem2bem[dnumsdj[k]];
	    

	    const NodalFiniteElement & fedi = 
	      dynamic_cast<const NodalFiniteElement&> (fes.GetSFE(i, lh));
	    const NodalFiniteElement & fedj = 
	      dynamic_cast<const NodalFiniteElement&> (fes.GetSFE(j, lh));
	    const NodalFiniteElement & feni = 
	      dynamic_cast<const NodalFiniteElement&> (fes_neumann.GetSFE(i, lh));
	    const NodalFiniteElement & fenj = 
	      dynamic_cast<const NodalFiniteElement&> (fes_neumann.GetSFE(j, lh));
	    
	    FlatMatrix<Complex> elsingle, eldouble, elduality, elhyper;

	  
	    AssembleElementMatrix (fedi, fedj, feni, fenj, eltransi, eltransj, 
				   elsingle, eldouble, elduality, elhyper,
				   lh);

	    (*testout) << "elsingle = " << elsingle << endl;
	    (*testout) << "eldouble = " << eldouble << endl;
	    (*testout) << "elhyper = " << elhyper << endl;
	    (*testout) << "eldual = " << elduality << endl;
	    
	    for (k = 0; k < dnumsni.Size(); k++)
	      for (l = 0; l < dnumsnj.Size(); l++)
		if (dnumsni[k] != -1 && dnumsnj[l] != -1)
		  singlelayer(dnumsni[k], dnumsnj[l]) += elsingle(k,l);
	    
	    for (k = 0; k < dnumsni.Size(); k++)
	      for (l = 0; l < dnumsdj.Size(); l++)
		if (dnumsni[k] != -1 && dnumsdj[l] != -1)
		  doublelayer(dnumsni[k], dnumsdj[l]) += eldouble(k,l);

	    for (k = 0; k < dnumsdi.Size(); k++)
	      for (l = 0; l < dnumsdj.Size(); l++)
		if (dnumsdi[k] != -1 && dnumsdj[l] != -1)
		  hypersingular(dnumsdi[k], dnumsdj[l]) += elhyper(k,l);
	  }
      }
    (*testout) << "single = " << singlelayer << endl
	       << "double = " << doublelayer << endl
	       << "hyper = " << hypersingular << endl;
  }
  
  void Helmholtz_BEMElement ::
  AssembleElementMatrix (const NodalFiniteElement & fed1, 
			 const NodalFiniteElement & fed2, 
			 const NodalFiniteElement & fen1, 
			 const NodalFiniteElement & fen2, 
			 const ElementTransformation & eltrans1, 
			 const ElementTransformation & eltrans2, 
			 FlatMatrix<Complex> & elsingle, 
			 FlatMatrix<Complex> & eldouble, 
			 FlatMatrix<Complex> & elduality, 
			 FlatMatrix<Complex> & elhyper, 
			 LocalHeap & lh) const
  {
    //    (*testout) << "call element matrix" << endl;
    int order1, order2;

    if (eltrans1.GetElementNr() == eltrans2.GetElementNr())
      {
	order1 = 17;
	order2 = 19;
      }
    else
      {
	order1 = order2 = 9;
      }

    int ndofd1, ndofd2, ndofn1, ndofn2;
    int i, j, k, l;
    double det;
    Complex fac;

    ndofd1 = fed1.GetNDof ();
    ndofd2 = fed2.GetNDof ();
    ndofn1 = fen1.GetNDof ();
    ndofn2 = fen2.GetNDof ();


    elsingle.AssignMemory (ndofn1, ndofn2, lh);
    eldouble.AssignMemory (ndofn1, ndofd2, lh);
    elduality.AssignMemory (ndofn1, ndofd2, lh);
    elhyper.AssignMemory (ndofd1, ndofd2, lh);

    elsingle = 0.0;
    eldouble = 0.0;
    elduality = 0.0;
    elhyper = 0.0;

    const IntegrationRule & ir1 = 
      GetIntegrationRules().SelectIntegrationRule (fed1.ElementType(), order1);
    const IntegrationRule & ir2 = 
      GetIntegrationRules().SelectIntegrationRule (fed2.ElementType(), order2);

    
    for (i = 0; i < ir1.GetNIP(); i++)
      for (j = 0; j < ir2.GetNIP(); j++)
	{
	  void * heapp;
	  heapp = lh.GetPointer();
	  
	  SpecificIntegrationPoint<1,2> sip1 (ir1[i], eltrans1, lh);
	  SpecificIntegrationPoint<1,2> sip2 (ir2[j], eltrans2, lh);
	  
	  det = fabs (sip1.GetJacobiDet() * sip2.GetJacobiDet());

	  const ngbla::FlatVector<> & shapen1 = fen1.GetShape (sip1.IP(), lh);
	  const ngbla::FlatVector<> & shapen2 = fen2.GetShape (sip2.IP(), lh);
	  const ngbla::FlatVector<> & shaped1 = fed1.GetShape (sip1.IP(), lh);
	  const ngbla::FlatVector<> & shaped2 = fed2.GetShape (sip2.IP(), lh);

	  double xy[3], n1[3], n2[3];
	  for (k = 0; k < 2; k++)
	    {
	      xy[k] = sip1.GetPoint()(k) - sip2.GetPoint()(k);
              cout << "Helmholtz_BEMElement :: AssembleElementMatrix : WARNING! Normal Vector not calculatet" << endl;
//	      n1[k] = eltrans1.NVMatrix()(k,0);
//	      n2[k] = eltrans2.NVMatrix()(k,0);
	    }

	  Complex e = Fundamental (xy);
	  Complex de = DFundamental (xy, n2);
	  Complex dde = DDFundamental (xy, n1, n2);

	  //	  (*testout) << "e = " << e << ", de = " << de << endl;

	  // single layer integral 
	  fac = e*det*sip1.IP().Weight()*sip2.IP().Weight();
	  for (k = 0; k < shapen1.Size(); k++)
	    for (l = 0; l < shapen2.Size(); l++)
	      elsingle(k,l) += fac * shapen1(k) * shapen2(l);

	  // double layer integral 
	  fac = de*det*sip1.IP().Weight()*sip2.IP().Weight();
	  for (k = 0; k < shapen1.Size(); k++)
	    for (l = 0; l < shaped2.Size(); l++)
	      eldouble(k,l) += fac * shapen1(k) * shaped2(l);

	  // hyper singular kernel
	  fac = dde*det*sip1.IP().Weight()*sip2.IP().Weight();
	  for (k = 0; k < shaped1.Size(); k++)
	    for (l = 0; l < shaped2.Size(); l++)
	      elhyper(k,l) += fac * shaped1(k) * shaped2(l);
	}
  }




  Complex Helmholtz_BEMElement :: 
  Fundamental (double * xy) const
  {
    double axy = sqrt (xy[0]*xy[0] + xy[1]*xy[1]);
    return GreenReg (k0, axy);
  }



  Complex Helmholtz_BEMElement :: 
  DFundamental (double * xy, double * n) const
  {
    double axy = sqrt (xy[0]*xy[0] + xy[1]*xy[1]);
    double nxy = xy[0] * n[0] + xy[1] * n[1];
    return DGreenReg (k0, axy) * nxy / axy;
  }

  Complex Helmholtz_BEMElement :: 
  DDFundamental (double * xy, double * n1, double * n2) const
  {
    double axy = sqrt (xy[0]*xy[0] + xy[1]*xy[1]);
    double n1xy = xy[0] * n1[0] + xy[1] * n1[1];
    double n2xy = xy[0] * n2[0] + xy[1] * n2[1];
    double n1n2 = n1[0] * n2[0] + n1[1] * n2[1];
    
    Complex dg = DGreenReg (k0, axy);
    Complex ddg = DDGreenReg (k0, axy);

    return n1xy * n2xy / (axy*axy) * ddg + (n1n2 * axy * axy - n1xy * n2xy ) / (axy * axy * axy) * dg;
  }


#ifdef none
  void Helmholtz_BEMElement::
  AssembleElementMatrix (const NodalFiniteElement & fed1, 
			 const NodalFiniteElement & fed2, 
			 const NodalFiniteElement & fen1, 
			 const NodalFiniteElement & fen2, 
			 const ElementTransformation & eltrans1, 
			 const ElementTransformation & eltrans2, 
			 FlatMatrix<TSCAL> & elsingle, 
			 FlatMatrix<TSCAL> & eldouble, 
			 FlatMatrix<TSCAL> & elduality, 
			 FlatMatrix<TSCAL> & elhyper, 
			 LocalHeap & lh) const
  {

    Vec<2> a1, b1, a2, b2, nv1, nv2;
    double l1, l2, alpha;
    int i, j, k, l;

    for (i = 0; i < 2; i++)
      {
        cout << "Helmholtz_BEMElement :: AssembleElementMatrix : WARNING! Normal Vector not calculatet" << endl;
        
	a1(i) = eltrans1.PointMatrix()(i, 0);
	b1(i) = eltrans1.PointMatrix()(i, 1);
//	nv1(i) = eltrans1.NVMatrix()(i,0);

	a2(i) = eltrans2.PointMatrix()(i, 0);
	b2(i) = eltrans2.PointMatrix()(i, 1);
//	nv2(i) = eltrans2.NVMatrix()(i,0);
      }

    nv1 /= L2Norm (nv1);
    nv2 /= L2Norm (nv2);

    Vec<2> ab1 = b1 - a1;
    Vec<2> ab2 = b2 - a2;
    
    l1 = L2Norm (ab1);
    l2 = L2Norm (ab2);
    alpha = l2/l1;
    
    int singular_case;
    if (L2Norm (Vec<2> (a2-b1)) < 1e-12)
      {
	if (InnerProduct (nv1, nv2) > 0.999)
	  singular_case = 2;
	else
	  singular_case = 4;
      }
    else if (L2Norm (Vec<2> (a1-b2)) < 1e-12)
      {
	if (InnerProduct (nv1, nv2) > 0.999)
	  singular_case = 3;
	else
	  singular_case = 5;
      }
    else if (L2Norm (Vec<2> (a1-a2)) < 1e-12 &&
	     L2Norm (Vec<2> (b1-b2)) < 1e-12)
      {
	singular_case = 6;
      }
    else
      singular_case = 1;

    /*
    cout << "el1: " << a1 << ", " << b1 << endl;
    cout << "el2: " << a2 << ", " << b2 << endl;
    cout << "case : " << singular_case << endl;
    */


    const IntegrationRule & ir1 = 
      GetIntegrationRules().SelectIntegrationRule (fed1.ElementType(), 3);
    const IntegrationRule & ir2 = 
      GetIntegrationRules().SelectIntegrationRule (fed2.ElementType(), 5);


    /*
    cout << "DGreen(1,1) = " << DGreenReg (3,2) << " =?= " 
	 << DGreenReg_alt (3,2) 
	 << " =?= " << (GreenReg(3,2.001) - GreenReg (3,2))*1000.0
	 << endl;

    double x = 1.0;
    cout << "DDGreen(1,1) = " << DDGreenReg (k,x) << " =?= " 
	 << DDGreenReg_alt (k,x)
	 << " = " << (GreenReg(k,x+1e-3) - 2.0 * GreenReg (k,x) + GreenReg(k,x-1e-3) )*1e6
	 << endl;
    exit (1);
    */


    for (i = 0; i < ir1.GetNIP(); i++)
      for (j = 0; j < ir2.GetNIP(); j++)
	{
	  void * heapp;
	  heapp = lh.GetPointer();
	  
	  SpecificIntegrationPoint<1,2> sip1 (ir1[i], eltrans1, lh);
	  SpecificIntegrationPoint<1,2> sip2 (ir2[j], eltrans2, lh);
	  
	  double det = fabs (sip1.GetJacobiDet() * sip2.GetJacobiDet());

	  const ngbla::FlatVector<> & shaped1 = fed1.GetShape (sip1.IP(), lh);
	  const ngbla::FlatVector<> & shaped2 = fed2.GetShape (sip2.IP(), lh);


	  Vec<2> rvec = sip2.GetPoint() - sip1.GetPoint();
	  double r = L2Norm (rvec);
	  
	  // regular part of single-layer potential
	  double fac = det*sip1.IP().Weight()*sip2.IP().Weight();

	  elsingle(0,0) += fac * GreenReg (k0, r);

	  
	  double rnv1 = InnerProduct (rvec, nv1);
	  double rnv2 = InnerProduct (rvec, nv2);
	 
	  for (k = 0; k < shaped2.Size(); k++)
	    eldouble(0, k) += fac * DGreenReg (k0, r) * rnv2/r * shaped2(k);

	  for (k = 0; k < shaped1.Size(); k++)
	    for (l = 0; l < shaped2.Size(); l++)
	      elhyper(l, k) += fac * DDGreenReg (k0, r) * rnv1 * rnv2 / (r*r) 
		* shaped1(l) * shaped2(k);
	}


    double A = 0.5/M_PI;
    double d = 1.0;
    double a = alpha;
    double L = l1;
    double Vanal;
    Vec<2> Kanal;
    Mat<2,2> Danal;

    Vanal = 0;
    Kanal = 0.0;
    Danal = 0.0;
    
    switch (singular_case)
      {
      case 1:
	for (int i = 0; i < ir1.GetNIP(); i++)
	  for (int j = 0; j < ir2.GetNIP(); j++)
	    {
	      void * heapp;
	      heapp = lh.GetPointer();
	      
	      SpecificIntegrationPoint<1,2> sip1 (ir1[i], eltrans1, lh);
	      SpecificIntegrationPoint<1,2> sip2 (ir2[j], eltrans2, lh);

	      const ngbla::FlatVector<> & shaped1 = fed1.GetShape (sip1.IP(), lh);
	      const ngbla::FlatVector<> & shaped2 = fed2.GetShape (sip2.IP(), lh);
	  
	      double det = fabs (sip1.GetJacobiDet() * sip2.GetJacobiDet());

	      Vec<2> rvec = sip2.GetPoint() - sip1.GetPoint();
	      double r = L2Norm (rvec);
	  
	      double fac = det*sip1.IP().Weight()*sip2.IP().Weight();
	      
	      Vanal += fac * (-A) * log (r);
	      
	      double rnv1 = InnerProduct (rvec, nv1);
	      double rnv2 = InnerProduct (rvec, nv2);
	 
	      for (k = 0; k < shaped2.Size(); k++)
		Kanal(k) += fac * (-A) / r * rnv2/r * shaped2(k);

	      for (k = 0; k < shaped1.Size(); k++)
		for (l = 0; l < shaped2.Size(); l++)
		  Danal(l, k) += fac * A / (r*r) * rnv1 * rnv2 / (r*r) 
		    * shaped1(l) * shaped2(k);
	    }
	break;
	
      case 2: 
	{
	  Vanal = (A*L*L*(2*a*a*log(a) - 2*((1 + a*a)*log(1 + a) + a*(-3 + log(sqr(1 + a)*L*L)))))/2;

	  Danal(0,0) = -((A*(-a + a*a*log(a) - (-1 + a*a)*log(1 + a)))/a);
	  Danal(0,1) = -((A*(a + a*a*log(a) - (1 + a*a)*log(1 + a)))/a);
	  Danal(1,0) = (A*(-a + a*(2 + a)*log(a) - sqr(1 + a)*log(1 + a)))/a - 2*A*log(d/L);
	  Danal(1,1) = (A*(a + a*a*log(a) - (-1 + a*a)*log(1 + a)))/a; 

	  break;
	}
      case 3: 
	{
	  Vanal = (A*L*L*(6*a + 2*a*a*log(a) - 2*sqr(1 + a)*log(1 + a) - 4*a*log(L)))/2;

	  Danal(1,1)=(A*(a + a*a*log(a) - (-1 + a*a)*log(1 + a)))/a;
	  Danal(1,2)=(A*(-a + a*(2 + a)*log(a) - sqr(1 + a)*log(1 + a)))/a - 2*A*log(d/L);
	  Danal(2,1)=-((A*(a + a*a*log(a) - (1 + a*a)*log(1 + a)))/a);
	  Danal(2,2)=-((A*(-a + a*a*log(a) - (-1 + a*a)*log(1 + a)))/a);

	  break;
	}
      case 4:
	{
	  Vanal = -(A*L*L*(-6*a + M_PI + a*a*M_PI - 2*atan(1/a) - 2*a*a*atan(a) + 2*a*log(1 + a*a) + 4*a*log(L)))/2;

	  Kanal(0)=(A*L*(-4*a*atan(a) + 2*a*a*log(a) - (-1 + a*a)*log(1 + a*a)))/(2.0*a); 
	  Kanal(1)=(A*L*(2*a*a*log(a) - (1 + a*a)*log(1 + a*a)))/(2.0*a);


	  Danal(0,0)=(A*(2*a - M_PI + a*a*M_PI + 2*atan(1/a) - 2*a*a*atan(a)))/(2.*a);
	  Danal(0,1)=(A*(-2*a + M_PI + a*a*M_PI - 2*atan(1/a) - 2*a*a*atan(a)))/(2.*a);
	  Danal(1,0)=-(A*(2*a + M_PI + a*a*M_PI - 2*atan(1/a) - 2*a*a*atan(a) - 4*a*log(a) + 2*a*log(1 + a*a)))/(2.*a) - 2*A*log(d/L);
	  Danal(1,1)=-(A*(-2*a - M_PI + a*a*M_PI + 2*atan(1/a) - 2*a*a*atan(a)))/(2.*a);


	  break;
	}
      case 5:
	{
	  Vanal    = -(A*L*L*(-2*(-1 + a*a)*atan(a) + a*(-6 + a*M_PI + 2*log(1 + a*a) + 4*log(L))))/2;
	  Kanal(0) = (A*L*(2*a*a*log(a) - (1 + a*a)*log(1 + a*a)))/(2.0*a); 
	  Kanal(1) = (A*L*(-4*a*atan(a) + 2*a*a*log(a) - (-1 + a*a)*log(1 + a*a)))/(2.0*a);

	  Danal(0,0)=-(A*(a*(-2 + a*M_PI) - 2*(1 + a*a)*atan(a)))/(2.*a);
	  Danal(0,1)=-(A*(-2*(-1 + a*a)*atan(a) + a*(2 + a*M_PI - 4*log(a) + 2*log(1 + a*a))))/(2.*a) - 2*A*log(d/L);
	  Danal(1,0)=(A*(a*(-2 + a*M_PI) - 2*(-1 + a*a)*atan(a)))/(2.*a);
	  Danal(1,1)=(A*(a*(2 + a*M_PI) - 2*(1 + a*a)*atan(a)))/(2.*a);

	  break;
	}
      case 6:
	{
	  Vanal = -(A*L*L*(-3 + 2*log(L)));
	  Kanal(0) = Kanal(1) = L / 2;

	  Danal(1,1)=A + 2*A*log(d/L);
	  Danal(1,2)=-(A);
	  Danal(2,1)=-(A);
	  Danal(2,2)=A + 2*A*log(d/L);
	  break;
	}
      }
    elsingle(0,0) += Vanal;



  }


#endif
  
}
