!{\src2tex{textfont=tt}}
!!****f* ABINIT/outkss
!! NAME
!! outkss
!!
!! FUNCTION
!! This routine creates an output file containing the Kohn-Sham
!! electronic Structure for a large number of eigenstates
!! (energies and eigen-functions).
!! The resulting file (_KSS) is needed for a GW post-treatment.
!!
!! The routine drives the following operations:
!!  - Re-ordering G-vectors according to stars (sets of Gs with
!!    the same magnitude). A set of g for all k-points is created.
!!  - Creating and opening the output '_KSS' file
!!  - Printing out output file header information...
!! ... and, for each k-point:
!!    According to 'kssform', either
!!      - Re-computing <G|H|G_prim> matrix elements for all (G, G_prim).
!!        Diagonalizing H in the plane-wave basis.
!!   or - Taking eigenvalues/vectors from congugate-gradient ones.
!!  - Writing out eigenvalues and eigenvectors.
!!
!! COPYRIGHT
!! Copyright (C) 2000-2007 ABINIT group (MT, VO, AR, MG)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt.
!!
!! INPUTS
!!  atindx(natom)=index table for atoms (see scfcv.f)
!!  atindx1(natom)=index table for atoms, inverse of atindx (see scfcv.f)
!!  cg(2,mpw*nspinor*mband*mkmem*nsppol)=planewave coefficients of wavefunctions.
!!  dtfil <type(datafiles_type)>=variables related to files
!!  dtset <type(dataset_type)>=all input variables for this dataset
!!  ecut=cut-off energy for plane wave basis sphere (Ha)
!!  eigen(mband*nkpt*nsppol)=array for holding eigenvalues (hartree)
!!  gmet(3,3)=reciprocal space metric tensor in bohr**-2.
!!  gprimd(3,3)=dimensional reciprocal space primitive translations
!!  hdr <type(hdr_type)>=the header of wf, den and pot files
!!  kg(3,mpw*mkmem)=reduced planewave coordinates.
!!  kssform=govern the Kohn-Sham Structure file format
!!  mband=maximum number of bands
!!  mgfft=maximum size of 1D FFTs
!!  mkmem =number of k points which can fit in memory; set to 0 if use disk
!!  mpi_enreg=informations about MPI parallelization
!!  mpsang= 1+maximum angular momentum for nonlocal pseudopotentials
!!  mpw=maximum dimensioned size of npw.
!!  natom=number of atoms in cell.
!!  nattyp(ntypat)= # atoms of each type.
!!  nfft=(effective) number of FFT grid points (for this processor)
!!  nkpt=number of k points.
!!  npwarr(nkpt)=number of planewaves in basis at this k point
!!  nspinor=number of spinorial components of the wavefunctions
!!  nsppol=1 for unpolarized, 2 for spin-polarized
!!  nsym=number of symmetries in space group
!!  ntypat=number of types of atoms in unit cell.
!!  occ(mband*nkpt*nsppol)=occupation number for each band (usually 2) for each k.
!!  ph1d(2,3*(2*mgfft+1)*natom)=one-dimensional structure factor information
!!  prtvol=control print volume and debugging output
!!  psps <type(pseudopotential_type)>=variables related to pseudopotentials
!!  rmet(3,3)=real space metric (bohr**2)
!!  rprimd(3,3)=dimensional primitive translations for real space (bohr)
!!  ucvol=unit cell volume (bohr**3)
!!  wffnow=information about wf disk file
!!  vtrial(nfft,nsppol)=the trial potential
!!  xred(3,natom)=reduced dimensionless atomic coordinates
!!  ylm(mpw*mkmem,mpsang*mpsang*useylm)=real spherical harmonics for each G and k point
!!
!! OUTPUT
!!  (only writing)
!!
!! NOTES
!! * The routine can be time consuming (in particular when computing
!!   <G|H|G_prim> elements for all (G, G_prim)) (kssform=1).
!!   So, it is recommended to call it once per run...
!!
!! * The routine RE-compute all Hamiltonian terms.
!!   So it is equivalent to an additional electronic SC cycle.
!!   (This has no effect is convergence was reach... If not,
!!    eigenvalues/vectors may differs from the congugaste gradient ones)
!!
!! * The routine does not work with nspinor=2 or mpspso=2 (mpssoang>mpsang).
!!
!! * The routine works only for norm-conserving pseudopotentials (no PAW).
!!
!! * The routine gives correct _KSS files, only with one projector in each
!!   angular momentum channel.
!!
!! * The routine gives strange file formats if ((kssform==0 or kssform==1) and
!!   real(dp) means single precision). It is needed to correct all the write
!!   in such a way to force however real(dp) on the output file.
!!
!! * There exists two file formats:
!!    kssform==1 diagonalized file _KSS in real(dp) is generated.
!!    kssform==3 same as kssform=1 but the wavefunctions are not diagonalized
!!               (they are taken from congugate-gradient ones)
!!    Old kssform=0 and kssform=2 are obsolete - no longer available
!!    The reccomended form is 1.
!!
!! * Here are used mpsang and psps%mpsang??? which ones?
!!   We need 1+maximum angular momentum for nonlocal pseudopotentials
!!   to dimensionate variables and write correctly the files.
!!
!! PARENTS
!!      outscfcv
!!
!! CHILDREN
!!      abi_etsf_geo_put,abi_etsf_init,chpev,chpevx,clsopn,dsksta,dtsetcopy
!!      dtsetfree,etsf_io_gwdata_put,etsf_io_low_close,etsf_io_low_error_to_str
!!      etsf_io_low_open_modify,fftpac,fourwf,hdr_io,hdr_io_etsf,hdr_skip
!!      leave_new,leave_test,memkss,mkffnl,mkkin,mpi_barrier,nonlop,ph1d3d
!!      rdnpw,rwwf,sort_dp,sphereboundary,timab,writewf,wrtout,xcomm_init
!!      xexch_mpi,xsum_mpi,zhpev,zhpevx
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

subroutine outkss(atindx,atindx1,dtfil,dtset,ecut,gmet,gprimd,hdr,kg,&
&                  kssform,mband,mgfft,mkmem,mpi_enreg,mpsang,mpw,natom,&
&                  nattyp,nfft,nkpt,npwarr,nspinor,nsppol,nsym,ntypat,occ,&
&                  ph1d,prtvol,psps,rmet,rprimd,ucvol,wffnow,vtrial,xred,ylm,cg,eigen)

 use defs_basis
 use defs_datatypes
#if defined HAVE_ETSF_IO
 use etsf_io
#endif

!This section has been created automatically by the script Abilint (TD). Do not modify these by hand.
#ifdef HAVE_FORTRAN_INTERFACES
 use interfaces_01manage_mpi
 use interfaces_11util
 use interfaces_12ffts
 use interfaces_13io_mpi
 use interfaces_13ionetcdf
 use interfaces_13nonlocal
 use interfaces_13recipspace
 use interfaces_14iowfdenpot
 use interfaces_15common
 use interfaces_lib01hidempi
#else
 use defs_interfaces
 use defs_xexchmpi
 use defs_xfuncmpi
#endif
!End of the abilint section

 implicit none

#if defined MPI
          include 'mpif.h'
#endif
!Arguments ------------------------------------
 integer, intent(in) :: kssform,mband,mgfft,mkmem,mpsang,mpw,natom
 integer, intent(in) :: nfft,nkpt,nsppol,nsym,ntypat,prtvol
 integer, intent(inout) :: nspinor
 real(dp), intent(in) :: ecut,ucvol
 type(MPI_type), intent(inout) :: mpi_enreg
 type(datafiles_type), intent(in) :: dtfil
 type(dataset_type), intent(in) :: dtset
 type(hdr_type), intent(inout) :: hdr
 type(pseudopotential_type), intent(in) :: psps
 type(wffile_type), intent(inout) :: wffnow
 integer, intent(in) :: atindx(natom),atindx1(natom)
 integer :: kg(3,mpw*mkmem),nattyp(ntypat)
 integer, intent(in), target :: npwarr(nkpt)
 real(dp), intent(in) :: gmet(3,3),gprimd(3,3),occ(mband*nkpt*nsppol)
 real(dp), intent(in) :: ph1d(2,3*(2*mgfft+1)*natom),rmet(3,3),rprimd(3,3)
 real(dp), intent(inout) :: vtrial(nfft,nsppol)
 real(dp), intent(in) :: xred(3,natom),ylm(mpw*mkmem,mpsang*mpsang*psps%useylm)
 real(dp), intent(in) :: cg(2,mpw*nspinor*mband*mkmem*nsppol),eigen(mband*nkpt*nsppol)

!Local variables-------------------------------
 integer,parameter :: unitkss=75
 integer :: bdtot_index,choice,cplex,cpopt,dimffnl,fform,i,ia,iatom,ib,ibp,idir,ier
 integer :: ierr,ig,igp,igspinor,ii,iinv,ikg,ikpt,il,il0,ilim,ilm,ilmn,in,index
 integer :: inp,ipw,is,is1,is2,isdiscarded,isretained,ish,ishm,ispinor,ispinorp,isppol,istwf_k,j,jj,k
 integer :: k_index,kk,matblk,maxpw,mcg_disk,mproj,n1,n2,n3,n4,n5,n6,nband_k
 integer :: nbandkss_k,nbandksseff,nbase,negv,ngdiago,nghg,nkpg,nnlout,npw_k,npwkss
 integer :: nrst1,nrst2,nsym2,ntemp,option,paw_opt,pinv,rdwr,signs,sizepw,spaceComm
 integer :: tim_fourwf,tim_nonlop,tim_rwwf,unwfnow
 real(dp) :: arg,cinf=1.0e24,csup=0.0_dp,dtnons,dum,einf=1.0e24,eps,esup=0.0_dp
 real(dp) :: norm,weight
 complex(dp) :: cdum
 logical :: diago,found,prtdebug
 character(len=fnlen) :: filegw,filekss,filevkb
 character(len=500) :: message
 character(len=80) :: title
 character(len=10) :: stag(2)=(/'SPIN UP:  ','SPIN DOWN:'/)
 integer, allocatable, target :: vkbsign_int(:,:)
 integer :: flag(16),gbas(3),gcur(3),geq(3),nbandkssk(nkpt),symrel2(3,3,nsym)
 real(dp) :: nonlop_dum(1,1),xcart(3,natom)
 integer,allocatable :: gbase(:,:),gbasek(:,:,:),gbig(:,:),gbound(:,:)
 integer,allocatable :: gcurr(:,:),gshell(:,:),ifail(:),insort(:),iwork(:),gtmp(:,:)
 integer,allocatable :: kg_dum(:,:),nbasek(:),nshell(:),shlim(:)
 integer, allocatable, target :: kg_k(:,:)
 integer,allocatable :: trsl(:)
 real(dp) :: enlout(1),gcar(3),kpoint(3),tsec(2),ylmgr_dum(1)
 real(dp), target :: tnons2(3,nsym)
 real(dp),allocatable :: cg_disk(:,:),cnorm(:),cnormk(:,:),cwork(:,:),ctmp(:)
 real(dp),allocatable :: eig_dum(:),eigval(:),eigvec(:,:,:),en(:),ffnl(:,:,:,:)
 real(dp),allocatable :: ghg(:,:),gvnlg(:,:),kinpw(:),kpg_dum(:,:),modkplusg(:),oc(:),occ_dum(:)
 real(dp),allocatable :: occ_k(:),ph3d(:,:,:),phkxred(:,:),pwave(:,:),pwave_so(:,:)
 real(dp),allocatable :: rwork(:),subghg(:,:),subghg_so(:,:),vkb(:,:,:),vkbd(:,:,:)
 real(dp), allocatable, target :: vkb_tgt(:, :, :), vkbd_tgt(:, :, :)
 real(dp),allocatable :: vkbsign(:,:)
 real(dp), allocatable :: vlocal(:,:,:),work(:,:,:,:),ylm_k(:,:)
 real(dp),allocatable :: wfg(:,:,:)
 type(cprj_type) :: cprj_dum(1)
#if defined MPI
 !Variables introduced for MPI version
 integer :: bufnb=20,bufrt,bufsz
#endif
#if defined HAVE_ETSF_IO
 type(etsf_dims) :: dims
 logical :: lstat
 type(etsf_io_low_error) :: error
 character(len=1024) :: errmess
 type(dataset_type) :: dtset_cpy
 type(etsf_gwdata) :: gwdata
! type(etsf_wavedata) :: wavedata
 type(wffile_type) :: wff
 integer :: ncid
#endif

! *********************************************************************

!DEBUG
!write(6,*) 'outkss: enter'
!ENDDEBUG

#ifdef VMS
!DEC$ ATTRIBUTES ALIAS:'ZHPEV' :: zhpev
!DEC$ ATTRIBUTES ALIAS:'ZHPEVX' :: zhpevx
#endif

 call timab(234,1,tsec)

 prtdebug=.true.
 if(prtvol>=10)prtdebug=.true.

!A-Write preliminary message:
!-----------------------------
 if(kssform==3) then
  write(message,'(a,70("="),a,a,a,a)') ch10,ch10, &
& ' Calculating and writing out Kohn-Sham electronic Structure file',ch10, &
& ' Using conjugate gradient wavefunctions and energies (kssform=3)'
 else
  write(message,'(a,70("="),a,a,a,a,i1,a)') ch10,ch10, &
& ' Calculating and writing out Kohn-Sham electronic Structure file',ch10, &
& ' Using diagonalized wavefunctions and energies (kssform=',kssform,')'
 end if
 call wrtout(6,message,'COLL')
 call wrtout(ab_out,message,'COLL')

!Init spaceComm
 call xcomm_init(mpi_enreg,spaceComm)

!B-Perform some tests:
!---------------------
!Check whether nband is a constant for all k point:
!MG060903 nband(nkpt*nsppol) ==> added external loop on spin
 if (dtset%occopt>=2 .and. dtset%occopt<=7) then
  do isppol=1,nsppol
   do ikpt=1,nkpt
    if(dtset%nband(ikpt+(isppol-1)*nkpt)/=dtset%nband(1))then
     write(message, '(6a,i4,a,i3,a,i4,3a)' ) ch10,&
&     ' outkss: ERROR -',ch10,&
&     '  The number of bands must be the same for all k-points ',&
&     ch10,'  but nband(1)=',dtset%nband(1),' is different of nband(',&
&     ikpt+(isppol-1)*nkpt,')=',dtset%nband(ikpt+(isppol-1)*nkpt),'.',ch10,&
&     '  Program does not stop but _KSS file will not be created...'
     call wrtout(6,message,'COLL')
     return
    end if
   end do
  end do
 end if
!END MG
!Check istwfk (must be 1):
 do ikpt=1,nkpt
  if(dtset%istwfk(ikpt)/=1) then
   write(message,'(10a)') ch10,&
&   ' outkss: ERROR -',ch10,&
&   '  nbandkss<>0 and istwfk<>1 not allowed:',ch10,&
&   '  States output not programmed for time-reversal symmetry.',ch10,&
&   '  Action : change istwfk in input file (put it to 1 for all kpt).',ch10,&
&   '  Program does not stop but _KSS file will not be created...'
   call wrtout(6,message,'COLL')
   return
  end if
 end do
!Check nspinor:
 if (nspinor>1) then
! NOTE : at least the use of fftpac should be modified to cope with nspinor==2
  write(message, '(6a)' ) ch10,&
&    ' outkss: ERROR -',ch10,&
&    '  Variable nspinor should be 1 !!',ch10,&
&    '  Program does not stop but _KSS file will not be created...'
  call wrtout(6,message,'COLL')
  return
 end if
!Check nsppol:
!MG060907 now nsppol=2 is admitted
! if (nsppol>1) then
!  write(message, '(6a)' ) ch10,&
!&    ' outkss: ERROR -',ch10,&
!&    '  Variable nsppol should be 1 !!',ch10,&
!&    '  Program does not stop but _KSS file will not be created...'
!  call wrtout(6,message,'COLL')
!  return
! end if
!Check mpspso:
 if (psps%mpssoang/=mpsang) then
  write(message, '(6a)' ) ch10,&
&    ' outkss: ERROR -',ch10,&
&    '  Variable mpspso should be 1 !!',ch10,&
&    '  Program does not stop but _KSS file will not be created...'
  call wrtout(6,message,'COLL')
  return
 end if
!Check mproj:
 mproj=maxval(psps%indlmn(3,:,:))
 if(kssform/=0 .and. mproj>1)then
  write(message,'(a,a,a,a,a,a,a,a)')ch10,&
&  ' outkss : ERROR -',ch10,&
&  '  The present version of outkss do not accept pseudopotentials',ch10,&
&  '  with more than one projector per angular momentum channel,',ch10,&
&  '  when kssform is non zero.'
  call wrtout(6,message,'COLL')
  print *, psps%indlmn(3,:,:)
  print *, psps%indlmn(1,:,:)
  return
 end if
!Check parareel:
 if (dtset%parareel/=0) then
  write(message, '(6a)' ) ch10,&
&    ' outkss: ERROR -',ch10,&
&    '  outkss cannot be used with parareel/=0 !',ch10,&
&    '  Program does not stop but _KSS file will not be created...'
  call wrtout(6,message,'COLL')
  return
 end if
!Check useylm:
 if(psps%useylm/=0)then
  write(message,'(6a)')ch10,&
&  ' outkss : ERROR -',ch10,&
&  '  The present version of outkss does not work with useylm/=0 !',ch10,&
&  '  Program does not stop but _KSS file will not be created...'
  call wrtout(6,message,'COLL')
  return
 end if
!Check PAW:
 if(psps%usepaw/=0)then
  write(message,'(6a)')ch10,&
&  ' outkss : ERROR -',ch10,&
&  '  The present version of outkss does not work in PAW mode !',ch10,&
&  '  Program does not stop but _KSS file will not be created...'
  call wrtout(6,message,'COLL')
  return
 end if
!Check bands parallelization:
#if defined MPI
 if(mpi_enreg%paralbd/=0)then
  write(message, '(6a)' ) ch10,&
&    ' outkss: ERROR -',ch10,&
&    '  outkss cannot be used with parallelization on bands (paralbd/=0) !',ch10,&
&    '  Program does not stop but _KSS file will not be created...'
  call wrtout(6,message,'COLL')
  return
 end if
#endif
!Check required memory:
!MG FIXME to be modified to take into account the case nsppol=2
 if (kssform/=3) call memkss(mband,mgfft,mkmem,mpi_enreg,mproj,psps%mpssoang,&
&                mpw,natom,dtset%ngfft,nkpt,nspinor,nsym,ntypat)

!C-Initialize some variables:
!----------------------------
 n1=dtset%ngfft(1) ; n2=dtset%ngfft(2) ; n3=dtset%ngfft(3)
 n4=dtset%ngfft(4) ; n5=dtset%ngfft(5) ; n6=dtset%ngfft(6)
 sizepw=2*mpw
 diago=(kssform/=3)

!D-Prepare set containing all g-vectors sorted by shells:
!--------------------------------------------------------
 write(message,'(2a)') ch10,&
&  ' Sorting g-vecs for an output of states on an unique "big" PW basis.'
 call wrtout(6,message,'COLL')
!
!"Suppress" inversion from symetries list:
!- - - - - - - - - - - - - - - - - - - - -
!First find the first occurence of the inversion symmetry
 is1=0
 found=.false.
 do while(is1<nsym .and. .not.(found))
  is1=is1+1
  found=.true.
  do i=1,3
   do j=1,3
    if(i==j) then
     found=(found.and.(dtset%symrel(j,i,is1)==-1))
    else
     found=(found.and.(dtset%symrel(j,i,is1)==0))
    end if
   end do
  end do
 end do
 if(found) then
  write(message,'(2a,i3)') ch10,&
&  ' The inversion is the symmetry operation no. ',is1
 else
  write(message,'(a)') &
&  ' The inversion was not found in the symmetries list.'
 end if
 call wrtout(6,message,'COLL')
!Then find the symmetries that are related through the inversion symmetry
 nsym2=0
 do is=1,nsym-1
  do is2=is+1,nsym
   found=.true.
   do i=1,3
    do j=1,3
     found=(found.and.(dtset%symrel(j,i,is)==-dtset%symrel(j,i,is2)))
    end do
    dtnons=dtset%tnons(i,is2)-dtset%tnons(i,is)-dtset%tnons(i,is1)
    found=(found.and. &
&    (abs(dtnons)<1e-8 .or. abs(dtnons-1)<1e-8 .or. abs(dtnons+1)<1e-8))
   end do

   if (found) then
    nsym2=nsym2+1
!FB061016 when you can choose, retain the symmetry with positive determinant
    if(all(dtset%tnons(:,is2)<1e-8).and.all(dtset%tnons(:,is)<1e-8)) then
     if( dtset%symrel(1,1,is)*dtset%symrel(2,2,is)*dtset%symrel(3,3,is)&
&       +dtset%symrel(1,2,is)*dtset%symrel(2,3,is)*dtset%symrel(3,1,is)&
&       +dtset%symrel(1,3,is)*dtset%symrel(2,1,is)*dtset%symrel(3,2,is)&
&       -dtset%symrel(1,3,is)*dtset%symrel(2,2,is)*dtset%symrel(3,1,is)&
&       -dtset%symrel(1,2,is)*dtset%symrel(2,1,is)*dtset%symrel(3,3,is)&
&       -dtset%symrel(1,1,is)*dtset%symrel(2,3,is)*dtset%symrel(3,2,is) > 0. ) then
      isretained=is
      isdiscarded=is2
     else
      isretained=is2
      isdiscarded=is
     endif
    elseif(all(dtset%tnons(:,is2)<1e-8)) then
     isretained=is2
     isdiscarded=is
    else
     isretained=is
     isdiscarded=is2
    end if
    symrel2(:,:,nsym2)=dtset%symrel(:,:,isretained)
    tnons2(:,nsym2)=dtset%tnons(:,isretained)
    write(message,'(a,i3,a,i3,3a,i3,a)') &
&   ' The symmetry operations no. ',is,' and no. ',is2,&
&   ' are related through the inversion.',ch10,&
&   ' The symmetry operation no. ',isdiscarded,' will be suppressed.'
    call wrtout(6,message,'COLL')
   end if
  end do
 end do
 if (nsym2/=(nsym/2) .or. nsym==1) then
  nsym2=nsym
  symrel2(:,:,:)=dtset%symrel(:,:,:)
  tnons2(:,:)=dtset%tnons(:,:)
  pinv=1
  write(message,'(3a)') &
&  ' - outkss - COMMENT :',ch10,&
&  '   GW program uses the original set of symmetries '
  call wrtout(6,message,'COLL')
 else
  pinv=-1
  write(message,'(a)') &
&  ' Inversion related operations have been suppressed from symmetries list.'
  call wrtout(6,message,'COLL')
 end if
 if(any(abs(tnons2(:,1:nsym2))>1e-8)) then
  write(message,'(5a)') &
&  ' - outkss - ERROR:',ch10,&
&  '   Non-symmorphic operations still remain in the symmetries list ',&
&  ch10,&
&  '   Program does not stop but _KSS file will not be created...'
  call wrtout(6,message,'COLL')
  return
 end if

!Search for base g-vectors generating all g-shells:
!- - - - - - - - - - - - - - - - - - - - - - - - -
!Loop over k-points:
 ikg=0;if (mkmem==0) rewind (unit=dtfil%unkg)
 allocate(nbasek(nkpt))
 allocate(gbasek(3,mpw,nkpt))
 allocate(cnormk(mpw,nkpt))
 allocate(gcurr(3,mpw))
 nbasek(:)=0;cnormk(:,:)=0._dp;gbasek(:,:,:)=0
 do ikpt=1,nkpt
!
#if defined MPI
           if (minval(abs(mpi_enreg%proc_distrb(ikpt,1,1:nsppol)-mpi_enreg%me))/=0) cycle
#endif

!MG QUESTION Does it work in parallel? every process should read a different
! FIXME      block of G vectors and comunicate to the others which block has
!            been read
!            the update ikg=ikg+npw_k works fine in a serial alghorithm but
!            requires inter-processor comunication in parallel.

! Read g-vectors corresponding to k-point:
  if (mkmem==0) then
   read(dtfil%unkg) npw_k
   read(dtfil%unkg)
   read(dtfil%unkg) ((gcurr(i,ig),i=1,3),ig=1,npw_k)
  else
   npw_k=npwarr(ikpt)
   do ig=1,npw_k
    gcurr(:,ig)=kg(:,ikg+ig)
   end do
  end if
  ikg=ikg+npw_k

! Search for the g''s generating the others by symmetry:
  do ig=1,npw_k
   gcur(:)=gcurr(:,ig)
   norm=0
   do j=1,3
    gcar(j)=gcur(1)*gprimd(j,1)+gcur(2)*gprimd(j,2)+gcur(3)*gprimd(j,3)
    norm=norm+gcar(j)**2
   end do
   eps=1.d-8*norm
   found=.false.;in=1
   do while ((.not.found).and.(in<=nbasek(ikpt)))
    if (abs(norm-cnormk(in,ikpt))<=eps) then
     gbas(:)=gbasek(:,in,ikpt)
     is=1
     do while ((.not.found).and.(is<=nsym2))
      geq(:)=(symrel2(1,:,is)*gcur(1)+symrel2(2,:,is)*gcur(2)+ &
&             symrel2(3,:,is)*gcur(3))
      found=((geq(1)==gbas(1)).and.(geq(2)==gbas(2)).and.&
&            (geq(3)==gbas(3)))
      if (pinv==-1) found=(found.or.((geq(1)==-gbas(1)).and.&
&                         (geq(2)==-gbas(2)).and.(geq(3)==-gbas(3))))
      is=is+1
     end do
    end if
    in=in+1
   end do
   if (.not.found) then
    nbasek(ikpt)=nbasek(ikpt)+1
    cnormk(nbasek(ikpt),ikpt)=norm
    gbasek(:,nbasek(ikpt),ikpt)=gcur(:)
   end if
  end do

!End loop over k-points:
 end do
!
#if defined MPI
           call xsum_mpi(nbasek,spaceComm,ierr)
           call xsum_mpi(cnormk,spaceComm,ierr)
           call xsum_mpi(gbasek,spaceComm,ierr)
#endif

!Reduce info over k-points:
 nbase=0
 allocate(gbase(3,sizepw))
 allocate(cnorm(sizepw))
 do ikpt=1,nkpt
  do in=1,nbasek(ikpt)
   gcur(:)=gbasek(:,in,ikpt)
   norm=cnormk(in,ikpt);eps=1.d-8*norm
   found=.false.;inp=1
   do while ((.not.found).and.(inp<=nbase))
    if (abs(norm-cnorm(inp))<=eps) then
     gbas(:)=gbase(:,inp)
     is=1
     do while ((.not.found).and.(is<=nsym2))
      geq(:)=(symrel2(1,:,is)*gcur(1)+symrel2(2,:,is)*gcur(2)+ &
&             symrel2(3,:,is)*gcur(3))
      found=((geq(1)==gbas(1)).and.(geq(2)==gbas(2)).and.&
&            (geq(3)==gbas(3)))
      if (pinv==-1) found=(found.or.((geq(1)==-gbas(1)).and.&
&                         (geq(2)==-gbas(2)).and.(geq(3)==-gbas(3))))
      is=is+1
     end do
    end if
    inp=inp+1
   end do
   if (.not.found) then
    nbase=nbase+1
    if (nbase.gt.sizepw) then
     write(message, '(4a,i5,a,i5,3a)' ) ch10,&
&      ' outkss: ERROR -',ch10,'  nbase (',nbase,&
&      ') became greater than sizepw (',sizepw,') !',ch10,&
&      '  Program does not stop but _KSS file will not be created...'
     call wrtout(6,message,'COLL')
     return
    end if
    cnorm(nbase)=cnormk(in,ikpt)
    gbase(:,nbase)=gcur(:)
   end if
  end do
 end do
 deallocate(nbasek,cnormk,gbasek)

!Reorder base g-vectors in order of increasing module:
 allocate(insort(nbase))
 do in=1,nbase
  insort(in)=in
 end do
 call sort_dp(nbase,cnorm,insort,tol14)

!Generate all shells of g-vectors:
!(shell of a g=set of all symetrics of this g)
!- - - - - - - - - - - - - - - - - - - - - - -
 allocate(gbig(3,sizepw))
 allocate(nshell(nbase))
 allocate(gshell(3,2*nsym2))
 maxpw=0
!
!Loop over all different modules of g''s (=shells):
 do in=1,nbase
  nshell(in)=0
  gcur(:)=gbase(:,insort(in))
!
! Loop over all symetries:
  do is=1,nsym2
   do iinv=pinv,1,2
    geq(:)=iinv*(symrel2(1,:,is)*gcur(1)+symrel2(2,:,is)*gcur(2)+ &
&                symrel2(3,:,is)*gcur(3))
!
!   Search for symetric of g and eventually add it:
    found = .false.;ish=1
    do while ((.not.found).and.(ish<=nshell(in)))
     found=((geq(1)==gshell(1,ish)).and.&
&           (geq(2)==gshell(2,ish)).and.&
&           (geq(3)==gshell(3,ish)))
     ish=ish+1
    end do
    if (.not.found) then
     nshell(in)=nshell(in)+1
     gshell(:,nshell(in))=geq(:)
    end if
!
! End loop over symetries
   end do
  end do
!
! Store this shell of g''s in a big array of g (gbig):
  if ((maxpw+nshell(in)).gt.sizepw) then
!  We need to increase the size of the gbase, gbig and cnorm arrays
!  while still keeping their content.
!  This is done using two temporary arrays gtmp and ctmp
   allocate(ctmp(sizepw))
   allocate(gtmp(3,sizepw))
   sizepw=maxpw+nshell(in)

   ctmp(:)=cnorm(:)
   gtmp(:,:)=gbase(:,:)

   deallocate(cnorm)
   allocate(cnorm(sizepw))
   cnorm(:)=ctmp(:)
   deallocate(ctmp)

   deallocate(gbase)
   allocate(gbase(3,sizepw))
   gbase(:,:)=gtmp(:,:)
   gtmp(:,:)=gbig(:,:)
   deallocate(gbig)
   allocate(gbig(3,sizepw))
   gbig(:,:)=gtmp(:,:)
   deallocate(gtmp)
  end if
  do ig=1,nshell(in)
   gbig(:,ig+maxpw)=gshell(:,ig)
  end do
  maxpw=maxpw+nshell(in)
!
!End loop over shells:
 end do
!
!Compute shell limits:
 allocate(shlim(nbase))
 ilim=0
 do in=1,nbase
  ilim=ilim+nshell(in)
  shlim(in)=ilim
 end do
!
!Print out shell limits:
 write(message,'(3a)') ' Shells found:',ch10,&
&  ' number of shell    number of G vectors      cut-off energy'
 call wrtout(6,message,'COLL')
 do in=1,nbase
  write(message,'(12x,i4,17x,i6,12x,f8.3)') in, shlim(in),2*pi**2*cnorm(in)
  call wrtout(6,message,'COLL')
 end do
 write(message,'(a)') ch10;call wrtout(6,message,'COLL')
!
 deallocate(gcurr,gbase,gshell,insort,nshell,cnorm)

!E-determine optimal number of bands and g''s to be written:
!----------------------------------------------------------
 npwkss=dtset%npwkss
 if ((npwkss==0).or.(npwkss>=maxpw)) then
  npwkss=maxpw
  write(message, '(5a)' ) &
&   ' Since the number of g''s to be written on file',ch10,&
&   ' was 0 or too large, it has been set to the max. value.,',ch10,&
&   ' computed from the union of the sets of G vectors for the different k-points.'
  call wrtout(6,message,'COLL')
 end if
!
!XG 020224 This coding can induce a border effect when ishm+1>nbase
!ishm=0
!do while ((shlim(ishm+1)<=npwkss).and.(ishm<nbase))
! ishm=ishm+1
!end do
!This one is clean
 ishm=0
 do ii=1,nbase
  if(shlim(ii)<=npwkss)then
   ishm=ii
  else
   exit
  end if
 end do
!
 if (shlim(ishm)/=npwkss) then
  nrst1=shlim(ishm)
  nrst2=min0(shlim(min0(ishm+1,nbase)),maxpw)
  if (iabs(npwkss-nrst2)<iabs(npwkss-nrst1)) nrst1=nrst2
  npwkss=nrst1;if (shlim(ishm)<npwkss) ishm=ishm+1
  write(message, '(5a)' ) &
&   ' Since the number of g''s to be written on file',ch10,&
&   ' was''nt a whole number of ''stars'', the program set',ch10,&
&   ' it to the nearest ''star limit''.'
  call wrtout(6,message,'COLL')
 end if
 write(message, '(a,i5)' ) &
& ' Number of g-vectors written on file is: ',npwkss
 call wrtout(6,message,'COLL')

!check on number of stored bands
 if(diago) then
  if ((dtset%nbandkss==-1).or.(dtset%nbandkss>=maxpw)) then
   nbandkssk(1:nkpt)=npwarr(1:nkpt)
   write(message, '(6a)' ) ch10,&
&    ' Since the number of bands to be computed was (-1) or',ch10,&
&    ' too large, it has been set to the max. value. allowed for each k,',ch10,&
&    ' thus, the minimum of the number of plane waves for each k point.'
   call wrtout(6,message,'COLL')
  else
   nbandkssk(1:nkpt)=dtset%nbandkss
   found=.false.
   do ikpt=1,nkpt
    if (dtset%nbandkss>npwarr(ikpt)) then
     nbandkssk(ikpt)=npwarr(ikpt)
     found=.true.
    end if
   end do
   if (found) then
    write(message,'(9a)') &
&    ' outkss - WARNING:',ch10,&
&    ' The value choosen for the number of bands in file',ch10,&
&    ' (nbandkss) was greater than at least one number of plane',ch10,&
&    ' waves for a given k-point (npw_k).',ch10,&
&    ' It has been modified consequently.'
    call wrtout(6,message,'COLL')
   end if
  end if
  found=.false.
  do ikpt=1,nkpt
   if (nbandkssk(ikpt)>npwkss) then
    nbandkssk(ikpt)=npwkss
    found=.true.
   end if
  end do
  if (found) then
   write(message,'(7a)') &
&   ' outkss - WARNING:',ch10,&
&   ' The number of bands to be computed (for one k) was',ch10,&
&   ' greater than the number of g-vectors to be written.',ch10,&
&   ' It has been modified consequently.'
   call wrtout(6,message,'COLL')
  end if
  nbandksseff=minval(nbandkssk)
 else ! .not.diago
  do ikpt=1,nkpt
   do isppol=1,nsppol
    nbandkssk(ikpt)=dtset%nband(ikpt+(isppol-1)*nkpt)
   end do
  end do
  nbandksseff=minval(nbandkssk)
  if(dtset%nbandkss>0 .and. dtset%nbandkss<nbandksseff) nbandksseff=dtset%nbandkss
!MG FIXME here we could output a WARNING if (dtset%nbandkss>nbandksseff)
!   indeed the user is using the cg algorithm but the number of bands used in the
!   SCF calculation is less than the number of bands required in the _KSS file
 end if ! diago

 write(message, '(a,i5)' ) &
& ' Number of bands written on file is: ',nbandksseff
 call wrtout(6,message,'COLL')
!
 found=.false.
 do ikpt=1,nkpt
  if (nbandkssk(ikpt)<npwarr(ikpt)) found=.true.
 end do
!MG60907: minor BUG in version 5.2.2
! we must enter the loop only in (diago == true) and then check the value of found
! in 5.2.2 we were using the condition: if (found .and. diago) then ....
 if (diago) then
  if (found) then
   write(message, '(6a)' ) ch10,&
&   ' Since the number of bands to be computed',ch10,&
&   ' is less than the number of G-vectors found,',ch10,&
&   ' the program will perform partial diagonalizations.'
  else
   write(message, '(6a)' ) ch10,&
&   ' Since the number of bands to be computed',ch10,&
&   ' is equal to the nb of G-vectors found for each k-pt,',ch10,&
&   ' the program will perform complete diagonalizations.'
  end if
  call wrtout(6,message,'COLL')
 end if

!F-Open file for output, write headers and some data:
!----------------------------------------------------
!Check required disk space:
!MG060907 FIXME must add nsppol in the list of input variables
 call dsksta(ishm,nbandksseff,npwkss,nkpt,nsym2)

#if defined MPI
           if (mpi_enreg%me==0) then
#endif

  !Begin to open and write output file(s)
  ! The ETSF output is a first basic attempt, branching all write calss.
  ! A better integration will be done later.
  !Open file for Kohn Sham Structure output (_KSS)
  filekss=trim(dtfil%filnam_ds(4))//'_KSS'
  if (dtset%accesswff == 0) then
   open(unitkss,file=filekss,form='unformatted')
#if defined HAVE_ETSF_IO
  else if (dtset%accesswff == 3) then
    call dtsetcopy(dtset_cpy, dtset)
    dtset_cpy%mpw = npwkss
    dtset_cpy%mband = nbandksseff
    dtset_cpy%nsym = nsym2
    dtset_cpy%symrel(:,:,1:nsym2) = symrel2(:,:,1:nsym2)
    dtset_cpy%tnons(:,1:nsym2) = tnons2(:,1:nsym2)
    call abi_etsf_init(dtset_cpy, filekss, etsf_main_wfs_pw, &
                     & etsf_grp_main + etsf_grp_gwdata + etsf_grp_geometry + &
                     & etsf_grp_kpoints + etsf_grp_electrons + etsf_grp_wavedata, &
                     & hdr%lmn_size, psps)
    ! Complete the geometry informations with missing values from hdr_io().
    call abi_etsf_geo_put(dtset_cpy, filekss, psps, rprimd, xred)
    ! We open again for further additions
    call etsf_io_low_open_modify(ncid, trim(filekss)//"-etsf.nc", lstat, error_data = error)
    if (.not. lstat) then
      ! We handle the error
      call etsf_io_low_error_to_str(errmess, error)
      write(message, "(A,A,A,A)") ch10, " outkss: ERROR -", ch10, &
                                & errmess(1:min(475, len(errmess)))
      call wrtout(std_out, message, 'COLL')
      call leave_new('COLL')
    end if
#endif
  end if

 write(message,'(3a)')ch10,' Opening file for KS structure output: ',filekss
 call wrtout(6,message,'COLL')
  !Write abinit standard header
  fform=502;rdwr=2
  if (dtset%accesswff == 0) then
    call hdr_io(fform,hdr,rdwr,unitkss)
    title='Results from ABINIT code'
    write(unitkss) title(1:80)
    title='Ab-initio plane waves calculation'
    write(unitkss) title(1:80)
    write(unitkss) nsym2, nbandksseff, npwkss, ishm, mpsang
#if defined HAVE_ETSF_IO
  else if (dtset%accesswff == 3) then
    call hdr_io_etsf(fform,hdr,rdwr,ncid)
#endif
  end if

 write(message,'(a,i6)') ' number of Gamma centered plane waves ',npwkss
 call wrtout(6,message,'COLL')
 call wrtout(ab_out,message,'COLL')
 write(message,'(a,i6)') ' number of Gamma centered shells ',ishm
 call wrtout(6,message,'COLL')
 call wrtout(ab_out,message,'COLL')
 write(message,'(a,i6)') ' number of bands ',nbandksseff
 call wrtout(6,message,'COLL')
 call wrtout(ab_out,message,'COLL')
 write(message,'(a,i6)') ' maximum angular momentum components ',mpsang
 call wrtout(6,message,'COLL')
 call wrtout(ab_out,message,'COLL')
 if (dtset%accesswff == 0) then
  write(unitkss) (((symrel2(i,j,k),i=1,3),j=1,3),k=1,nsym2)
  write(unitkss) ((tnons2(i,k),i=1,3),k=1,nsym2)
 end if
 write(message,'(a,i2,a)') ' number of symmetry operations ',nsym2,&
&  ' (without inversion)'
 call wrtout(6,message,'COLL')
 if (dtset%accesswff == 0) then
  ! FIXME! need to add these values...
  write(unitkss) ((gbig(i,ig),i=1,3),ig=1,npwkss)
  write(unitkss) (shlim(in),in=1,ishm)
 end if
!Here write vkbsign
!WARNING : it seems that the present version of outkss deals only with
!one projector in each angular momentum channel (XG 010927)
!and only with non-paw pseudopotentials
 ! The allocation is done in the wrong order for dimensions...
 if (dtset%accesswff == 0) then
  allocate(vkbsign(ntypat,mpsang));vkbsign(:,:)=zero
  do is=1,ntypat
   il0=0;do ilmn=1,psps%lmnmax
   il=1+psps%indlmn(1,ilmn,is)
   in=psps%indlmn(3,ilmn,is)
   if ((il/=il0).and.(in==1)) then
    il0=il
    vkbsign(is,il)=dsign(one,psps%ekb(ilmn,is))
   end if
   end do
  end do
  write(unitkss) ((vkbsign(is,il),il=1,mpsang),is=1,ntypat)
  deallocate(vkbsign)
#if defined HAVE_ETSF_IO
 else if (dtset%accesswff == 3) then
  allocate(vkbsign_int(mpsang, ntypat));vkbsign_int(:,:)=0
  do is=1,ntypat
   il0=0
   do ilmn=1,psps%lmnmax
    il=1+psps%indlmn(1,ilmn,is)
    in=psps%indlmn(3,ilmn,is)
    if ((il/=il0).and.(in==1)) then
     il0=il
     vkbsign_int(il,is)=int(dsign(one,psps%ekb(ilmn,is)))
    end if
   end do
  end do
  gwdata%kb_formfactor_sign%data2D => vkbsign_int
! We don't write it now, it will be written with the formfactors.
  write(*,*) vkbsign_int
  call etsf_io_gwdata_put(ncid, gwdata, lstat, error)
  if (.not. lstat) then
!  We handle the error
   call etsf_io_low_error_to_str(errmess, error)
   write(message, "(A,A,A,A)") ch10, " outkss: ERROR -", ch10, &
                              & errmess(1:min(475, len(errmess)))
   call wrtout(std_out, message, 'COLL')
   call leave_new('COLL')
  end if
  gwdata%kb_formfactor_sign%data2D => null()
  deallocate(vkbsign_int)
#endif
 end if

 do ig=1,min(8,npwkss)
  write(message,'(a,i2,a,3i3)') '   *   g(',ig,')=',gbig(:,ig)
  call wrtout(6,message,'COLL')
 end do
 do ig=max(min(8,npwkss),npwkss-6),npwkss
  write(message,'(a,i4,a,3i3)') '   *   g(',ig,')=',gbig(:,ig)
  call wrtout(6,message,'COLL')
 end do

#if defined MPI
           end if
#endif

 deallocate(shlim)

 if(diago) write(message,'(a)') ' Diagonalized eigenvalues'
 if (.not.diago) write(message,'(a)') ' Conjugate gradient eigenvalues'
 call wrtout(ab_out,message,'COLL')
!MG060907 Maybe we can add a SPIN label in the ab_out file
 if(dtset%enunit==1) then
  write(message,'(a)') '   k    eigenvalues [eV]'
  call wrtout(ab_out,message,'COLL')
 else
  write(message,'(a)') '   k    eigenvalues [Hartree]'
  call wrtout(ab_out,message,'COLL')
 end if

!G-Begin loop over k-points:
!--------------------------

!Prepare WF file for reading IF MKMEM=0
!MG060907 This part is unchanged, only close, re-open the external file
!         skipping the header
 if ((.not.diago).and.(mkmem==0)) then
  mcg_disk=mpw*nspinor*mband
  allocate(eig_dum(mband),kg_dum(3,0),occ_dum(mband),cg_disk(2,mcg_disk))
  call clsopn(wffnow)
  call hdr_skip(wffnow,ierr)

! Define offsets, in case of MPI I/O
!  call WffKg(wffnow,0)
!  call xdefineOff(0,wffnow,mpi_enreg,dtset%nband,npwarr,nspinor,nsppol,nkpt)
 end if

 call timab(234,2,tsec)

!MG060907 kindex is OK but I have to take into account the spin
! remember that the number of bands for each k point and spin must be the same
 k_index=0;bdtot_index=0
 allocate(vlocal(n4,n5,n6))

!loop over spin
 do isppol=1,nsppol
  ikg=0
  if (mkmem==0) rewind (unit=dtfil%unkg)
!MG NOTE in the present version psps%useylm/=0 is not allowed
  if (mkmem==0.and.psps%useylm==1) rewind dtfil%unylm

!Set up local potential vlocal with proper dimensioning, from vtrial
  call fftpac(isppol,nsppol,n1,n2,n3,n4,n5,n6,dtset%ngfft,vtrial,vlocal,2)

  do ikpt=1,nkpt

   call timab(235,1,tsec)

   if (nsppol==2) then
    write(message,'(2a,i3,3x,a)') ch10,' k-point ',ikpt,stag(isppol)
   else
    write(message,'(2a,i3)') ch10,' k-point ',ikpt
   end if
   call wrtout(6,message,'PERS')
!MG nband_k is constant, see previous part of the code
   nband_k=dtset%nband(ikpt+(isppol-1)*nkpt)
   npw_k=npwarr(ikpt)
   istwf_k=dtset%istwfk(ikpt)
   kpoint(:)=dtset%kpt(:,ikpt)
   nbandkss_k=nbandkssk(ikpt)
   if (diago) ngdiago=npw_k
   allocate(ylm_k(npw_k,mpsang*mpsang*psps%useylm))
   allocate(trsl(maxpw))
!
#if defined MPI
           if (mpi_enreg%proc_distrb(ikpt,1,isppol)==mpi_enreg%me) then
#endif

!H-Load G-vectors, for this k-point:
!-----------------------------------
   allocate(gbound(2*mgfft+8,2))
   allocate(kg_k(3,npw_k))
   if (mkmem==0) then
    call rdnpw(ikpt,isppol,nband_k,npw_k,nspinor,0,dtfil%unkg)
    read (dtfil%unkg) kg_k(1:3,1:npw_k)
    call sphereboundary(gbound,istwf_k,kg_k,mgfft,npw_k)
!MG in the present version psps%useylm/=0 is not allowed
    if (psps%useylm==1) then
     read(dtfil%unylm)
     read(dtfil%unylm) ((ylm_k(ipw,ilm),ipw=1,npw_k),ilm=1,mpsang*mpsang)
    end if
   else
    kg_k(:,1:npw_k)=kg(:,1+ikg:npw_k+ikg)
    call sphereboundary(gbound,istwf_k,kg_k,mgfft,npw_k)
!MG in the present version psps%useylm/=0 is not allowed
    if (psps%useylm==1) then
     do ilm=1,mpsang*mpsang
      ylm_k(1:npw_k,ilm)=ylm(1+ikg:npw_k+ikg,ilm)
     end do
    end if
    ikg=ikg+npw_k
   end if

!I-Associate set of g-vectors with the big array of g''s:
!-------------------------------------------------------
!              Note:
!              Array gbig(3,ig=1,maxpw) contains all g-vectors used
!              for all k-points, in order of increasing shells.
!              For a each k-point, the wave-functions are defined only
!              on a particular set of g-vectors g''(k) (included in gbig).
!              This set is defined by array trsl:
!              - Array trsl(ig=1,maxpw) translates the index of the
!                gbig (from 1 to maxpw) into the corresponding
!                index in array g''(k). If gbig(ig) does not exist in
!                g''(k), trsl(ig) contains npw(k)+1.
!
!  Initialize array trsl:
   trsl(:)=npw_k+1
!  Loop over g-vectors, for this k point:
   do ig=1,npw_k
    gcur(:)=kg_k(:,ig)
!   Search selected vector in array gbig:
    igp=0
    found = .false.
    do while ((.not.found).and.(igp<maxpw))
     igp=igp+1
     found=((gcur(1)==gbig(1,igp)).and.&
&           (gcur(2)==gbig(2,igp)).and.&
&           (gcur(3)==gbig(3,igp)))
    end do
!   Store it if found:
    if (found) then
     trsl(igp)=ig
    else
     write(message, '(6a)' ) ch10,&
&    ' outkss: BUG -',ch10,&
&    '  The set of g vectors is inconsistent ! Check source.',ch10,&
&    '  Program does not stop but _KSS file will not be created...'
     call wrtout(6,message,'COLL')
     return
    end if
!  End loop over g vectors, for this k point:
   end do

!J-Re-evaluate some useful quantities:
!--------------------------------------
   allocate(phkxred(2,natom))
   dimffnl=2;allocate(ffnl(npw_k,dimffnl,psps%lmnmax,ntypat))
   allocate(kinpw(npw_k))
   allocate(modkplusg(npw_k))
   allocate(vkb(npw_k+1,ntypat,mpsang))
   allocate(vkbd(npw_k+1,ntypat,mpsang))
   nkpg=0
   do ia=1,natom
    iatom=atindx(ia)
    arg=two_pi*(kpoint(1)*xred(1,ia)+&
&              kpoint(2)*xred(2,ia)+&
&              kpoint(3)*xred(3,ia))
    phkxred(1,iatom)=cos(arg)
    phkxred(2,iatom)=sin(arg)
   end do
   call mkffnl(psps%dimekb,dimffnl,psps%ekb,ffnl,psps%ffspl,gmet,gprimd,&
&              1,0,psps%indlmn,kg_k,kpg_dum,kpoint,psps%lmnmax,&
&              psps%lnmax,psps%mpsang,psps%mqgrid_ff,nkpg,npw_k,&
&              ntypat,psps%pspso,psps%qgrid_ff,rmet,&
&              psps%usepaw,psps%useylm,ylm_k,ylmgr_dum)
   call mkkin(ecut,dtset%ecutsm,dtset%effmass,gmet,kg_k,kinpw,kpoint,npw_k)
   modkplusg(:) = sqrt(0.5_dp/pi**2*kinpw(:))
   modkplusg(:) = max(modkplusg(:),tol10)
   matblk=dtset%nloalg(4)
   if (dtset%nloalg(1)>0) matblk=natom
   allocate(ph3d(2,npw_k,matblk))
   if (dtset%nloalg(1)>0) then
    call ph1d3d(1,natom,kg_k,kpoint,matblk,natom,&
&               npw_k,n1,n2,n3,phkxred,ph1d,ph3d)
   end if

!MG note that these definitions are only valid if useylm==0
   vkb(:,:,:) = zero
   vkbd(:,:,:) = zero
   do is=1,ntypat
    il0=0;do ilmn=1,psps%lmnmax
     il=1+psps%indlmn(1,ilmn,is)
     in=psps%indlmn(3,ilmn,is)
     if ((il/=il0).and.(in==1)) then
      il0=il
      if(abs(psps%ekb(ilmn,is))>1.0d-10) then
       if (il==1) then
        vkb(1:npw_k,is,il)  = ffnl(:,1,ilmn,is)
        vkbd(1:npw_k,is,il) = ffnl(:,2,ilmn,is)*modkplusg(:)/2/pi
       else if (il==2) then
        vkb(1:npw_k,is,il)  = ffnl(:,1,ilmn,is)*modkplusg(:)
        do ig = 1, npw_k
         vkbd(ig,is,il) = ((ffnl(ig,2,ilmn,is)*modkplusg(ig)*modkplusg(ig))+&
&                           ffnl(ig,1,ilmn,is) )/2/pi
        end do
       else if (il==3) then
        vkb(1:npw_k,is,il)  = ffnl(:,1,ilmn,is)*modkplusg(:)**2
        vkbd(1:npw_k,is,il) = (ffnl(:,2,ilmn,is)*modkplusg(:)**3+&
&                            2*ffnl(:,1,ilmn,is)*modkplusg(:) )/2/pi
       else if (il==4) then
        vkb(1:npw_k,is,il)  = ffnl(:,1,ilmn,is)*modkplusg(:)**3
        vkbd(1:npw_k,is,il) = (ffnl(:,2,ilmn,is)*modkplusg(:)**4+&
&                            3*ffnl(:,1,ilmn,is)*modkplusg(:)**2 )/2/pi
       end if
       vkb(:,is,il)  = sqrt(4*pi/ucvol*(2*il-1)*abs(psps%ekb(ilmn,is)))*vkb(:,is,il)
       vkbd(:,is,il) = sqrt(4*pi/ucvol*(2*il-1)*abs(psps%ekb(ilmn,is)))*vkbd(:,is,il)
      else
       vkb(:,is,il)  = zero
       vkbd(:,is,il) = zero
      end if
     end if
    end do
   end do
   deallocate(modkplusg)

   if(diago) then

!K-Compute <G|H|G_prim> matrix elements for all (G, G_prim):
!---------------------------------------------------

    write(message,'(a)') ' Calculating <G|H|G''> elements'
    call wrtout(6,message,'PERS')
!!!!  BE CAREFULL : DOES NOT WORK WITH nspinor=2 !!!!
    allocate(pwave(2,npw_k*nspinor))
    allocate(subghg(2,npw_k*nspinor))
    allocate(gvnlg(2,npw_k*nspinor))
    if (nspinor==2) allocate(pwave_so(2,npw_k),subghg_so(2,npw_k))
    allocate(ghg(2,(ngdiago*nspinor*(ngdiago*nspinor+1))/2))
    allocate(eigval(npw_k*nspinor))
    allocate(eigvec(2,ngdiago*nspinor,nbandkssk(ikpt)))
!   Initialize plane-wave array:
    pwave(:,:)=0._dp
!   Initialize index for packed storage of <G|H|G''>:
    nghg=0
!   Loop over G'':
    do ispinorp=1,nspinor
     do igp=1,ngdiago
      pwave(1,igp)=1._dp
!     Compute <G|Vlocal|G''>:
      allocate(work(2,n4,n5,n6))
      cplex=1;option=2;tim_fourwf=15;weight=1._dp
      call fourwf(cplex,vlocal,pwave,subghg,work,gbound,gbound,&
&                 istwf_k,kg_k,kg_k,mgfft,mpi_enreg,1,dtset%ngfft,npw_k,npw_k,n4,n5,n6,&
&                 option,tim_fourwf,weight)
      if(nspinor==2)then
       pwave_so(:,1:npw_k)=pwave(:,npw_k+1:2*npw_k)
       call fourwf(cplex,vlocal,pwave_so,subghg_so,work,gbound,gbound,&
&                  istwf_k,kg_k,kg_k,mgfft,mpi_enreg,1,dtset%ngfft,npw_k,npw_k,&
&                  n4,n5,n6,option,tim_fourwf,weight)
       subghg(:,npw_k+1:2*npw_k)=subghg_so(:,1:npw_k)
      end if
      deallocate(work)
!     Compute <G|Vnonlocal|G''>:
      signs=2;choice=1;nnlout=1;idir=0;tim_nonlop=9;paw_opt=0;cpopt=-1
      call nonlop(atindx1,choice,cpopt,cprj_dum,psps%dimekb,&
&      psps%ntypat,dimffnl,dimffnl,psps%ekb,&
&      enlout,ffnl,ffnl,gmet,gprimd,idir,&
&      psps%indlmn,istwf_k,kg_k,kg_k,kpg_dum,kpg_dum,kpoint,kpoint,&
&      dum,psps%lmnmax,matblk,mgfft,mpi_enreg,psps%mpsang,&
&      psps%mpssoang,natom,nattyp,dtset%ngfft,nkpg,nkpg,dtset%nloalg,nnlout,npw_k,npw_k,&
&      nspinor,ntypat,paw_opt,phkxred,phkxred,ph1d,ph3d,ph3d,psps%pspso,&
&      signs,nonlop_dum,nonlop_dum,&
&      tim_nonlop,ucvol,psps%useylm,pwave,gvnlg)
!     Assemble modified kinetic, local and
!     nonlocal contributions to <G|H|G''>.
      do ispinor=1,nspinor
       do ig=1,igp
        igspinor=ig+npw_k*(ispinor-1)
        if(kinpw(ig)<huge(0.0_dp)*1.d-11) then
         subghg(1,igspinor)=kinpw(ig)*pwave(1,igspinor) &
&                          +subghg(1,igspinor)+gvnlg(1,igspinor)
         subghg(2,igspinor)=kinpw(ig)*pwave(2,igspinor) &
&                          +subghg(2,igspinor)+gvnlg(2,igspinor)
        else
         subghg(1,igspinor)=zero
         subghg(2,igspinor)=zero
        end if
       end do
!      Store <G|H|G''> in packed storage:
       do ig=1,igp
        nghg=nghg+1
        ghg(:,nghg)=subghg(:,ig)
       end do
      end do
!     End loop over G'':
      pwave(1,igp)=0._dp
     end do
    end do
    deallocate(pwave,subghg,gvnlg)
    if (nspinor==2) deallocate(pwave_so,subghg_so)

!L-Diagonalize <G|H|G''> matrix:
!------------------------------

    call timab(236,1,tsec)

!   Initialize non-defined <G|H|G''> to zero:  OBSOLETE !
!    eigvec(:,npw_k+1,:)=0._dp
!    if (nspinor==2) eigvec(:,(npw_k+1)*nspinor,:)=0._dp
!
    if (nbandkss_k>=ngdiago*nspinor) then
!
!    Complete diagonalization:
     nbandkss_k=ngdiago*nspinor
     if (nsppol==2) then
      write(message,'(a,i3,3x,2a,i5)') ' Begin complete diago for ikpt=',&
&                                   ikpt,stag(isppol),' - Size of mat.=',ngdiago*nspinor
     else
      write(message,'(a,i3,a,i5)') ' Begin complete diago for ikpt=',&
&                                  ikpt,' - Size of mat.=',ngdiago*nspinor
     end if
     call wrtout(6,message,'PERS')

     allocate(cwork(2,2*npw_k*nspinor),rwork(3*npw_k*nspinor))
#if defined T3E
      call chpev('v','u',ngdiago*nspinor,ghg,eigval,eigvec,&
&                ngdiago*nspinor,cwork,rwork,ier)
#else
      call zhpev('v','u',ngdiago*nspinor,ghg,eigval,eigvec,&
&                ngdiago*nspinor,cwork,rwork,ier)
#endif
     deallocate(rwork,cwork)
    else
!
!    Partial diagonalization:
     if (nsppol==2) then
      write(message,'(a,i3,3x,2a,i5,a,i5)') &
&       ' Begin partial diago for ikpt=',ikpt,stag(isppol),' - Size of mat.=',&
&       ngdiago*nspinor,' - # bnds=',nbandkss_k
     else
      write(message,'(a,i3,a,i5,a,i5)') &
&       ' Begin partial diago for ikpt=',ikpt,' - Size of mat.=',&
&       ngdiago*nspinor,' - # bnds=',nbandkss_k
     end if
     call wrtout(6,message,'PERS')

     allocate(cwork(2,2*npw_k*nspinor),rwork(7*npw_k*nspinor),&
&             iwork(5*npw_k*nspinor),ifail(npw_k*nspinor))
#if defined T3E
      call chpevx('v','i','u',ngdiago*nspinor,ghg,0.,0.,1,nbandkss_k,&
&                 -1.e-8,negv,eigval,eigvec,ngdiago*nspinor,&
&                 cwork,rwork,iwork,ifail,ier)
#else
      call zhpevx('v','i','u',ngdiago*nspinor,ghg,0._dp,0._dp,1,nbandkss_k,&
&                 -1.d-8,negv,eigval,eigvec,ngdiago*nspinor,&
&                 cwork,rwork,iwork,ifail,ier)
#endif
     deallocate(rwork,cwork,iwork,ifail)
    end if
    if (ier.ne.0) then
     write(message,'(a,i3,a,i3,a,i2,a)') &
&     ' outkss: WARNING - ikpt=',ikpt,' spin= ',isppol,' - error in diago (ier=',ier,') !'
     call wrtout(6,message,'PERS')
    end if
    deallocate(ghg)

    call timab(236,2,tsec)

   end if ! diago

   call timab(237,1,tsec)

   deallocate(gbound,phkxred,ph3d,ffnl,kinpw)

#if defined MPI
           end if
#endif


!          Transfer data between procs:
#if defined MPI
            bufsz=nbandksseff/bufnb;bufrt=nbandksseff-bufnb*bufsz
              nband_k=dtset%nband(ikpt+(isppol-1)*nkpt)

            if ((mpi_enreg%me==0).or.&
&               (mpi_enreg%proc_distrb(ikpt,1,isppol)==mpi_enreg%me)  ) then

             if ((mpi_enreg%me==0).and.&
 &                   (mpi_enreg%proc_distrb(ikpt,1,isppol)/=mpi_enreg%me)) then
             allocate(vkb(npw_k+1,ntypat,mpsang),vkbd(npw_k+1,ntypat,mpsang))
             if (diago) then
              allocate(eigval(npw_k*nspinor),eigvec(2,ngdiago*nspinor,nbandkssk(ikpt)))
             end if
             end if           !mpi_enreg%me == 0

             call xexch_mpi(trsl,npwkss,mpi_enreg%proc_distrb(ikpt,1,isppol) &
             &             ,trsl,0,spaceComm,ierr)
             call xexch_mpi(vkb,(npw_k+1)*mpsang*ntypat,mpi_enreg%proc_distrb(ikpt,1,isppol) &
             &             ,vkb,0,spaceComm,ierr)
             call xexch_mpi(vkbd,(npw_k+1)*mpsang*ntypat,mpi_enreg%proc_distrb(ikpt,1,isppol) &
             &             ,vkbd,0,spaceComm,ierr)
             if (diago) then
              call xexch_mpi(eigval,nbandksseff,mpi_enreg%proc_distrb(ikpt,1,isppol) &
              &             ,eigval,0,spaceComm,ierr)
               if (bufsz>0) then
                do  i=0,bufnb-1
                 call xexch_mpi(eigvec(:,:,i*bufsz+1:(i+1)*bufsz),2*ngdiago*nspinor*bufsz,mpi_enreg%proc_distrb(ikpt,1,isppol) &
                 &             ,eigvec(:,:,i*bufsz+1:(i+1)*bufsz),0,spaceComm,ierr)
                end do
               end if
              if (bufrt>0) then
               call xexch_mpi(eigvec(:,:,bufnb*bufsz+1:bufnb*bufsz+bufrt),2*ngdiago*nspinor*bufrt,&
               &                  mpi_enreg%proc_distrb(ikpt,1,isppol) &
               &             ,eigvec(:,:,bufnb*bufsz+1:bufnb*bufsz+bufrt),0,spaceComm,ierr)
               end if
              end if
            call timab(48,2,tsec)
            end if

            if (mpi_enreg%me==0) then
#endif

!M-Prepare data for writing on disk
!---------------------------------------------------------
   allocate(en(nbandksseff),wfg(2,npwkss,nbandksseff))
   en(:)=zero;wfg(:,:,:)=zero
   if (.not.diago) then
!MG check where bdtot_index is correctly increased !!
!   it should be OK since eigen(mband*nkpt*nsppol)
!   and the number of bands is the same for all k-points
    en(1:nbandksseff)=eigen(1+bdtot_index:nbandksseff+bdtot_index)

!MG CHECK THIS PART
    if (mkmem==0) then
     tim_rwwf=0
     call rwwf(cg_disk,eig_dum,0,0,0,ikpt,isppol,kg_dum,mband,mcg_disk,&
&              nband_k,nband_k,npw_k,nspinor,occ_dum,-2,0,tim_rwwf,wffnow)
     do ib=1,nbandksseff
      do ig=1,npwkss
       if(trsl(ig)>npw_k) cycle
       wfg(:,ig,ib)=cg_disk(:,trsl(ig)+(ib-1)*npw_k*nspinor)
      end do
     end do
!MG CHECK THIS PART
!MG cg(2,mpw*nspinor*mband*mkmem*nsppol)=planewave coefficients of wavefunctions.
!MG the increment of k_index is : k_index=k_index+npw_k*nband_k*nspinor
    else
     do ib=1,nbandksseff
      do ig=1,npwkss
       if(trsl(ig)>npw_k) cycle
       wfg(:,ig,ib)=cg(:,trsl(ig)+(ib-1)*npw_k*nspinor+k_index)
      end do
     end do
    end if
   else
    en(1:nbandksseff)=eigval(1:nbandksseff)
    do ib=1,nbandksseff
     do ig=1,npwkss
      if(trsl(ig)>npw_k .or. trsl(ig)>ngdiago) cycle
      wfg(:,ig,ib)=eigvec(:,trsl(ig),ib)
     end do
    end do
!   check diagonalized eigenvalues with respect to conjugate gradient ones
    ntemp=min(nbandksseff,nband_k)
    if(any(abs(en(1:ntemp)-eigen(1+bdtot_index:ntemp+bdtot_index)) &
&      > 1.d-3)) then
     write(message, '(6a)' ) ch10,&
&     ' outkss: WARNING -',ch10,&
&     '  The diagonalized eigenvalues differ by more than 10^-3 Hartree',ch10,&
&     '  with respect to the conjugated gradient values.'
     call wrtout(6,message,'COLL')
    end if
   end if

!N-Write out results
!---------------------------------------------------------
! Print results on log and out
   if(dtset%enunit==1) then
    if (nsppol==2) then
     write(message,'(a,i3,3x,2a)') ' Eigenvalues in eV for ikpt=',ikpt,stag(isppol),':'
    else
     write(message,'(a,i3,a)') ' Eigenvalues in eV for ikpt=',ikpt,':'
    end if
    call wrtout(6,message,'COLL')
    write(message,'(i4,4x,9(1x,f7.2))')&
&    ikpt,(en(ib)*Ha_eV,ib=1,min(9,nbandksseff))
    call wrtout(6,message,'COLL')
    call wrtout(ab_out,message,'COLL')
    if(nbandksseff>9) then
     do j=10,nbandksseff,9
      write(message,'(8x,9(1x,f7.2))')&
&      (en(ib)*Ha_eV,ib=j,min(j+8,nbandksseff))
      call wrtout(6,message,'COLL')
      call wrtout(ab_out,message,'COLL')
     end do
    end if
   else
    if (nsppol==2) then
     write(message,'(a,i3,3x,a)') ' Eigenvalues in Hartrees for ikpt=',ikpt,stag(isppol)
    else
     write(message,'(a,i3,a)') ' Eigenvalues in Hartrees for ikpt=',ikpt,':'
    end if
    call wrtout(6,message,'COLL')
    write(message,'(i4,4x,9(1x,f7.4))')&
&    ikpt,(en(ib),ib=1,min(9,nbandksseff))
    call wrtout(6,message,'COLL')
    call wrtout(ab_out,message,'COLL')
    if(nbandksseff>9) then
     do j=10,nbandksseff,9
      write(message,'(8x,9(1x,f7.4))') (en(ib),ib=j,min(j+8,nbandksseff))
      call wrtout(6,message,'COLL')
      call wrtout(ab_out,message,'COLL')
     end do
    end if
   end if

!  test on the normalization of wavefunctions
   do ib=1,nbandksseff
!  print *, '(if floating underflows occur, '
!  print *, 'it means that the imag part cdum is really small'
    cdum = cmplx(dot_product(wfg(1,:,ib),wfg(1,:,ib)) + dot_product(wfg(2,:,ib),wfg(2,:,ib)), 0, dp)
    if(abs(cdum)<einf) einf = abs(cdum)
    if(abs(cdum)>esup) esup = abs(cdum)
   end do
!  test on the orthogonalization of wavefunctions
   do ib=1,nbandksseff
    do ibp=ib+1,nbandksseff
     cdum = cmplx( &
       & dot_product(wfg(1,:,ib),wfg(1,:,ibp)) + dot_product(wfg(2,:,ib),wfg(2,:,ibp)), &
       & dot_product(wfg(1,:,ib),wfg(2,:,ibp)) - dot_product(wfg(2,:,ib),wfg(1,:,ibp)), dp)
     if(abs(cdum)<cinf) cinf = abs(cdum)
     if(abs(cdum)>csup) csup = abs(cdum)
    end do
   end do

!  Write pseudopotential information on disk:
   if (dtset%accesswff == 0) then
     do is=1,ntypat
      do il=1,mpsang
       write(unitkss) (vkb(trsl(ig),is,il),ig=1,npwkss)
       write(unitkss) (vkbd(trsl(ig),is,il),ig=1,npwkss)
      end do
     end do
#if defined HAVE_ETSF_IO
   else if (dtset%accesswff == 3) then
     allocate(vkb_tgt(npw_k, mpsang, ntypat))
     allocate(vkbd_tgt(npw_k, mpsang, ntypat))
     do is = 1, ntypat, 1
       do il = 1, mpsang, 1
         do ig = 1, npwkss, 1
           if (trsl(ig) <= npw_k) then
             vkb_tgt(trsl(ig), il, is) = vkb(trsl(ig), is, il)
             vkbd_tgt(trsl(ig), il, is) = vkbd(trsl(ig), is, il)
           end if
         end do
       end do
     end do
     gwdata%kb_formfactors%data3D => vkb_tgt
     gwdata%kb_coeff__kpoint_access = ikpt
     gwdata%kb_coeff__number_of_coefficients = npw_k
     gwdata%kb_formfactor_derivative%data3D => vkbd_tgt
     gwdata%kb_coeff_der__kpoint_access = ikpt
     gwdata%kb_coeff_der__number_of_coefficients = npw_k
     call etsf_io_gwdata_put(ncid, gwdata, lstat, error)
     if (.not. lstat) then
       ! We handle the error
       call etsf_io_low_error_to_str(errmess, error)
       write(message, "(A,A,A,A)") ch10, " outkss: ERROR -", ch10, &
                                 & errmess(1:min(475, len(errmess)))
       call wrtout(std_out, message, 'COLL')
       call leave_new('COLL')
     end if
     deallocate(vkb_tgt, vkbd_tgt)
#endif
   end if

!  Write eigenvalues on disk:
   if (nsppol==2) then
    write(message,'(a,i3,3x,a)') &
&       ' Writing out eigenvalues/vectors for ikpt=',ikpt,stag(isppol)
   else
    write(message,'(a,i3,a)') &
&       ' Writing out eigenvalues/vectors for ikpt=',ikpt,'.'
   end if
   call wrtout(6,message,'COLL')
   if (dtset%accesswff == 0) then
     write(unitkss) (en(ib),ib=1,nbandksseff)
   end if

!  Write occupation numbers on disk:
!MG bdtot_index is increaded inside the loop over k-points using:
!   bdtot_index=bdtot_index+nband_k ; occ(mband*nkpt*nsppol)
   allocate(occ_k(max(nband_k,nbandksseff)))
   occ_k(1:nband_k)=occ(1+bdtot_index:nband_k+bdtot_index)
   if (nband_k.lt.nbandksseff) occ_k(nband_k+1:nbandksseff)=0._dp

   if (nsppol==2) then
    write(message,'(a,i3,3x,a)') ' Occupation numbers for ikpt=',ikpt,stag(isppol)
   else
    write(message,'(a,i3,a)') ' Occupation numbers for ikpt=',ikpt,':'
   end if
   call wrtout(6,message,'COLL')
   write(message,'(i4,4x,9(1x,f7.4))')&
&   ikpt,(occ_k(ib),ib=1,min(9,nbandksseff))
   call wrtout(6,message,'COLL')
   if(nbandksseff>9) then
    do j=10,nbandksseff,9
     write(message,'(8x,9(1x,f7.4))') (occ_k(ib),ib=j,min(j+8,nbandksseff))
     call wrtout(6,message,'COLL')
    end do
   end if
   deallocate(occ_k)

!  Write wavefunctions on disk:
   if (dtset%accesswff == 0) then
     do ib=1,nbandksseff
      write(unitkss) (wfg(:,ig,ib),ig=1,npwkss)
     end do
   end if

   ! When ETSF, use the rwwf routine (should always be done like that)
   if (dtset%accesswff == 3) then
#if defined HAVE_ETSF_IO
     wff%master = 0
     wff%me = mpi_enreg%me
     wff%unwff = ncid
     wff%accesswff = 3
     call writewf(reshape(wfg(:, 1:npw_k, :), (/2, nbandksseff * npw_k /)), en, 0, &
                & hdr%headform, 0, ikpt, isppol, kg_k, nbandksseff, nbandksseff * npw_k, &
                & nbandksseff, nbandksseff, npw_k, nspinor, occ(1:nbandksseff), 2, 1, wff)
#endif
   end if

!MG070602 bug fix from Hamann
!deallocate(en,wfg,kg_k)
   deallocate(en,wfg)
!ENDMG

#if defined MPI
           end if
           if ((mpi_enreg%me==0).or.&
&              (mpi_enreg%proc_distrb(ikpt,1,isppol)==mpi_enreg%me)) then
#endif

   deallocate(vkb,vkbd)
   if (diago) deallocate(eigval,eigvec)
!MG070602 bug fix from Hamann
   if(allocated(kg_k)) deallocate(kg_k)
!ENDMG

#if defined MPI
           end if
#endif

!O-End loop over k-points:
!-------------------------
   deallocate(trsl,ylm_k)
   k_index=k_index+npw_k*nband_k*nspinor
   bdtot_index=bdtot_index+nband_k
#if defined MPI
!BEGIN TF_CHANGES
           call MPI_BARRIER(spaceComm,ierr)
!END TF_CHANGES
#endif

   call timab(237,2,tsec)

! End k point loop
  end do
!End loop over spins
 end do

!MG maybe we can specify that max and min are calculated taking into account
!the spin but it is not so important
!ENDMG
 write(message,'(3a,f9.6,2a,f9.6,4a,f9.6,2a,f9.6,a)') &
& ' Test on the normalization of the wavefunctions',ch10,&
& '  min sum_G |a(n,k,G)| = ',einf,ch10,&
& '  max sum_G |a(n,k,G)| = ',esup,ch10,&
& ' Test on the orthogonalization of the wavefunctions',ch10,&
& '  min sum_G a(n,k,G)* a(n'',k,G) = ',cinf,ch10,&
& '  max sum_G a(n,k,G)* a(n'',k,G) = ',csup,ch10
 call wrtout(6,message,'COLL')
 call wrtout(ab_out,message,'COLL')

 deallocate(vlocal)
 deallocate(gbig)
 if ((.not.diago).and.(mkmem==0)) deallocate(eig_dum,kg_dum,occ_dum,cg_disk)
#if defined MPI
!BEGIN TF_CHANGES
           call leave_test(mpi_enreg)
!END TF_CHANGES
#endif

!P-Close files:
!--------------
#if defined MPI
!BEGIN TF_CHANGES
           call MPI_BARRIER(spaceComm,ierr)
!END TF_CHANGES
           if (mpi_enreg%me==0) then
#endif
 if (dtset%accesswff == 0) then
  close(unit=unitkss)
#if defined HAVE_ETSF_IO
 else if (dtset%accesswff == 3) then
  call dtsetfree(dtset_cpy)
  call etsf_io_low_close(ncid, lstat, error)
  if (.not. lstat) then
    ! We handle the error
    call etsf_io_low_error_to_str(errmess, error)
    write(message, "(A,A,A,A)") ch10, " outkss: ERROR -", ch10, &
                              & errmess(1:min(475, len(errmess)))
    call wrtout(std_out, message, 'COLL')
    call leave_new('COLL')
  end if
#endif
 end if
 write(message,'(2a)') ' Closing file ',ch10
 call wrtout(6,message,'COLL')
#if defined MPI
           end if
#endif

!End

!DEBUG
!write(6,*) 'outkss: exit'
!ENDDEBUG
end subroutine outkss
!!***
