c
c    Main spin-orbit DFT driver
c
      logical function dft_scf_so
     &                 (rtdb, Etold, Enuc, iVcoul_opt, iVxc_opt, 
     &                  iter, g_dens, g_dens_at, g_movecs, g_vxc, 
     &                  g_fock, g_svecs, svals, g_xcinv, g_s)
c     
c     $Id: dft_scf_so.F 21279 2011-10-24 03:13:09Z niri $
c     
      implicit none
#include "errquit.fh"
c     
      integer rtdb              ! [input]
      double precision Etold, Enuc, trace
      integer iVcoul_opt
      integer iVxc_opt
      integer iter, swap(20),nswap, ndet, idet
      integer g_dens(2), g_movecs(2), g_vxc(4), 
     &     g_fock, g_svecs,
     &     g_xcinv,  g_scr
      integer g_dens_at(2)
      double precision  svals(*)
c     so
      integer g_densso(2), g_tmp_ri,   
     &     g_moso(2), g_old(2),
     &     g_fockso(2), g_scr2, g_damp_so(2), g_gmovecs(2)
c
      integer la, ia            ! complex*16 a(nbf_ao, nbf_ao)
      integer lw, iw            ! double precision w(nbf_ao)
      integer llwork 
      integer lwork, iwork      ! complex*16 work(3) 
      integer lrwork, irwork    ! double precision rwork
      integer info 
      integer lbuff, ibuff
      integer nbf_mo 
c      logical numerical 
      integer g_s
c
c     declarations for fractional occupation 
c
      integer nmo_fon  ! number of fractionally occupied orbitals
      integer ncore_fon ! number of fully occupied orbitals
      double precision nel_fon  ! fractional electron number
      double precision avg_fon  ! fractional occupancy (averaged)
      integer nTotOcc           ! nTotOcc: no. of (occupied maybe f.o.) mo's 
      logical fon 
      integer ntmp_fon(2)
      double precision rtmp_fon(2), pstrace
      double precision scale, ncheck
      integer kfon_occ, lfon_occ
      logical debug_fon, det_eng 
      integer iswap,jswap
c
c     so
      double precision rho_n, toll_s
c     
c     == zora related ==
      double precision ener_scal, ener_kin
      double precision numelecs
      integer ncanorg
      logical ldmix
      character*7 vecs_or_dens
      integer icalczora
      logical do_purescalar
c
c     == vdw contrib ==
      double precision dum
      logical xc_chkdispauto
      external xc_chkdispauto
      logical disp
c
#include "bas.fh"
#include "geom.fh"
#include "mafdecls.fh"
#include "stdio.fh"
#include "rtdb.fh"
#include "cdft.fh"
#include "global.fh"
#include "msgids.fh"
#include "util.fh"
#include "zora.fh"  ! zora contribution
#include "case.fh"  ! coulomb attenuation
c     
      Logical movecs_write_so, movecs_converged
      External movecs_write_so, movecs_converged
c     
      Logical movecs_read_header_so, movecs_read_so
      External movecs_read_header_so, movecs_read_so 
c
      Logical spinor_guess
      External spinor_guess
      integer  ga_create_atom_blocked
      external ga_create_atom_blocked
c     
      logical oprint, status
      double precision Exc(2), rms(2), derr(2)
      integer nmo(2), icall(2)
c      integer nva
      integer n3c_dbl, n3c_int, n_batch
      integer iwhat_max
      integer l_3cwhat, k_3cwhat, l_3cERI, k_3cERI
      integer dft_n3cint, n_semi_bufs, fd
      external dft_n3cint
      double precision dft_n3cdbl
      external dft_n3cdbl
      Integer l_eval
      integer k_eval(2)
      integer natoms, nTotEl

      integer l_occ, k_occ
      integer i, j,  i1, jstart 
      integer me, nproc
      integer g_tmp, g_fockt, g_wght, g_xyz,g_nq
c     so
      integer g_so(3)
c     so
      integer nheap, nstack
      integer ispin, idone
      integer nexc
      integer iswitc
      integer itol_max, iaoacc_max
      integer itol_min, iAOacc_min
      double precision tol_rho_min, tol_rho_max
      integer npol
      integer leneval, lcd_coef, icd_coef
      integer lcntoce, icntoce, lcntobfr, icntobfr,
     &     lcetobfr, icetobfr, lrdens_atom, irdens_atom,
     &     nscr, lscr, iscr
      double precision start_wall, current_wall, elapsed_wall,
     &     save_wall, current_cpu, start_cpu,
     &     wall_time_reqd
      integer int_wall_time_reqd
      double precision ecoul, ecore, noso
      double precision pp, delta
      double precision anucl_charg, anel
      double precision anoca, anocb, onempp
      double precision etnew, tol2e, tol2e_sleazy
c     convergence declarations
      double precision rlshift_input, rlshift_def
      integer ndamp_input, ndamp_def
c     
c     Note, damping, levelshifting, and diising logicals
c     are used to turn on/off these procedures per
c     iteration.  The alternative logicals nodamping, 
c     nolevelshifting, and nodiis are specified and held
c     for the entire convergence sequence.
c     
      logical diising, damping, levelshifting
      logical keep_damp_on,keep_levl_on, keep_diis_on
      Logical  IOLGC, mulliken
      logical converged, wght_GA
c      logical oconverged 
      logical oprint_parm, oprint_conv, oprint_vecs, 
     &     oprint_eval, oprint_syma, oprint_time, 
     &     oprint_info, oprint_tol, oprint_final_vecs, 
     &     oprint_energy_step, oprint_intermediate_fock,
     &     oprint_3c2e, oprint_interm_overlap, oprint_interm_S2,
     &     oprint_conv_details
      double precision zero, onem, one, mone
      parameter(zero = 0.d0, one = 1.d0, mone=-1.0d0, onem = -one)
c     
      integer ilo, ihi          ! For printing movecs analysis
      double precision eval_pr_tol_lo, eval_pr_tol_hi
      parameter (eval_pr_tol_lo = -1.5d0, eval_pr_tol_hi=0.5)
C     
c     
c     early convergence tolerances
c     
      parameter(itol_min = 7, iAOacc_min = 12, tol_rho_min = 1.d-7)
c     
      double precision dft_dencvg, dft_time
      external dft_dencvg
      double precision homo, lumo, homo_lumo_gap
      integer l_ir, k_ir
      logical last_time_energy
      logical check_shift, lmaxov_sv
      character*7 name
      character*4 scftype
      character*255 basis_name, basis_trans
      integer nopen, nclosed
c     !!! BGJ
      logical cphf_poliz, do_poliz
      external cphf_poliz
c     !!! BGJ
      character*255 title1       ! Returns title of job that created vectors
      character*255 basis_name1  ! Returns name of basis set
      character*255 scftype1     ! Returns the SCF type of the vectors
      integer nbf1               ! Returns no. of functions in basis
      integer g_oep
c     integer ijk
c     
c     == zora related ==
      logical dft_zora_read_so, dft_zora_write_so
      external dft_zora_read_so, dft_zora_write_so

      logical dft_zora_inquire_file_so
      external dft_zora_inquire_file_so

      character*255 zorafilename
      integer g_zora_sf(2)
      integer g_zora_scale_sf(2)
      integer g_zora_so(3)
      integer g_zora_scale_so(3)
      double precision Ezora_sf
      integer switch_sclMO_so ! switch =1,0 ON,OFF sclMO-FA-02-18-11
c
      logical spinor
      logical dft_mem3c
      external dft_mem3c
      external dft_scaleMO_so ! FA-occupations from input script 02-15-11
c
      icd_coef = 0
      k_3cERI  = 0
      k_3cwhat = 0
      me = ga_nodeid()
      nproc = ga_nnodes()
c
c     to do scalar relativistic calculations within the two-component framework
c
      do_purescalar = .false.
      if (.not.rtdb_get(rtdb,'sodft:scalar',mt_log,1,do_purescalar))
     &     do_purescalar = .false.
c
      if (me.eq.0.and.do_purescalar) then
         call util_print_centered(LuOut,
     $        'Neglecting spin-orbit terms', 23, .true.)
         write(LuOut,*)
       endif ! me
c
      call ecce_print_module_entry('dft')
      dft_scf_so = .false.
      nbf_mo = 2*nbf_ao
      lmaxov_sv = lmaxov
      oprint = util_print('information', print_low)
      oprint_info = util_print('common', print_debug)
      oprint_parm = util_print('parameters', print_default)
      oprint_3c2e = util_print('3c 2e integrals', print_default)
      oprint_conv = util_print('convergence', print_default)
      oprint_conv_details = util_print('convergence details', 
     &     print_high)
      oprint_vecs = util_print('intermediate vectors', print_high)
      oprint_eval = util_print('intermediate evals', print_high)
      oprint_syma = util_print('interm vector symm', print_high)
      oprint_time = util_print('dft timings', print_high)
      oprint_tol = util_print('screening parameters', print_high)
      oprint_energy_step = util_print('intermediate energy info',
     &     print_high)
      oprint_intermediate_fock = util_print('intermediate fock matrix',
     &     print_high)
      oprint_interm_S2 = util_print('intermediate S2',print_high)
      oprint_interm_overlap = util_print('intermediate overlap',
     &     print_high)
      oprint_final_vecs = util_print('final vectors', print_high)
c
      ispin=1
      call int_1e_uncache_ga()      
c     !!! BGJ
c     Store SCF hamiltonian type as DFT for use in BGJ routines
      if (.not. rtdb_put(rtdb, 'bgj:scf_type', MT_INT, 1, 2))
     $     call errquit('dft_scf_so: put of bgj:scf_type failed',0,
     &       RTDB_ERR)
c     !!! BGJ
c     
c     see if levelshifting monitoring is desired
c     
      if (.not. rtdb_get(rtdb, 'dft:check_shift', mt_log, 1,
     &     check_shift))then
         check_shift = .false.      
      endif
c     
      if (.not. geom_ncent(geom, natoms))
     &     call errquit('dft_scf_so: geom_ncent failed',73, GEOM_ERR)
      if (.not. geom_nuc_charge(geom, anucl_charg))
     &     call errquit('dft_scf_so: geom_nuc_charge failed', 
     & 0, GEOM_ERR)
c     
      anel = int(anucl_charg) - rcharge
c     
c     Pre-compute mapping vectors
c     
      if (.not.ma_push_get
     &     (mt_int,nshells_ao,'cntoce map',lcntoce,icntoce))
     &     call errquit('dft_scf_so:push_get failed', 13, MA_ERR)
      if (.not.ma_push_get
     &     (mt_int,nshells_ao*2,'cntoce map',lcntobfr,icntobfr))
     &     call errquit('dft_scf_so:push_get failed', 13, MA_ERR)
      if (.not.ma_push_get
     &     (mt_int,natoms*2,'cntoce map',lcetobfr,icetobfr))
     &     call errquit('dft_scf_so:push_get failed', 13, MA_ERR)
c     
      call build_maps(ao_bas_han, int_mb(icntoce), int_mb(icntobfr), 
     &     int_mb(icetobfr), natoms, nshells_ao)
c     
c     Set aside some memory for reduced density matrix
c     
      if (.not.MA_Push_Get(MT_Dbl,2*natoms*natoms,'rdens_atom',
     &     lrdens_atom,irdens_atom))
     &     call errquit('dft_scf_so: cannot allocate rdens_atom',
     & 0, MA_ERR)
c     
c     determine pattern of orbitals' occupancy
c     
      if (.not. MA_Push_Get(MT_Dbl,nbf_ao*2,'mo occ',l_occ,k_occ))
     &     call errquit('dft_scf_so: failed to alloc',999, MA_ERR)
c     
c     get orbital overlap tolerance
c     
      if (.not. rtdb_get(rtdb, 'dft:toll_s', MT_DBL, 1, toll_s))
     .     call errquit('dft_scf_so: lost toll_s ',0, RTDB_ERR)
c     
      nTotEl = noc(1) + noc(2)
      nmo(1) = nbf_ao
      nmo(2) = nbf_ao
c     
      anoca = noc(1)
      anocb = noc(2)
c     
c     UHF occupations
c     
      call dfill(nbf_mo, 0.0d0, dbl_mb(k_occ), 1)
      do i = 1, noc(1)
         dbl_mb(i-1+k_occ) = 1.0d0
      enddo
      do i = nbf_ao+1, nbf_ao+noc(2)
         dbl_mb(i-1+k_occ) = 1.0d0
      enddo
c     
      wght_GA = .false.
c     
c     Determine whether to fit the electronic charge density.
c     
      CDFIT = .FALSE.
      if (iVcoul_opt.eq.1)CDFIT = .TRUE.
      XCFIT = .FALSE.
      if (iVxc_opt.eq.1)XCFIT = .TRUE.
c
c     Define various constants.
c     
      npol = (ipol*(ipol+1))/2
c     
      itol_max = itol2e
      iaoacc_max = iaoacc
      tol_rho_max = tol_rho
      if (oprint_time)
     &     call dft_tstamp(' Before 3c-2e initialize.')
c     
      if (CDFIT)then
         if(dft_mem3c(
     I     natoms,npol,oprint_parm,oprint_3c2e,
     O     n3c_int,n3c_dbl,n_semi_bufs,
     O     l_3ceri,k_3ceri, l_3cwhat,k_3cwhat)) then
            call dft_3cincor(n_batch, n3c_int, int_mb(k_3cwhat), 
     &                       dbl_mb(k_3cERI), n3c_dbl, iwhat_max, 
     &                       n_semi_bufs, fd)
            incore=.true.
         else
            if (me.eq.0 .and. oprint_3c2e)write(LuOut,3230)
            incore=.false.
         endif
      endif
 3230 format(/,10x,'Incore memory use for 3-center 2e- integrals is ',
     &     'turned off. ')
      mulliken = .false.
      if (imull.eq.1)mulliken = .true.
      IOLGC = .TRUE.
      if (noio.eq.1)IOLGC = .FALSE.
c     
c     Energy decomposition switch
c     
      nExc    = idecomp + 1
      Etnew = 0.d0
c     
c     SCF energy convergence criterion. 
c     
      g_tmp = ga_create_atom_blocked(geom, AO_bas_han, 'ga_temp')
      g_fockt = ga_create_atom_blocked(geom, AO_bas_han, 'fock tr')
c     
c     Set up local convergence parameters
c     
      diising = diis
      damping = damp
      levelshifting = levelshift
      keep_damp_on = .false.
      keep_levl_on = .false.
      keep_diis_on = .false.
      ndamp_input = ndamp
      rlshift_input = rlshift
      ndamp_def = 0
      rlshift_def = 0.0
      rlshift = rlshift_def
c     
      if (nodamping)damping = .false.
      if (nolevelshifting) then 
         levelshifting = .false.
         rlshift = rlshift_def
      endif
      if (nodiis) then
         diising = .false.
      endif
      if (ncydp.ne.0)then
         damping = .true. 
         ndamp = ndamp_input
      endif
      if (ncysh.ne.0)then
         levelshifting = .true.
         rlshift = rlshift_input
      endif
      if (ncyds.ne.0)then
         diising = .true.
      endif
c     
c     Initialize DIIS call counter.
c     
      icall(1) = 0
      icall(2) = 0
c     
c     Begin the SCF cycle.
c     
c     allocate eigenvalue array
c     
      leneval = 4*nbf_ao 
      if (.not.MA_Push_Get(MT_Dbl,leneval,'eval',l_eval,k_eval(1)))
     &     call errquit('dft_scf_so: cannot allocate eval',0, MA_ERR)
      k_eval(2) = k_eval(1) + nbf_mo
c     
c     Dump DFT parameters (if debugging) to see if they make sense
c     
      if (me.eq.0.and.oprint_info)call dft_dump_info(me)
c     
c     Get initial density.
c     
      if (oprint_time)
     &     call dft_tstamp(' Before call to DFT_INIT.')
      scftype = 'UHF'
c     
c     allocate array for irreps
c     
      if (.not.MA_Push_Get(mt_int,2*nbf_ao,'dft:irreps',l_ir,k_ir))
     &     call errquit('dft_scf_so: cannot allocate irreps',0, MA_ERR)
      nopen = mult - 1
      nclosed = (nTotEl - nopen) / 2
c     
      if (.not. bas_name(ao_bas_han, basis_name, basis_trans))
     $     call errquit('dft_scf_so: bas_name?', 0, BASIS_ERR)
c     
c     get info for int2e_ and set sleazy tolerance
c     
      tol2e_sleazy = 1.d-3
      call scf_get_fock_param(rtdb, tol2e_sleazy)
c     
c     Force sleazy SCF into "direct" mode.
c     
      call fock_force_direct(rtdb)
cso
cso   allocate Fock matrix and movecs 
cso
c
c     real molecular orbital vectors
      if(.not.ga_create(mt_dbl,nbf_mo,nbf_mo,'Movecs Re',0,0, 
     &     g_moso(1)))     
     &     call errquit('dft_scf_so: error creating Movecs Re',0,
     &       GA_ERR)
      if(.not.ga_create(mt_dbl,nbf_ao,nbf_ao,'Movecs Re',0,0, 
     &     g_gmovecs(1)))     
     &     call errquit('dft_scf_so: error creating Movecs Re',0,
     &       GA_ERR)
c
c     imaginary molecular orbital vectors
      if(.not.ga_create(mt_dbl,nbf_mo, nbf_mo,'Movecs Im',0,0, 
     &     g_moso(2)))
     &     call errquit('dft_scf_so: error creating Movecs Im',0,
     &       GA_ERR)
      if(.not.ga_create(mt_dbl,nbf_ao, nbf_ao,'Movecs Im',0,0, 
     &     g_gmovecs(2)))
     &     call errquit('dft_scf_so: error creating Movecs Im',0,
     &       GA_ERR)
c
      call ga_zero(g_moso(1))
      call ga_zero(g_moso(2))
      call ga_zero(g_gmovecs(1))
      call ga_zero(g_gmovecs(2))
c
      call ga_sync() 
c
c     real part of the fock matrix
      if(.not.ga_create(mt_dbl,nbf_mo,nbf_mo,'Fock Re',0,0, 
     &     g_fockso(1)))
     &     call errquit('dft_scf_so: error creating Fock Re',0, GA_ERR)
      call ga_zero(g_fockso(1))
c
c     imaginary part of the fock matrix
      if(.not.ga_create(mt_dbl,nbf_mo,nbf_mo,'Fock Im',0,0, 
     &     g_fockso(2)))
     &     call errquit('dft_scf_so: error creating Fock Im',0, GA_ERR)
      call ga_zero(g_fockso(2))
c
c     extra arrays
      if(.not.ga_create(mt_dbl,nbf_mo,nbf_mo,'old re',0,0, 
     &     g_old(1)))
     &     call errquit('dft_scf_so: error creating Old Re',0, GA_ERR)
      call ga_zero(g_old(1))
      if(.not.ga_create(mt_dbl,nbf_mo,nbf_mo,'old Im',0,0, 
     &     g_old(2)))
     &     call errquit('dft_scf_so: error creating Old Im',0, GA_ERR)
      call ga_zero(g_old(2))
c
c     real part of the spin-orbit density matrix
      if(.not.ga_create(mt_dbl,nbf_mo,nbf_mo,'DenMx Re',0,0, 
     &     g_densso(1)))
     &     call errquit('dft_scf_so: error creating DenMx Re',0, GA_ERR)
      call ga_zero(g_densso(1))
c
c     imaginary part of the spin-orbit density matrix
      if(.not.ga_create(mt_dbl,nbf_mo,nbf_mo,'DenMx Im',0,0, 
     &     g_densso(2)))
     &     call errquit('dft_scf_so: error creating DenMx Im',0, GA_ERR)
      call ga_zero(g_densso(2))
c
c     extra arrays
      if(.not.ga_create(mt_dbl,nbf_mo,nbf_mo,'Tmp ReIm',0,0, 
     &     g_tmp_ri))
     &     call errquit('dft_scf_so: error creating Tmp ReIm',0, GA_ERR)
      call ga_zero(g_tmp_ri)
c
c     spin-orbit matrices: 1->z, 2->y, 3->x
      if(.not.ga_create(mt_dbl,nbf_ao,nbf_ao,'so z',0,0, 
     &     g_so(1)))
     &     call errquit('dft_scf_so: error creating so z',0, GA_ERR)
      call ga_zero(g_so(1))
      if(.not.ga_create(mt_dbl,nbf_ao,nbf_ao,'so y',0,0, 
     &     g_so(2)))
     &     call errquit('dft_scf_so: error creating so y',0, GA_ERR)
      call ga_zero(g_so(2))
      if(.not.ga_create(mt_dbl,nbf_ao,nbf_ao,'so x',0,0, 
     &     g_so(3)))
     &     call errquit('dft_scf_so: error creating so x',0, GA_ERR)
      call ga_zero(g_so(3))
c
c     extra arrays
      if(.not.ga_create(mt_dbl, 2*nbf, 2*nbf,'old den', 0, 0, 
     &     g_damp_so(1)))
     &     call errquit('dft_scf_so: error creating damp ga', 0, GA_ERR)
      call ga_zero(g_damp_so(1))
      if(.not.ga_create(mt_dbl, 2*nbf, 2*nbf,'old den', 0, 0, 
     &     g_damp_so(2)))
     &     call errquit('dft_scf_so: error creating damp ga', 0, GA_ERR)
      call ga_zero(g_damp_so(2))
c
c     == zora arrays ==
      if (do_zora) then
       if (me.eq.0) then
         call util_print_centered(LuOut,
     $        'Performing ZORA calculations', 23, .true.)
         write(LuOut,*)
       endif ! me
c
c      == get filename for zora data ==
       call util_file_name('zora_so',.false.,.false.,zorafilename)
c
c      == zora: scalar arrays ==
       if(.not.ga_create(mt_dbl,nbf_ao,nbf_ao,'g_zora_sf',0,0,
     &    g_zora_sf(1)))
     &    call errquit('dft_scf_so: error creating g_zora_sf',0, 
     &       GA_ERR)
       call ga_zero(g_zora_sf(1))
       if(.not.ga_create(mt_dbl,nbf_ao,nbf_ao,'g_zora_sf',0,0,
     &    g_zora_sf(2)))
     &    call errquit('dft_scf_so: error creating g_zora_sf',0, 
     &       GA_ERR)
       call ga_zero(g_zora_sf(2))
c
c      == zora: scalar energy scaling arrays ==
       if(.not.ga_create(mt_dbl,nbf_ao,nbf_ao,'g_zora_scale_sf',0,0,
     &    g_zora_scale_sf(1)))
     & call errquit('dft_scf_so: error creating g_zora_scale_sf',0, 
     &    GA_ERR)
       call ga_zero(g_zora_scale_sf(1))
       if(.not.ga_create(mt_dbl,nbf_ao,nbf_ao,'g_zora_scale_sf',0,0,
     &    g_zora_scale_sf(2)))
     & call errquit('dft_scf_so: error creating g_zora_scale_sf',0, 
     &    GA_ERR)
       call ga_zero(g_zora_scale_sf(2))
c
c      == zora: spin-orbit arrays ==
       if(.not.ga_create(mt_dbl,nbf_ao,nbf_ao,'g_zora_so 1',0,0,
     &    g_zora_so(1)))
     & call errquit('dft_scf_so: error creating g_zora_so 1',0, GA_ERR)
       call ga_zero(g_zora_so(1))
       if(.not.ga_create(mt_dbl,nbf_ao,nbf_ao,'g_zora_so 2',0,0,
     & g_zora_so(2)))
     & call errquit('dft_scf_so: error creating g_zora_so 2',0, GA_ERR)
       call ga_zero(g_zora_so(2))
       if(.not.ga_create(mt_dbl,nbf_ao,nbf_ao,'g_zora_so 3',0,0,
     & g_zora_so(3)))
     & call errquit('dft_scf_so: error creating g_zora_so 3',0, GA_ERR)
       call ga_zero(g_zora_so(3))
c
c      == zora: spin-orbit energy scaling arrays ==
       if(.not.ga_create(mt_dbl,nbf_ao,nbf_ao,'g_zora_scale_so 1',0,0,
     & g_zora_scale_so(1)))
     & call errquit('dft_scf_so: error creating g_zora_scale_so 1',0, 
     &    GA_ERR)
       call ga_zero(g_zora_scale_so(1))
       if(.not.ga_create(mt_dbl,nbf_ao,nbf_ao,'g_zora_scale_so 2',0,0,
     & g_zora_scale_so(2)))
     & call errquit('dft_scf_so: error creating g_zora_scale_so 2',0, 
     &    GA_ERR)
       call ga_zero(g_zora_scale_so(2))
       if(.not.ga_create(mt_dbl,nbf_ao,nbf_ao,'g_zora_scale_so 3',0,0,
     &   g_zora_scale_so(3)))
     &   call errquit('dft_scf_so: error creating g_zora_scale_so',0, 
     &    GA_ERR)
       call ga_zero(g_zora_scale_so(3))
c
c      == generate an superposition of atomic densities ==
       call ga_zero(g_dens_at(1))
       if (ipol.gt.1) call ga_zero(g_dens_at(2))
       call guess_dens(rtdb, geom, ao_bas_han, g_dens_at)
       if (oskel) call ga_symmetrize(g_dens_at(1))
       if(ipol.gt.1) then
            call ga_copy(g_dens_at(1),g_dens_at(2))
            call ga_dscal(g_dens_at(1),dble(ntotel-nclosed)/(ntotel))
            call ga_dscal(g_dens_at(2),dble(nclosed)/(ntotel))
            if(oskel) call ga_symmetrize(g_dens_at(2))
       end if
c
c       == in case fon is used together with zora ==
c       == pstrace is queried in the grid code ==
c
       fon = .false.
       if (rtdb_get(rtdb,'dft:fon',mt_log,1,fon)) then 
         pstrace=ga_ddot(g_dens_at(1),g_s)
         pstrace=pstrace + ga_ddot(g_dens_at(2),g_s)
         if(ga_nodeid().eq.0) write (luout,'(5x,a,1x,e15.7)')
     &      'tr(P*S): ',pstrace
         if (.not. rtdb_put(rtdb, 'dft:pstrace', mt_dbl, 1, pstrace))
     &     call errquit('dft_scf: rtdb_put pstrace failed', 1, RTDB_ERR)
       end if ! fon check
c
c      == try reading the zora contributions from file ==
       icalczora = 0
       if (.not.dft_zora_read_so(zorafilename, nbf_ao, ipol, nmo, mult,
     &     g_zora_sf, g_zora_scale_sf, g_zora_so, g_zora_scale_so)) 
     &     icalczora = 1
c 
       if (icalczora.eq.1) then
        if (me.eq.0) then
         call util_print_centered(LuOut,
     $        'Generating atomic ZORA corrections', 23, .true.)
         write(LuOut,*)
        end if ! me = 0
c
c       == calculate the zora atomic corrections ==
        call zora_getv_so(rtdb, g_dens_at, g_zora_sf, g_zora_scale_sf,
     &                      g_zora_so, g_zora_scale_so, nexc)
c
c       == write out the atomic zora corrections to file ==
        if (.not.dft_zora_write_so(rtdb, ao_bas_han, zorafilename,
     &    nbf_ao, ipol, nmo, mult, g_zora_sf, g_zora_scale_sf, 
     &    g_zora_so, g_zora_scale_so))
     &    call errquit('dft_scf_so: dft_zora_write_so failed', 0, 
     &            DISK_ERR)
c
       end if ! icalczora
c
c       == for scalar calculations via the 2 component formalism
        if (do_purescalar) then 
            call ga_zero(g_zora_so(1))
            call ga_zero(g_zora_so(2))
            call ga_zero(g_zora_so(3))
            call ga_zero(g_zora_scale_so(1))
            call ga_zero(g_zora_scale_so(2))
            call ga_zero(g_zora_scale_so(3))
        end if
c
      end if  ! do_zora
c
c     check for fon input
c
      fon = .false.
      if (rtdb_get(rtdb,'dft:fon',mt_log,1,fon)) then 
c
c       variable 'fon' should be true, otherwise there's 
c       something fishy going on:
        if (.not.fon) call errquit(
     &     'dft_scf_so: fon stored in RTDB but not .true.', 1,
     &       RTDB_ERR)

c       note: *_fon variables are read here not as arrays with
c       two elements, assuming that the input didn't specify
c       spects for alpha and beta separately

        if (.not.rtdb_get(rtdb,'dft:nmo_fon',mt_int,2,ntmp_fon)) then
          if (me.eq.0) then
            write(LuOut,*)"Error: fractional occupation 
     &         calculation specified without setting number of orbitals"
          end if
          call errquit('dft_scf_so: nmo_fon rtdb_get failed', 1,
     &       RTDB_ERR)
        else
          nmo_fon = ntmp_fon(1)
        end if
        if (.not.rtdb_get(rtdb,'dft:nel_fon',mt_dbl,2,rtmp_fon)) then
          if (me.eq.0) then
            write(LuOut,*)"Error: fractional occupation 
     &         specified without setting number of electrons"  
          end if
          call errquit('dft_scf_so: nel_fon rtdb_get failed', 1,
     &       RTDB_ERR)
        else
          nel_fon = rtmp_fon(1)
        end if
        if (.not.rtdb_get(rtdb,'dft:ncore_fon',mt_int,2,ntmp_fon)) then
          if (me.eq.0) then
            write(LuOut,*)"Error: fractional occupation 
     &         calculation specified without setting filled levels"  
          end if
          call errquit('dft_scf_so:  nel_core rtdb_get failed', 1,
     &       RTDB_ERR)
        else
          ncore_fon = ntmp_fon(1)
        end if
        if (rtdb_get(rtdb, 'dft:debugfon', mt_log, 1,
     &     debug_fon)) continue

      else ! keyword not in RTDB
        fon = .false.
        debug_fon = .false.
      end if ! fon
c
      call ga_sync()
      call ga_zero(g_densso(1))
      call ga_zero(g_densso(2))
      spinor = .false. 
      spinor=spinor_guess(movecs_in)
c
      if(.not.spinor)then 
        
c       fractional occupations:
        if (fon) then
c ... jochen: presumably good enough for initial guess. We won't tinker
c         with that. later, the fractional occupations
c         are caluclated explicitly
          anoca = noc(1)
          anocb = noc(2)
          noc(1)=(noc(1)+noc(2)-nel_fon)/2
          noc(2)=(noc(1)+noc(2)-nel_fon)/2
        endif                   ! fon
c
         call dft_guessin(movecs_in,ldmix,ncanorg,fon,
     &     vecs_or_dens, ipol,nbf_ao,g_movecs,g_gmovecs,
     &     toll_s,svals)
c
         call scf_vectors_guess(rtdb, tol2e_sleazy, geom, ao_bas_han, 
     &        basis_trans, movecs_in, movecs_out, 
     &        movecs_guess, scftype, nclosed, nopen, 
     &        nbf, nmo, noc(1), noc(2),  k_eval, k_occ, 
     &        k_ir, g_gmovecs, g_dens, 'density', 
     &        'dft', title, oskel, oadapt, 
     &        .true.) 
c
         call dft_guessout(nmo,nbf_ao,g_gmovecs,g_movecs,ipol)
c
c        fon: undo temp setting of noc(:)
         if (fon)then 
           noc(1)=anoca
           noc(2)=anocb
         endif
c     
c     spinor occupancies
c     
         call dfill(nbf_mo, 0.0d0, dbl_mb(k_occ), 1)
         do i = 1, nTotEl
            dbl_mb(i-1+k_occ) = 1.0d0
         enddo
c     
c     map initial guess movecs from spin-free calculations g_moso(1) 
c     noc(1).ge.noc(2) is assumed
c         
         do i=1,min(noc(1),noc(2))
            call ga_dadd_patch(1.d0,g_movecs(1),1,nbf_ao,i,i, 
     $           0.d0,g_moso(1),1,nbf_ao,2*(i-1)+1,2*(i-1)+1,
     $           g_moso(1),1,nbf_ao,2*(i-1)+1,2*(i-1)+1) 
            call ga_dadd_patch(1.d0,g_movecs(2),1,nbf_ao,i,i, 
     $           0.d0,g_moso(1),1+nbf_ao,nbf_mo,2*(i-1)+2,2*(i-1)+2,
     $           g_moso(1),1+nbf_ao,nbf_mo,2*(i-1)+2,2*(i-1)+2)
         enddo
         do i=noc(2)+1,noc(1)
            call ga_dadd_patch(1.d0,g_movecs(1),1,nbf_ao,i,i, 
     $           0.d0,g_moso(1),1,nbf_ao,noc(2)+i,noc(2)+i,
     $           g_moso(1),1,nbf_ao,noc(2)+i,noc(2)+i) 
         enddo
         do i=noc(2)+1,noc(1)
            call ga_dadd_patch(1.d0,g_movecs(2),1,nbf_ao,i,i, 
     $           0.d0,g_moso(1),1+nbf_ao,nbf_mo,noc(1)+i,noc(1)+i,
     $           g_moso(1),1+nbf_ao,nbf_mo,noc(1)+i,noc(1)+i) 
         enddo
         do i=noc(1)+1,nbf_ao
            call ga_dadd_patch(1.d0,g_movecs(1),1,nbf_ao,i,i, 
     $           0.d0,g_moso(1),1,nbf_ao,2*(i-1)+1,2*(i-1)+1,
     $           g_moso(1),1,nbf_ao,2*(i-1)+1,2*(i-1)+1) 
            call ga_dadd_patch(1.d0,g_movecs(2),1,nbf_ao,i,i, 
     $           0.d0,g_moso(1),1+nbf_ao,nbf_mo,2*(i-1)+2,2*(i-1)+2,
     $           g_moso(1),1+nbf_ao,nbf_mo,2*(i-1)+2,2*(i-1)+2)
         enddo
      endif  !if not spinor

      if(spinor)then 
c     
c     read spinors from files 
c     
c     get MO vectors from file
c     
c         if (.not. rtdb_cget(rtdb, 'dft:input vectors', 1, movecs_in))
c     $        call errquit('dft_scf_so: DFT MO vectors not defined',0)
         status = movecs_read_header_so(movecs_in, title1, basis_name1,
     $        scftype1, nbf1)
c     
c     Should check much more info than just nbf for consistency
c     
c     
c     get mo eigevectors
c     
         if (2*nbf_ao .ne. nbf1)then
            write(6,*)'dft_scf_so movecs output = ',movecs_in
            call errquit('dft_scf_so: could not read mo vectors',911,
     &       DISK_ERR)
         else 
            status = .true.
            status = status .and.
     $           movecs_read_so(movecs_in, dbl_mb(k_occ),
     $           dbl_mb(k_eval(1)), g_moso)
         endif
c     
         if (.not.status)then
            write(6,*)'dft_scf_so movecs output = ',movecs_in
            call errquit('dft_scf_so: could not read mo vectors',917,
     &       DISK_ERR)
         endif
c     
         call movecs_swap_so(rtdb,'dft',scftype,g_moso,
     &        dbl_mb(k_occ),dbl_mb(k_eval(1)))
      endif  !spinor
c     
c     Form Re and Im of density matrix
c     
c ... jochen 10/11: implemented new way of applying 
c     fractional occupations. See dft_densm.F of the scalar 
c     branch of the code

c$$$      if (.not.rtdb_get(rtdb,'sodft:fon',mt_log,1,fon))
c$$$     &    fon = .false.
c$$$      if (.not.rtdb_get(rtdb,'sodft:nmo_fon',mt_int,1,nmo_fon))
c$$$     &    nmo_fon = 0
c$$$      if (.not.rtdb_get(rtdb,'sodft:nel_fon',mt_int,1,nel_fon))
c$$$     &    nel_fon = 0
c$$$      nTotOcc = (nTotEl-nel_fon) + nmo_fon 
c
      nTotOcc = nTotEl  ! if there is no fractional occupation 
      if(fon)then 
c$$$         avg_fon = dble(nel_fon)/dble(nmo_fon) 
c$$$         do i = (nTotEl-nel_fon)+1, nTotOcc  
c$$$            dbl_mb(i-1+k_occ) = avg_fon 
c$$$         enddo

        if (nmo_fon.lt.1) call errquit(
     &     'dft_scf_so:fon nmo_fon <1',
     &     1, INPUT_ERR)
        if (nel_fon.lt.0d0) call errquit(
     &     'dft_scf_so:fon nel_fon <0',
     &     1, INPUT_ERR)

        avg_fon = nel_fon/dble(nmo_fon)
        nTotOcc = ncore_fon + nmo_fon 

c       debug code: 
c$$$        write (luout,*) 'DEBUG: occupations before applying FON'
c$$$        do i = 1,nbf_ao
c$$$          write (luout,*) i, dbl_mb(i-1+k_occ), dbl_mb(i-1+nbf_ao+k_occ)
c$$$        end do

        ncheck = 0d0
        do i = 1, ncore_fon
          if (i> 2*nbf_ao) call errquit(
     &       'dft_densm:fon focc index exceeds 2nbf error 1',
     &       i, INPUT_ERR)
          dbl_mb(i-1+k_occ) = 1d0
          ncheck = ncheck + 1d0
        end do
        do i = ncore_fon + 1, ncore_fon + nmo_fon
          if (i> 2*nbf_ao) call errquit(
     &       'dft_densm:fon focc index exceeds 2nbf error 2',
     &       i, INPUT_ERR)
          dbl_mb(i-1+k_occ) = avg_fon
          ncheck = ncheck + avg_fon
        end do

        if(abs(ncheck-dble(nTotEl)).gt.1d-3 .and. me.eq.0) then
          write(luout,*) ' frac. electrons ',ncheck,' vs ',nTotEl
        end if

c$$$        write (luout,*) 'DEBUG: occupations AFTER applying FON'
c$$$        do i = 1,nbf_ao
c$$$          write (luout,*) i, dbl_mb(i-1+k_occ), dbl_mb(i-1+nbf_ao+k_occ)
c$$$        end do
      endif ! fon
c
      switch_sclMO_so=0 ! FA-09-26-11
c     
c     the fractionally occupied mo's are scaled by the sqrt of the fon
c     
      if (fon) then 
         do i = ncore_fon + 1, ncore_fon + nmo_fon
           if (i> nbf_mo) call errquit(
     &        'dft_densm:fon g_moso index exceeds nbf_mo',
     &        i, INPUT_ERR)
           scale = sqrt(dbl_mb(i-1+k_occ)) ! sqrt(occupation number)
           call ga_scale_patch(g_moso(1), 
     &        1, nbf_mo, i, i, scale)
           call ga_scale_patch(g_moso(2), 
     &        1, nbf_mo, i, i, scale)
         end do
         if(me.eq.0) write(luout,'(5x,a)')  'FON applied'
       end if ! fon
c      else
c ---- FA-02-18-11 : occupations keyword ---- START
c       call dft_scaleMO_so(rtdb,g_moso,dbl_mb(k_occ),g_densso,
c     &                     nbf_mo,nTotOcc,switch_sclMO_so)
c      endif
c
c     calculate the spin-orbit density matrix using the scaled mo's
c 
c       if (switch_sclMO_so.ne.1) then
c         if (ga_nodeid().eq.0)
c     &   write(*,*) 'ENTER calc so-density matrix std-way'
         call dft_densm_so(g_densso, g_moso, nbf_ao, nTotOcc)
c       endif
c
c     restore the scaled mo's
c
      if (fon) then 
         do i = ncore_fon + 1, ncore_fon + nmo_fon
           if (i> nbf_mo) call errquit(
     &        'dft_densm:fon g_moso index exceeds nbf_mo',
     &        i, INPUT_ERR)
           if (dbl_mb(i-1+k_occ) < 1d-4) call errquit(
     &        'dft_densm:fon frac occup < 1E-4. Aborting suspisciously',
     &        i, INPUT_ERR)           
           scale = 1d0/sqrt(dbl_mb(i-1+k_occ)) ! 1/sqrt(occupation number)
           call ga_scale_patch(g_moso(1), 
     &        1, nbf_mo, i, i, scale)
           call ga_scale_patch(g_moso(2), 
     &        1, nbf_mo, i, i, scale)
         end do
       end if ! fon
c      
c     calculate the spin-free density matrix from the spin-orbit density matrix
c
      call ga_zero(g_dens(1))
      call ga_zero(g_dens(2))
      call ga_dens_sf(g_dens, g_densso(1), nbf_ao)

c     check fon, calculate tr[P S] and print
c      if (debug_fon) call dft_pstrace(g_dens,ao_bas_han,nbf_ao,oskel)
      if (fon) then
        pstrace=ga_ddot(g_dens(1),g_s)
        pstrace=pstrace + ga_ddot(g_dens(2),g_s)
        if(ga_nodeid().eq.0) write (luout,'(5x,a,1x,e15.7)')
     &     'tr(P*S): ',pstrace
        if (.not. rtdb_put(rtdb, 'dft:pstrace', mt_dbl, 1, pstrace))
     &     call errquit('dft_scf: rtdb_put pstrace failed', 1, RTDB_ERR)
      end if
c
c     Tidy up SCF
c     
      call fock_2e_tidy(rtdb)
c     
c     set initial coulomb acc
c     
c     write(6,*)' movecs_guess = ',movecs_guess
      if (movecs_guess.eq.'restart')ltight=.true.
c     
c     May not want levelshifting initially until sure that the
c     transformed Fock matrix will be diagonally dominant, or
c     alternatively shift the piss out of it.
c     
      if (movecs_guess.eq.'restart')then
         levelshifting = .true.
      else
         levelshifting = .false.

c     rlshift = 2.0
      endif
      iswitc = 0
      if (ltight)then
         itol2e = itol_max
         iAOacc = iAOacc_max
         tol_rho = tol_rho_max
         iswitc = 1
      else
         itol2e = min(itol_min,itol_max)
         iAOacc = min(iAOacc_min,iAOacc_max)
         tol_rho = max(tol_rho_min,tol_rho_max)
      endif
c     
      tol2e = 10.d0**(-itol_max)
c     
c     Restore SCF parameters
c     
      call scf_get_fock_param(rtdb, tol2e)
c     
c     If open shell put the total density matrix in g_dens(1)
c     
      call ga_dadd(one,g_dens(1),one,g_dens(2),g_dens(1))
c
c     
c     Call to Mulliken Pop Analysis for initial density
c     
      if (mulliken)then
         if (me.eq.0)call dft_header
     &        (' Total Density - Mulliken Population Analysis')
         call mull_pop(geom,ao_bas_han,g_dens(1),g_s,'total')
c     
c     analysis of spin density
c     
         if (me.eq.0) call dft_header
     &        (' Spin Density - Mulliken Population Analysis')
         call ga_dadd(one,g_dens(1),-2.d0,g_dens(2),g_dens(2))
         call mull_pop(geom,ao_bas_han,g_dens(2),g_s,'spin') 
c     
c     restore beta density in g_dens(2)
c     
         call ga_dadd(one,g_dens(1),-1.d0,g_dens(2),g_dens(2))
         call ga_dscal(g_dens(2),0.5d0)
      endif
 
      iter = 1 
 
cso   ma for complex diagonalizer 
      if (.not.MA_Push_Get(MT_DCpl,nbf_mo*nbf_mo,'cpl a',la,ia))
     &     call errquit('dft_scf: cannot allocate cpl a',0, MA_ERR)
      if (.not.MA_Push_Get(MT_Dbl,nbf_mo,'cpl eval',lw,iw))
     &     call errquit('dft_scf: cannot allocate cpl eval',0, MA_ERR)
      llwork = max(1, 2*nbf_mo-1)
      if (.not.MA_Push_Get(MT_DCpl,llwork,'cpl work',lwork,iwork))
     &     call errquit('dft_scf: cannot allocate cpl work',0, MA_ERR)
      if (.not.MA_Push_Get(MT_Dbl,max(1,3*nbf_mo-2),'w.s',lrwork,
     &     irwork))
     &     call errquit('dft_scf: cannot allocate w.s',0, MA_ERR)
      if (.not.ma_push_get(mt_dbl,nbf_mo,'buff',lbuff,ibuff))
     &     call errquit('dft_scf:push_get failed', 13, MA_ERR)
cso
cso     
c     
c     Top of infinite SCF iteration loop
c
c     Write prep time required
c
      call ga_sync()
      if (me.eq.0.and.oprint)then
         current_cpu = util_cpusec()
         write(LuOut,20)current_cpu
   20    format(2x,' Time prior to 1st pass: ',f8.1)
      endif
c     
c     start DFT_SCF timer
c     
      start_wall = util_wallsec()
      start_cpu = util_cpusec()
      dft_time = -start_cpu
c
      if (oprint_time)
     &     call dft_tstamp('   Before SCF iter loop. ')
c
      last_time_energy = .false.
c
      det_eng = .false. 
      idet = 0 
 1000 continue   ! top of the scf loop

      if (me.eq.0 .and. oprint_conv_details)
     &   write(LuOut,124)damping, levelshifting, diising
 124  format(10x,' DAMPING=',l1,' LEVELSHIFTING=',l1,
     &           ' DIISING=',l1)
c
      if (me.eq.0.and.oprint_tol)write(LuOut,3234)itol2e,iAOacc,iXCacc
 3234 format(10x,'itol2e=',i2,' iAOacc=',i2,' iXCacc=',i2)

      Ecoul  = ZERO
      Exc(1) = ZERO
      Exc(2) = ZERO

      call ga_zero(g_fockso(1))
      call ga_zero(g_fockso(2))
c     
c     Accumulate core hamiltonian into Fock matrix; 
c     compute core energy
c     
      call ga_zero(g_fock)
      call int_1e_ga(ao_bas_han, ao_bas_han, g_fock, 'kinetic', oskel)
c
      call int_1e_ga(ao_bas_han, ao_bas_han, g_fock, 'potential', oskel)
      call ga_fock_sf(g_fock, g_fockso(1), nbf_ao) 
cso
cso   Re(Dsf)=Re(Daa)+Re(Dbb)=g_dens(1) 
cso   <Hsf> = Re(Dsf) dot T+Vsf 
      Ecore = ga_ddot(g_dens(1), g_fock)
cso  
cso   Accumulate s.o. contribution to fock matrix 
cso   
      noso=Ecore 
      call ga_zero(g_so(1))
      call ga_zero(g_so(2))
      call ga_zero(g_so(3))
cso
cso   Calculate the spin-orbit contributions from the ecp
      if( .not. do_zora .and. .not. do_purescalar) then
         call int_1e_ga(ao_bas_han, ao_bas_han, g_so, 'so', oskel)
         call ga_scale(g_so(1),dble(0.5d0))  ! z
         call ga_scale(g_so(2),dble(0.5d0))  ! y
         call ga_scale(g_so(3),dble(0.5d0))  ! x
cso
cso   Add in the s.o. contribution to the fock matrix
         call ga_fock_so(g_so, g_fockso, nbf_ao)
      end if
cso  
cso   Accumulate z-component s.o. contribution
cso   Re(Dz)=-Im(Daa)+Im(Dbb) 
cso   <Hz>=Re(Dz) dot Vz 
cso   
      call ga_zero(g_tmp) 
      call ga_dens_so(g_tmp, g_densso, nbf_ao, 'z') 
      Ecore = Ecore + ga_ddot(g_tmp, g_so(1)) 
!     write(*,*)"Ecore+=so(z)", ecore
cso
c     == add in the spin-orbit zora contribution (z) ==
      if (do_zora) Ecore = Ecore + ga_ddot(g_tmp, g_zora_so(1))
cso  
cso   Accumulate y-component s.o. contribution 
cso   Re(Dy)=Re(Dab)-Re(Dba) 
cso   <Hy>=Re(Dy) dot Vy 
cso   
      call ga_zero(g_tmp) 
      call ga_dens_so(g_tmp, g_densso, nbf_ao, 'y') 
      Ecore = Ecore + ga_ddot(g_tmp, g_so(2)) 
!     write(*,*)"Ecore+=so(y)", ecore
cso
c     == add in the spin-orbit zora contribution (y) ==
      if (do_zora) Ecore = Ecore + ga_ddot(g_tmp, g_zora_so(2))
cso  
cso   Accumulate x-component s.o. contribution 
cso   Re(Dx)=-Im(Dab)-Im(Dba) 
cso   <Hx>=Re(Dx) dot Vx 
cso
      call ga_zero(g_tmp) 
      call ga_dens_so(g_tmp, g_densso, nbf_ao, 'x') 
      Ecore = Ecore + ga_ddot(g_tmp, g_so(3))
!     write(*,*)"Ecore+=so(x)", ecore
cso
c     == add in the spin-orbit zora contribution (x) ==
      if (do_zora) Ecore = Ecore + ga_ddot(g_tmp, g_zora_so(3))
cso
       noso = Ecore-noso 
c
c     Pre-compute reduced total density matrices over atoms
c 
      call dfill(ipol*natoms*natoms, 0.0d0, dbl_mb(irdens_atom), 1)
      nscr = nbf_ao_mxnbf_ce*nbf_ao_mxnbf_ce
      if (.not.MA_Push_Get(MT_Dbl,nscr,'scr',lscr,iscr))
     &   call errquit('dft_scf: cannot allocate scr',0, MA_ERR)
      call util_ga_mat_reduce(nbf_ao, natoms, int_mb(icetobfr), g_dens, 
     &                        ipol, dbl_mb(irdens_atom), 'rms', 
     &                        dbl_mb(iscr), nbf_ao_mxnbf_ce,.true.)
c      write(*,'("irdens",5f10.7)')
c     &     (dbl_mb(irdens_atom+i),i=0,ipol*natoms*natoms-1)
      if (.not.ma_pop_stack(lscr))
     &   call errquit('dft_scf: cannot pop stack:lscr',0, MA_ERR)
c
c
      if (CDFIT)then
c
c        == attenuation == 
         if (cam_exch) call case_setflags(.false.)
c     
c        Fit the electron charge density.
c     
         if (.not.MA_Push_Get(MT_Dbl,nbf_cd,'cd_coef',lcd_coef,
     &        icd_coef))
     &        call errquit('dft_scf: cannot allocate cd_coef',0, MA_ERR)
         if (oprint_time)
     &        call dft_tstamp(' Before call to FITCD.   ')
         call dft_fitcd(1,Dbl_MB(icd_coef), dbl_mb(k_3cERI), Ecoul, 
     &        g_dens, nTotEl, n_batch, n3c_int,
     &        int_mb(k_3cwhat), n3c_dbl, iwhat_max, 
     &        n_semi_bufs, fd, IOLGC, 
     .        natoms,
     &        .false., 0d0, .false.)
      endif
c     
      if (oprint_time)
     &     call dft_tstamp(' Before call to GETVCOUL.')
      call dft_getvc(Dbl_MB(icd_coef), dbl_mb(k_3cERI), Ecoul,
     &     g_tmp, iVcoul_opt, n_batch, 
     &     n3c_int, int_mb(k_3cwhat), n3c_dbl, iwhat_max,
     &     n_semi_bufs, fd, IOLGC,
     &     .false., 1)
c     
c     Add V coul to Fock Matrix
c     
cso   call ga_dadd(one, g_tmp, one, g_fock, g_fock)
      call ga_fock_sf(g_tmp, g_fockso(1), nbf_ao) 
      if (CDFIT)then
         if (.not.ma_pop_stack(lcd_coef))
     &        call errquit('dft_scf: cannot pop stacklcd_coef',0,
     &       MA_ERR)
      endif
c     
c     Restore alpha and beta densities.
c     
      call ga_dadd(one, g_dens(1), onem, g_dens(2), g_dens(1))
c     
c     Note that g_dens(1) now contains the alpha density
c     matrix and g_dens(2) contains the beta
c     
c     Pre-compute reduced alpha and beta density matrices over atoms
c     
      call dfill(ipol*natoms*natoms, 0.0d0, dbl_mb(irdens_atom), 1)
      nscr = nbf_ao_mxnbf_ce*nbf_ao_mxnbf_ce
      if (.not.MA_Push_Get(MT_Dbl,nscr,'scr',lscr,iscr))
     &     call errquit('dft_scf: cannot allocate scr',0, MA_ERR)
      call util_ga_mat_reduce(nbf_ao, natoms, int_mb(icetobfr), 
     &     g_dens, ipol, dbl_mb(irdens_atom), 
     &     'rms', dbl_mb(iscr), nbf_ao_mxnbf_ce,.true.)
      if (.not.ma_pop_stack(lscr))
     &  call errquit('dft_scf: cannot pop stacklscr:',0, MA_ERR)
c     
c     Compute the XC potential and energy.
c     
      g_vxc(1) = g_tmp
      call ga_zero(g_vxc(1))
      rho_n = 0.0d0
      call ga_zero(g_vxc(2))
c
c     == attenuation ==
      if (cam_exch) call case_setflags(.true.) ! set attenuation
c
      if (oprint_time)call dft_tstamp(' Before call to GETVXC.  ')
      call xc_getv
     &   (rtdb, Exc, Ecoul,nExc, iVxc_opt, g_xcinv, g_dens, 
     &   g_vxc, IOLGC, g_wght, g_xyz, g_nq,wght_GA, rho_n,
     &   dbl_mb(irdens_atom), int_mb(icetobfr), natoms)
c     write(*,*)"Ecoul, Exc(1)", Ecoul, Exc(1)
c
c     == attenuation ==
      if (cam_exch) call case_setflags(.false.) ! unset attenuation if set
c
c     == add in zora contributions ==
      if (do_zora) then
c
c        == calculate scalar energy ==
         Ezora_sf = ga_ddot(g_dens(1),g_zora_sf(1))
     &      + ga_ddot(g_dens(2),g_zora_sf(2))
         Ecore = Ecore + Ezora_sf
c
c        == combine scalar part with the xc matrices ==
         call ga_dadd(1.d0,g_vxc(1),1.d0,g_zora_sf(1),g_vxc(1))
         call ga_dadd(1.d0,g_vxc(2),1.d0,g_zora_sf(2),g_vxc(2))
c
c        == add spin-orbit zora to fock matrix ==
         call ga_fock_so(g_zora_so, g_fockso, nbf_ao)

      end if  ! do_zora
c
c     == add in the exchange-correlation to the fock matrix ==
      call ga_sync()
      call ga_dadd_patch( 1.d0, g_fockso(1), 1, nbf_ao, 
     &     1, nbf_ao, 
     &     1.0d0, g_vxc(1),  1, nbf_ao, 
     &     1, nbf_ao,
     &     g_fockso(1), 1, nbf_ao, 
     &     1, nbf_ao) 
      call ga_dadd_patch( 1.d0, g_fockso(1), 1+nbf_ao, nbf_mo, 
     &     1+nbf_ao, nbf_mo,
     &     1.0d0, g_vxc(2),  1, nbf_ao, 
     &     1, nbf_ao,
     &     g_fockso(1), 1+nbf_ao, nbf_mo, 
     &     1+nbf_ao, nbf_mo)
c     
c     == get the exact exchange contribution ==
      call xc_exso(rtdb,Exc,Ecoul,nExc,g_densso,g_fockso)
c
      if (oprint_time)
     &     call dft_tstamp(' End of parallel region. ')
c     
c     Calculate the total electronic energy.
c     
      if (nExc.eq.1)then
         Etnew = Ecore + Ecoul + Exc(1)
         if(det_eng)goto 2001
      else
         Etnew = Ecore + Ecoul + Exc(1) + Exc(2)
         if(det_eng)goto 2001
      endif

      if (last_time_energy)then
c     
c     If open shell put the total density matrix back in 
c     g_dens(1) and quit.
c     
         call ga_dadd(one, g_dens(1), one, g_dens(2), g_dens(1))
         goto 2000
      endif
c     
      delta = abs(etold-etnew)
c     
      call ga_sync
      rms(1) = 0.d0
      rms(2) = 0.d0
      homo_lumo_gap = 200.0d0
c     
c     Symmetrize the Fock matrix
c
      if (oskel)
     &   call sym_symmetrize(geom, AO_bas_han, .false., g_fock)
c
      call ga_symmetrize(g_fock)
c
c     DIIS step taken here.
c     
      if (diising)then
        call diis_driver_so(toll_s, derr, icall, nfock, 
     &           nbf_mo, g_fockso, g_densso, 
     &           g_svecs, svals, diising, nodiis)
        derr(2)=derr(1)
      endif
c     
      g_scr = ga_create_atom_blocked(geom, AO_bas_han, 'ga scr')
c     
c     Put s-1/2 in g_scr.
c     
      iw = 2
      call diis_bld12_so(toll_s, svals, g_svecs, g_scr, 
     &     g_tmp, nbf_ao, iw)
c     
c     map s-1/2 to the nbf_mo by nbf_mo g_scr2 
c    
      if(.not.ga_create(mt_dbl, 2*nbf, 2*nbf,'scr2', 0, 0, g_scr2))
     &     call errquit('dft_scf_so: error creating scr2',0, GA_ERR)
      call ga_zero(g_scr2)
      call ga_fock_sf(g_scr, g_scr2, nbf_ao)
c     
c     Transform Fock matrix.
c     
      call ga_zero(g_tmp_ri)   
      call ga_dgemm('T', 'N', nbf_mo, nbf_mo, nbf_mo, one, 
     &                 g_scr2, g_fockso(1), zero, g_tmp_ri)
      call ga_dgemm('N', 'N', nbf_mo, nbf_mo, nbf_mo, one, 
     &                 g_tmp_ri, g_scr2, zero, g_fockso(1))

      call ga_zero(g_tmp_ri)   
      call ga_dgemm('T', 'N', nbf_mo, nbf_mo, nbf_mo, one, 
     &                 g_scr2, g_fockso(2), zero, g_tmp_ri)
      call ga_dgemm('N', 'N', nbf_mo, nbf_mo, nbf_mo, one, 
     &                 g_tmp_ri, g_scr2, zero, g_fockso(2)) 
c     
c     Level shifting is implemented here (similarity 
c     transformation before standard eigensolver).  Note,
c     levelshifting is appropriate once a transformation
c     is available which makes the resulting Fock matrix 
c     diagonally dominant, e.g., in an approximate MO basis.  
c     Also note, there are many matrix multiplies with S^+-1/2 
c     which are redundant if one is sure that the former basis
c     is orthonormal.
c     
c     levelshifting = .false. 
      if (levelshifting)then
c     
c     save the old vectors 
c     
         call ga_copy(g_moso(1), g_old(1))
         call ga_copy(g_moso(2), g_old(2))
c     
c        Build a matrix which is diagonal in the "MO" rep,
c        back-transform, and shift the current Fock matrix
c     
c        Use S^+1/2 * old movecs (as a transform).
c     
         iw = 3
         call diis_bld12_so(toll_s, svals, g_svecs, g_scr, 
     &                   g_tmp, nbf_ao, iw)
         call ga_zero(g_scr2)
         call ga_fock_sf(g_scr, g_scr2, nbf_ao)
         call ga_zero(g_tmp_ri)
         call ga_dgemm('N', 'N', nbf_mo, nbf_mo, nbf_mo, one, 
     &                 g_scr2, g_moso(1), zero, g_tmp_ri)
         call ga_copy(g_tmp_ri,  g_moso(1)) 

         call ga_zero(g_tmp_ri) 
         call ga_dgemm('N', 'N', nbf_mo, nbf_mo, nbf_mo, one, 
     &                 g_scr2, g_moso(2), zero, g_tmp_ri)
         call ga_copy(g_tmp_ri,  g_moso(2)) 
c     
c        Build diagonal matrix.
c     
         call ga_zero(g_tmp_ri)
         do j = nTotOcc+1+me, nbf_mo, nproc
            call ga_put(g_tmp_ri, j, j, j, j, rlshift, 1)
         enddo
c     
c        Transform this into "AO" basis and add to current 
c        Fock matrix
c     
         call ga_dgemm('N', 'N', nbf_mo, nbf_mo, nbf_mo, one, 
     &                 g_moso(1), g_tmp_ri, zero, g_scr2)
         call ga_dgemm('N', 'T', nbf_mo, nbf_mo, nbf_mo, one, 
     &                 g_scr2, g_moso(1), one, g_fockso(1))
         call ga_dgemm('N', 'N', nbf_mo, nbf_mo, nbf_mo, one, 
     &                 g_moso(2), g_tmp_ri, zero, g_scr2)
         call ga_dgemm('N', 'T', nbf_mo, nbf_mo, nbf_mo, one, 
     &                 g_scr2, g_moso(2), one, g_fockso(1))

         call ga_dgemm('N', 'N', nbf_mo, nbf_mo, nbf_mo, one, 
     &                 g_moso(1), g_tmp_ri, zero, g_scr2)
         call ga_dgemm('N', 'T', nbf_mo, nbf_mo, nbf_mo, mone, 
     &                 g_scr2, g_moso(2), one, g_fockso(2))
         call ga_dgemm('N', 'N', nbf_mo, nbf_mo, nbf_mo, one, 
     &                 g_moso(2), g_tmp_ri, zero, g_scr2)
         call ga_dgemm('N', 'T', nbf_mo, nbf_mo, nbf_mo, one, 
     &                 g_scr2, g_moso(1), one, g_fockso(2))
      else
        rlshift = 0.0
      endif
c     
c     Solve for the eigenvalues and eigenvectors of the Hamiltonian.
c     
      call ga_symmetrize(g_fock)
      if (oprint_intermediate_fock)then     
      endif
cso#if defined(PARALLEL_DIAG)
cso      call ga_diag_std(g_fock, g_tmp, Dbl_MB(k_eval(ispin)))
cso#else 
cso      call ga_diag_std_seq(g_fock, g_tmp, Dbl_MB(k_eval(ispin)))
cso#endif
cso      call ga_diag_compl(g_fockso(1), g_fockso(2), g_moso(1), 
cso     &                   g_moso(2),  Dbl_MB(k_eval(1))) 
c      write(*,*)"compare"
      do i = 1, nbf_mo 
         do j = 1, nbf_mo
            DCpl_mb(ia+(nbf_mo)*(i-1)+(j-1))=dcmplx(0.0, 0.0)
         enddo
      enddo
      do i = 1, nbf_mo 
         call ga_get(g_fockso(1), 1,i, i,i, dbl_mb(ibuff),1)
c         write(*,*)"i=", i 
c         write(*,*)(dbl_mb(ibuff+ijk), ijk=0,nbf_mo-1)
         do j=1,i 
            DCpl_mb(ia+(nbf_mo)*(i-1)+(j-1))=
     =           dcmplx(dbl_mb(ibuff+j-1),0d0)
         enddo 
         call ga_get(g_fockso(2), 1,i, i,i, dbl_mb(ibuff),1)
         do j=1,i 
            DCpl_mb(ia+(nbf_mo)*(i-1)+(j-1))=
     $               DCpl_mb(ia+(nbf_mo)*(i-1)+(j-1))
     $           +dcmplx(0d0,dbl_mb(ibuff+j-1))
         enddo 
      enddo
      call ga_zero(g_moso(1))
      call ga_zero(g_moso(2))
      call zheev( 'V', 'U', nbf_mo, DCpl_mb(ia), nbf_mo, 
     $            Dbl_mb(k_eval(1)), 
     $            DCpl_mb(iwork), LLWORK, Dbl_mb(irwork), INFO )
      do i = 1, nbf_mo
         do j = 1, nbf_mo 
            dbl_mb(ibuff+j-1)=0.0d0
            dbl_mb(ibuff+j-1)=dble(DCpl_mb(ia+nbf_mo*(i-1)+(j-1)))
         enddo 
         i1=i
         call ga_put(g_moso(1),1,nbf_mo,i1,i1,dbl_mb(ibuff),1)
         trace = ddot(nbf_mo,dbl_mb(ibuff),1,dbl_mb(ibuff),1) 
         do j = 1, nbf_mo 
            dbl_mb(ibuff+j-1)=0.0d0
            dbl_mb(ibuff+j-1)=
     $             dimag(dcmplx(DCpl_mb(ia+nbf_mo*(i-1)+(j-1))))
         enddo
         i1=i 
         call ga_put(g_moso(2),1,nbf_mo,i1,i1,dbl_mb(ibuff),1)
         trace = ddot(nbf_mo,dbl_mb(ibuff),1,dbl_mb(ibuff),1) 
      enddo
c      write(*,'("before transform")')
c      write(*,*)(Dbl_mb(k_eval(1)+i),i=0,nbf_mo-1)
c      call ga_print(g_moso(1))
c      call ga_print(g_moso(2))
c     
c     Check HOMO/LUMO gap.
c     
      homo = Dbl_MB(k_eval(1)+nTotEl-1)
      lumo = Dbl_MB(k_eval(1)+nTotEl)
c     
c     If levelshifting then tidy up.
c  
      if (levelshifting)then
c     
c        Put S^-1/2 back in g_scr2 (use g_fock as temp scr).
c     
         iw = 2
         call diis_bld12_so(toll_s, svals, g_svecs, g_scr, 
     &                   g_fock, nbf_ao, iw)
         call ga_zero(g_scr2)
         call ga_fock_sf(g_scr, g_scr2, nbf_ao)
      endif
c     
c     Back-transform eigenvectors with S^-1/2.
c     
      call ga_dgemm('N', 'N', nbf_mo, nbf_mo, nbf_mo, one, 
     &     g_scr2, g_moso(1), zero, g_fockso(1))
      call ga_dgemm('N', 'N', nbf_mo, nbf_mo, nbf_mo, one, 
     &     g_scr2, g_moso(2), zero, g_fockso(2))
      if (.not. ga_destroy(g_scr)) call errquit
     &     ('dft_scf: could not destroy g_scr', 0, GA_ERR)
      if (.not. ga_destroy(g_scr2)) call errquit
     &     ('dft_scf_so: could not destroy g_scr2', 0, GA_ERR)
c     
c     Keep orbital ordering according to principle
c     of maximum overlap with previous iteration.
c
      if (lmaxov)
     .     call dft_mxovl(ao_bas_han, dbl_mb(k_eval(ispin)),
     &                  g_tmp, g_movecs(ispin), g_s,g_fock,
     .        noc,oprint_conv_details,homo,lumo)

      call ga_zero(g_moso(1))
      call ga_zero(g_moso(2))
      call ga_copy(g_fockso(1), g_moso(1))
      call ga_copy(g_fockso(2), g_moso(2))
c        
c     determine homo-lumo gap 
c
      homo_lumo_gap = min(homo_lumo_gap, (lumo-homo-rlshift))
      if (me.eq.0 .and. oprint_conv_details)
     &   write(LuOut,4224)homo,lumo,rlshift, homo_lumo_gap
 4224 format(10x,' HOMO = ',f6.2,' LUMO = ',f6.2,
     &           ' RLSHIFT = ',f6.2,' HL_GAP = ',f6.2)
c     
      call ga_sync
c     
c     Save previous density for convergence check.
c     
      call ga_copy(g_dens(1), g_movecs(1))
      call ga_copy(g_dens(2), g_movecs(2))
c
c     symmetry adapt vectors?
c
      if (oadapt)then
         call scf_sym_adapt_so(ao_bas_han, g_moso,
     &        oprint_syma, 2*nbf_ao, name,
     &        .true., 
     &        int_mb(k_ir))
      endif      
c
c     save the old density matrix for damping 
c
      call ga_copy(g_densso(1), g_damp_so(1))
      call ga_copy(g_densso(2), g_damp_so(2))
c     
c     Form a new density matrix.
c     
      call ga_sync 
      call ga_zero(g_densso(1))
      call ga_zero(g_densso(2)) 

      switch_sclMO_so=0 ! FA=09-26-11 set OFF scaleMO_so
c     
c     the fractionally occupied mo's are scaled by the sqrt of the fon
c     
      if(fon)then 
c         scale = sqrt(avg_fon)
         do i = ncore_fon + 1, ncore_fon + nmo_fon
           if (i> nbf_mo) call errquit(
     &        'dft_densm:fon g_moso index exceeds nbf_mo',
     &        i, INPUT_ERR)
           scale = sqrt(dbl_mb(i-1+k_occ)) ! sqrt(occupation number)
           call ga_scale_patch(g_moso(1), 
     &        1, nbf_mo, i, i, scale)
           call ga_scale_patch(g_moso(2), 
     &        1, nbf_mo, i, i, scale)
         end do
         if(me.eq.0) write(luout,'(5x,a)')  'FON applied'
c      else
c ---- FA-02-15-11 : occupations keyword ---- START
c       call dft_scaleMO_so(rtdb,g_moso,dbl_mb(k_occ),g_densso,
c     &                     nbf_mo,nTotOcc,switch_sclMO_so)
c ---- FA-02-15-11 : occupations keyword ---- END
      endif

c       if (switch_sclMO_so.ne.1) then
c         if (ga_nodeid().eq.0)
c     &   write(*,*) 'ENTER calc so-density matrix std-way'
         call dft_densm_so(g_densso, g_moso, nbf_ao, nTotOcc)
c       endif
c     
c     restore the scaled mo's
c     
      if (fon) then 
c         scale = sqrt(avg_fon)
         do i = ncore_fon + 1, ncore_fon + nmo_fon
           if (i> nbf_mo) call errquit(
     &        'dft_densm:fon g_moso index exceeds nbf_mo',
     &        i, INPUT_ERR)
           if (dbl_mb(i-1+k_occ) < 1d-4) call errquit(
     &        'dft_densm:fon frac occup < 1E-4. Aborting suspisciously',
     &        i, INPUT_ERR)           
           scale = 1d0/sqrt(dbl_mb(i-1+k_occ)) ! 1/sqrt(occupation number)
           call ga_scale_patch(g_moso(1), 
     &        1, nbf_mo, i, i, scale)
           call ga_scale_patch(g_moso(2), 
     &        1, nbf_mo, i, i, scale)
         end do
       end if ! fon
cso
cso   g_dens(1)=Re(Daa)+Re(Dbb) and g_dens(2)=Re(Dbb)
cso   For coulomb and xc potentials only the alpha, Re(Daa), and  
cso   beta, Re(Dbb) densities are needed 
cso
      call ga_zero(g_dens(1))
      call ga_zero(g_dens(2))
      call ga_dens_sf(g_dens, g_densso(1), nbf_ao)

c     check g_dens, calculate tr[P S] and print
c     if (debug_fon) call dft_pstrace(g_dens,ao_bas_han,nbf_ao,oskel)
      if (fon) then
        pstrace=ga_ddot(g_dens(1),g_s)
        pstrace=pstrace + ga_ddot(g_dens(2),g_s)
        if(ga_nodeid().eq.0) write (luout,'(5x,a,1x,e15.7)')
     &       'tr(P*S): ',pstrace 
        if (.not. rtdb_put(rtdb, 'dft:pstrace', mt_dbl, 1, pstrace))
     &     call errquit('dft_scf: rtdb_put pstrace failed', 1, RTDB_ERR)
      end if
c
      call ga_sync
c     
c     Check convergence on Density.
c     
      rms(1) = dft_dencvg(g_dens(1), g_movecs(1), nbf_ao)
      rms(2) = dft_dencvg(g_dens(2), g_movecs(2), nbf_ao)
      call ga_sync
c     
      if (oprint_conv.and.iter.eq.1.and.me.eq.0)then
         nheap = MA_Inquire_Heap(MT_Dbl)
         nstack = MA_Inquire_Stack(MT_Dbl)
         write(LuOut,21)
         write(LuOut,'(10x,a,f10.2,i20)')
     &        ' Heap Space remaining (MW):  ',dble(nheap)*1.D-06,nheap
         write(LuOut,'(10x,a,f10.2,i20)')
     &        'Stack Space remaining (MW):  ',dble(nstack)*1.D-06,nstack
         call util_flush(LuOut)
         write(LuOut,1)
      endif
 21   format(/,10x,' Memory utilization after 1st SCF pass: ')
    1 format(/,
     &     1x,'  convergence    iter        energy       DeltaE   ',
     &     'RMS-Dens  Diis-err    time'/
     &     1x,'---------------- ----- ----------------- --------- ',
     &     '--------- ---------  ------')
      if (oprint_conv.and.me.eq.0)then
         current_cpu = util_cpusec()
         if (diising)then
            write(LuOut,2)ndamp,rlshift,
     &           iter, Etnew+Enuc,
     &           -etold+etnew,sqrt(rms(1)),derr(1),current_cpu
            if (ipol.eq.2)write(LuOut,3)sqrt(rms(2)),derr(2)
         else
            write(LuOut,22)ndamp,rlshift,
     &           iter, Etnew+Enuc,
     &           -etold+etnew,sqrt(rms(1)), current_cpu
            if (ipol.eq.2)write(LuOut,23)sqrt(rms(2))
         endif
         call util_flush(LuOut)
      endif
    2 format(1x,'d=',i2,',ls=',f3.1,',diis',1x,i5,f18.10,
     &     1p,3d10.2,0p,f8.1)
    3 format(51x,1p,2d10.2)
 22   format(1x,'d=',i2,',ls=',f3.1,6x,i5,f18.10,
     &     1p,2d10.2,10x,0p,f8.1)
 23   format(51x,1p,1d10.2)
c
c     ecce ouput
c
      call ecce_print1 ('iteration counter', mt_int, iter, 1)
      call ecce_print1 ('iterative total energy difference', 
     &                  mt_dbl, -etold+etnew, 1)
      call ecce_print1 ('iterative total density difference', 
     &                  mt_dbl, sqrt(rms(1)), 1)
c
      call ga_sync
c     
c     save eigenvectors to movecs file
c     
      if (.not.movecs_write_so
     $     (rtdb, ao_bas_han, movecs_out, 'sodft', title,
     &     nbf_mo, dbl_mb(k_occ), dbl_mb(k_eval(1)), g_moso))
     &     call errquit('dft_scf_so: movec_write failed', 0, DISK_ERR)
c     
      call ga_sync
c     
      if (me .eq. 0.and.oprint_eval)then
         if (util_print('intermediate evals', print_default))then
            call util_print_centered(LuOut,'eigenvalues',
     &           20,.true.)
            call output(dbl_mb(k_eval(1)), 1, min(nTotEl+10,nbf_mo),
     &           1, 1, nbf_mo, 1, 1)
            call util_flush(6)
         endif
      endif
      if (oprint_vecs)then
         if (me .eq. 0)then
            write(LuOut,*)
            call util_print_centered(LuOut,
     &           'Intermediate MO vectors',40,.true.)
            write(LuOut,*)
            call util_flush(LuOut)
         endif
      endif
c     
c     If open shell compute overlap of alpha orbitals with beta 
c     orbitals.
c     
      if ((ipol.gt.1).and.(oprint_interm_overlap)) then
         call dft_mxspin_ovlp(nbf_ao,nmo,ao_bas_han,g_movecs(1), 
     &       g_movecs(2), g_tmp)
      endif
c     
c     computation of <S2> for open shell
c     
      if ((ipol.gt.1).and.(oprint_interm_S2)) then
         
         call dft_s2_value(geom, AO_bas_han, .false., noc(1), noc(2),
     &        nbf_ao, g_dens(1), g_dens(2))
      endif
c     
c     Form the total density matrix.
c     
      call ga_dadd(one, g_dens(1), one, g_dens(2), g_dens(1))
      call ga_sync
c     
c     Check for SCF convergence.
c     
      call ga_sync
      if (do_zora) then
        call dft_zora_scfcvg(rms, derr, Etold, Etnew, 
     &     e_conv, d_conv, g_conv, ipol, 
     &     iter, iterations, idone, rtdb,
     &     converged, diising)
      else
        call dft_scfcvg(rms, derr, Etold, Etnew, 
     &     e_conv, d_conv, g_conv, ipol, 
     &     iter, iterations, idone, rtdb,
     &     converged, diising)
      end if ! do_zora
      if (delta.lt.1.d-3)then
c     
c     Set coulomb acc to max (e.g., input parameter).
c     (note, may also require re-initializing DIIS)
c     
         itol2e = itol_max
         iAOacc = iAOacc_max
         tol_rho = tol_rho_max
         iswitc = iswitc+1
      endif
c     
c     Damping implemented here.
c    
      if (damping)then
         pp = dble(ndamp)*1.d-2
         onempp = 1.0d0 - pp
         call ga_dadd(pp, g_damp_so(1), onempp, g_densso(1), 
     &        g_densso(1))
         call ga_dadd(pp, g_damp_so(2), onempp, g_densso(2), 
     &        g_densso(2))
         call ga_zero(g_dens(1))
         call ga_zero(g_dens(2))
         call ga_dens_sf(g_dens, g_densso(1), nbf_ao)
         call ga_dadd(one, g_dens(1), one, g_dens(2), g_dens(1))
         call ga_sync
      else
         ndamp = 0
      endif
      call ga_sync
      iter = iter + 1
c     
c     Check convergence parameters.
c     
      if ((delta.lt.dampon.and.delta.gt.dampoff).or.iter.le.ncydp)then
         damping = .true.
         ndamp = ndamp_input
      else
         damping = .false.
         ndamp = ndamp_def
      endif
c     
      if ((delta.lt.levlon.and.delta.gt.levloff).or.
     &     (iter.le.ncysh))then
         if (homo_lumo_gap.lt.hl_tol)then
            levelshifting = .true.
            rlshift = rlshift_input
            if (check_shift)then
               if (lumo .lt. homo)then
                  levelshifting = .false.
                  if (me.eq.0 .and. oprint_conv_details)
     &                 write(LuOut,2224)homo, lumo
               endif
            endif
         else
            levelshifting = .false.
            rlshift = rlshift_def
         endif 
      else
         levelshifting = .false.
         rlshift = rlshift_def
      endif
 2224 format(10x,'HOMO = ',f6.2,' LUMO (with shift) = ',f6.2,
     &     /,10x,'Unshifted LUMO is less than HOMO.',
     &     /,10x,'Turning levelshifting OFF this iteration.')
c     
      if ((delta.lt.diison.and.delta.gt.diisoff).or.
     &     iter.le.ncyds.or.keep_diis_on)then
         diising = .true.
c     
c     Once started, keep DIIS on until diisoff threshold.
c     
         keep_diis_on = .true.
      else
         diising = .false.
      endif
      if (delta.lt.diisoff.or.(ncyds.gt.0.and.iter.gt.ncyds))then
         diising = .false.
         keep_diis_on = .false.
      endif
c     
      if (nodamping)damping = .false.
      if (nolevelshifting) then 
         levelshifting = .false.
         rlshift=rlshift_def
      endif        
      if (nodiis)diising = .false.
c     
c     vdw bit
c
c     activate disp if is present in rtdb
c     or if dispersion is automatically included in the xc functional
      if (.not.rtdb_get(rtdb, 'dft:disp', mt_log, 1, disp))
     &   disp=.false.
c
      if(disp.or.xc_chkdispauto())
     &     call xc_vdw(rtdb,geom,Etnew,dum,'energy')
c
      Etold = Etnew
c     
      lmaxov = lmaxov_sv
      if ((lumo - homo).lt.-hl_tol.and.lmaxov)then
         lmaxov = .false.
         if (me.eq.0 .and. oprint_conv_details)
     &        write(LuOut,224)homo, lumo
 224     format(10x,' HOMO = ',f6.2,' LUMO = ',f6.2,
     &        /,10x,'Significant orbital reordering with',
     &        ' maximum overlap',
     &        /,10x,'turned ON.  Turning max_ovl OFF.')
      endif
c
      if (oprint_energy_step.and.me.eq.0)then         
         current_cpu = util_cpusec()
         if (nexc.le.1)then
            write(LuOut,222)etnew+enuc, ecore, Ecoul, Exc(1), enuc, 
     &           rho_n, current_cpu
         else
            write(LuOut,223)etnew+enuc, ecore, Ecoul, Exc(1), Exc(2),
     &           enuc, rho_n, current_cpu
         endif
      endif
c     
c     Check for remaining time to exit "gracefully"
c     
      current_wall = util_wallsec()
      if ((iter-1).gt.1)then
         elapsed_wall = current_wall - save_wall
         save_wall = current_wall
      else
         elapsed_wall = current_wall - start_wall
         save_wall = current_wall
      endif
c     
      if (converged)then
c     
c     If converged probably need a few seconds to clean things up 
c     and calculate a few properties.
c     
         wall_time_reqd = 5.0
c     
c        == scale zora eigenvalues and energy ==
         ener_scal = 0.d0
         if (do_zora) then
            call dft_zora_scale_so(
     &                   rtdb,g_dens_at,nexc, ! Added by FA
     &                   geom,
     &                   ao_bas_han,
     &                   nbf,
     &                   nbf_ao,
     &                   nbf_mo,
     &                   g_dens,
     &                   g_s,
     &                   g_moso,
     &                   g_zora_scale_sf,
     &                   g_zora_scale_so,
     &                   dbl_mb(k_eval(1)),
     &                   dbl_mb(k_occ),
     &                   nTotOcc,
     &                   noc, ! FA
     &                   ipol,
     &                   ener_scal)
         end if
c
      else
c     
c     If not converged probably need at least the amount time
c     required for previous iteration (multiply by 1.2 to be on the safe side).
c     
         wall_time_reqd = elapsed_wall*1.2d0
      endif
      int_wall_time_reqd = wall_time_reqd
      if (.not.util_test_time_remaining(rtdb, int_wall_time_reqd))then
         if (me.eq.0)then
            write(LuOut,*)
            call util_print_centered(LuOut,
     &           'Exiting due to time limitations.', 20, .true.)
            write(LuOut,*)
            goto 2000
         endif
      endif
      if (idone.eq.0.or.(iswitc.lt.2.and.iter.lt.iterations))
     &     go to 1000           ! begin new iteration
      if (idone.eq.1.and.(.not.last_time_energy))then
         last_time_energy = .true.
         go to 1000             ! build final total energies
      endif
c     
 2000 continue
c    
      if (me.eq.0.and.oprint)then
         if (.not.converged)then
            write(LuOut,*)
            call util_print_centered(LuOut,
     &           'Calculation failed to converge', 20, .true.)
            write(LuOut,*)
         endif
         dft_time = dft_time+util_cpusec()

         if (nexc.le.1)then
          write(LuOut,222)etnew+enuc,
     &                      ecore,
     &                      ecoul,
     &                      exc(1),
     &                      enuc
         else
          write(LuOut,223)etnew+enuc,
     &                      ecore,
     &                      ecoul,
     &                      exc(1),
     &                      exc(2),
     &                      enuc
         end if
         if (do_zora) write(luout,2221) ener_scal
         write(luout,2222) rho_n
         write(luout,2223) dft_time
c
 222  format(//
     &     '         Total DFT energy =', f20.12/
     &     '      One electron energy =', f20.12/
     &     '           Coulomb energy =', f20.12/
     &     '    Exchange-Corr. energy =', f20.12/
     &     ' Nuclear repulsion energy =', f20.12/)
c
 223  format(//
     &     '         Total DFT energy =', f20.12/
     &     '      One electron energy =', f20.12/
     &     '           Coulomb energy =', f20.12/
     &     '          Exchange energy =', f20.12/
     &     '       Correlation energy =', f20.12/
     &     ' Nuclear repulsion energy =', f20.12/)
c
 2221 format('       Scaling correction =', f20.12/)
 2222 format(' Numeric. integr. density =', f20.12/)
 2223 format('     Total iterative time =', f9.1,'s'//)
c
         call util_flush(LuOut)
      endif
c
c     print out the determinantal energies 
c
 2001 continue 
      if (me.eq.0.and.oprint.and.det_eng)then
         write(LuOut,*)
c         call util_print_centered(LuOut,
c     &        'Calculation failed to converge', 20, .true.)
         write(LuOut,*)
         dft_time = dft_time+util_cpusec()
         if (nexc.le.1)then
            write(LuOut,232)etnew+enuc, ecore, Ecoul, Exc(1), enuc, 
     &           rho_n, dft_time
            if (do_zora) write(luout,2221) ener_scal
         else
            write(LuOut,233)etnew+enuc, ecore, Ecoul, Exc(1), Exc(2),
     &           enuc, rho_n, dft_time
            if (do_zora) write(luout,2221) ener_scal
         endif
 232     format(//
     &        '       Determinant Energy'/
     &        '         Total DFT energy =', f20.12/
     &        '      One electron energy =', f20.12/
     &        '           Coulomb energy =', f20.12/
     &        '    Exchange-Corr. energy =', f20.12/
     &        ' Nuclear repulsion energy =', f20.12//
     &        ' Numeric. integr. density =', f20.12//
     &        '     Total iterative time =', f9.1,'s'//)
 233     format(//
     &        '       Determinant Energy'/
     &        '         Total DFT energy =', f20.12/
     &        '      One electron energy =', f20.12/
     &        '           Coulomb energy =', f20.12/
     &        '          Exchange energy =', f20.12/
     &        '       Correlation energy =', f20.12/
     &        ' Nuclear repulsion energy =', f20.12//
     &        ' Numeric. integr. density =', f20.12//
     &        '     Total iterative time =', f9.1,'s'//)
         call util_flush(LuOut)
      endif 
c
c     calculate the determinantal energy
c     
      if(.not.fon) goto 2002 
      if (.not.rtdb_get(rtdb,'sodft:ndet',mt_int,1,ndet)) ndet = 0
c
      if(idet .eq. 0 .and. ndet. gt. 0)then 
           if(.not.ma_push_get(mt_int,nmo_fon*ndet,'det',kfon_occ,
     &        lfon_occ))
     &        call errquit('cannot alloctate lfon_occ',0, MA_ERR)
           if (.not.rtdb_get(rtdb,'sodft:occupancy',mt_int,
     &        nmo_fon*ndet,int_mb(lfon_occ)))
     &        call errquit('no occupancy specified',0, RTDB_ERR)
       endif  ! (idet.eq.0) and (ndet.gt.0)
c
      if(idet.eq.ndet) goto 2002    ! all determinant energies have been calculated
c
c     Switch the mo order to match the determinantal occupancy 
c
      iswap = ncore_fon + 1  ! first partially filled orbital
      jswap = iswap + idet ! forms the pair (i,j) to be swapped
      swap(1) = iswap  ! assign the pair
      swap(2) = jswap  ! assign the pair
      if (jswap.ge.iswap) then
         if(.not.rtdb_put(rtdb,'sodft:swap',mt_int,2,swap))  ! swap is queried in movecs_swap_so
     &        call errquit('swap: failed to put nelem in rtdb', 0,
     &       RTDB_ERR)
         call movecs_swap_so(rtdb,'dft',scftype,g_moso,   ! do the swap
     &        dbl_mb(k_occ),dbl_mb(k_eval(1)))
      end if
c     
c     form a new density matrix and calculate the corresponding energy 
c     
      call dft_densm_so(g_densso,g_moso,nbf_ao,nTotEl) 
      call ga_zero(g_dens(1))
      call ga_zero(g_dens(2))
      call ga_dens_sf(g_dens, g_densso(1), nbf_ao)
      call ga_dadd(one, g_dens(1), one, g_dens(2), g_dens(1))
      call ga_sync
      det_eng = .true. 
c     
c     Restore the mo order so that we have a fixed reference
c     
      if (me.eq.0) write(luout,4443)
 4443 format(/,1x,'Restoring orbital order')
c
      if (jswap.ge.iswap) then
         if(.not.rtdb_put(rtdb,'sodft:swap',mt_int,2,swap))  ! swap is queried in movecs_swap_so
     &        call errquit('swap: failed to put nelem in rtdb', 0,
     &       RTDB_ERR)
         call movecs_swap_so(rtdb,'dft',scftype,g_moso,  ! do the swap
     &        dbl_mb(k_occ),dbl_mb(k_eval(1)))
      end if
c
c     Write determinant configuration     
c
      if(me .eq. 0)then 
         write(Luout,4444)
     &    (int_mb(lfon_occ+idet*(nmo_fon)+i-1), i=1,nmo_fon)
      endif
      idet = idet + 1   ! determinant counter
      go to 1000 
 4444 format(/,1x,'Determinant Occupancy: ',8(1x,i3,1x))
c
      if(fon.and.(ndet.gt.0))then 
       if (.not.ma_pop_stack(kfon_occ))
     &   call errquit('dft_scf: cannot pop stack:lfon_occ',0,MA_ERR)
      endif
c     
 2002 continue

      if (.not. ga_destroy(g_damp_so(1))) call errquit
     &     ('dft_scf_so: could not destroy g_damp_so', 0, GA_ERR)
      if (.not. ga_destroy(g_damp_so(2))) call errquit
     &     ('dft_scf_so: could not destroy g_damp_so', 0, GA_ERR)
c     
c     symmetry adapt vectors last time print symmetries, etc.
c     
c     if (oadapt)then
c     call scf_movecs_sym_adapt(ao_bas_han, g_movecs, oprint, 
c     &                             nbf_ao, '- alpha', .true., 
c     &                             int_mb(k_ir))
c         if (ipol.eq.2)
c     &      call scf_movecs_sym_adapt(ao_bas_han, g_movecs(2), oprint, 
c     &                                nbf_ao, '- beta', .true., 
c     &                                int_mb(k_ir+nbf_ao))
c      endif      
c
c     Vector analysis stolen from rohf.F
c
      if (util_print('final vectors analysis', print_default)) then
         do ilo = 1,max(1,nTotEl-10)
            if (dbl_mb(k_eval(1)+ilo-1) .ge. eval_pr_tol_lo) 
     &           goto 961
         enddo
 961     do ihi = min(nTotEl+10,nbf_mo), nbf_mo
            if (dbl_mb(k_eval(1)+ihi-1) .ge. eval_pr_tol_hi) 
     &           goto 9611
         enddo
         ihi = max(ihi-1,1)
 9611    continue
         if (util_print('final vectors analysis', print_high)) then
            ilo = 1
            ihi = nbf_mo
         endif
         call movecs_anal_so(ao_bas_han, ilo, ihi, 0.15d0, 
     &        g_moso, 
     &        'DFT Final Molecular Orbital Analysis', 
     &        .true., dbl_mb(k_eval(1)), oadapt, 
     &        int_mb(k_ir), .true., dbl_mb(k_occ))
      endif
c     
c     call to Mulliken Pop Ananlysis
c     
      if (mulliken)then
         if (me.eq.0)
     &      call dft_header
     &      (' Total Density - Mulliken Population Analysis')
         call mull_pop(geom,ao_bas_han,g_dens(1),g_s, 'total')
         if (ipol.eq.2)then
c     
c           analysis of spin density
c     
            if (me.eq.0)call dft_header
     &         (' Spin Density - Mulliken Population Analysis')
            call ga_dadd(one,g_dens(1),-2.d0,g_dens(2),g_dens(2))
            call mull_pop(geom,ao_bas_han,g_dens(2),g_s,'spin')
            call ga_dadd(one,g_dens(1),-1.d0,g_dens(2),g_dens(2))
            call ga_dscal(g_dens(2),0.5d0)
         endif
      endif
c     
c     end infinite loop for SCF iterations
c     
c     Store energy and convergence status ... must store before
c     write movecs since date of insertion is used.
c     
      if (.not. rtdb_put(rtdb, 'sodft:energy',MT_DBL,1,(Etnew+Enuc)))
     &   call errquit('dft_scf: failed to store energy in rtdb', 0,
     &       RTDB_ERR)
      if (.not. rtdb_put(rtdb, 'sodft:converged',MT_LOG,1,converged))
     &   call errquit('dft_scf: failed to store converged in rtdb',0,
     &       RTDB_ERR)
c      if (rtdb_get(rtdb, 'sodft:converged', mt_log, 1, oconverged))
c     &     write(*,*)"converged=", converged
c
c     output energies and eigenvectors to disk
c     
      if (.not.movecs_write_so
     $     (rtdb, ao_bas_han, movecs_out, 'sodft', title,
     &     nbf_mo, dbl_mb(k_occ), dbl_mb(k_eval(1)), g_moso))
     &     call errquit('dft_scf_so: movec_write failed', 0, DISK_ERR)
      call ga_sync()
c     
c     Shut down DIIS.
c     
      if (icall(1).gt.0)then
         icall(1) = -1
         call diis_driver_so(toll_s, derr, icall, nfock, 
     &        nbf_mo, g_fockso, g_densso, 
     &        g_svecs, svals, diising, nodiis)
      endif
c     
c     If open shell compute overlap of alpha orbitals with beta orbitals.
c     
      if (ipol.gt.1)then
         call dft_mxspin_ovlp(nbf_ao,nmo,ao_bas_han,g_movecs(1), 
     &        g_movecs(2), g_tmp)
      endif
c
      if (wght_GA)then
         if (.not. ga_destroy(g_wght)) call errquit
     &      ('dft_scf: could not destroy g_wght', 0, GA_ERR)
         if (.not. ga_destroy(g_xyz)) call errquit
     &      ('dft_scf: could not destroy g_xyz', 0, GA_ERR)
         if (.not. ga_destroy(g_nq)) call errquit
     &      ('dft_scf: could not destroy g_nq', 0, GA_ERR)
      endif
c     
c     Restore alpha and beta densities.
c
      if (ipol .gt. 1)
     &   call ga_dadd(one,g_dens(1),onem,g_dens(2),g_dens(1))
c     
c     computation of <S2> for open shell
c     
      if (ipol.gt.1)then
         call dft_s2_value(geom,AO_bas_han,.false.,noc(1),noc(2),
     &        nbf_ao,g_dens(1),g_dens(2))
      endif
c     
c     computation of multipole moments
c
      if (natoms .gt. 1)
     &   call dft_mpole(rtdb, ao_bas_han, ipol, g_dens(1), g_dens(2))
c     
c     print stolen for uhf.F
c     
      if (util_print('schwarz',print_high).and.(.not.CDFIT))then
         call schwarz_print(natoms, nshells_ao)
      endif
c     
      if (me .eq. 0)then
         if (util_print('final evals', print_high))then
            call util_print_centered(LuOut,'Final eigenvalues',
     &           20,.false.)
            call util_print_centered(LuOut,
     &           '(all occupied plus 10 virtual)',20,.true.)
            call output(dbl_mb(k_eval(1)),
     &           1, min(noc(1)+10,nbf_ao),
     &           1, 1, nbf_ao, 1, 1)
            call util_flush(6)
         endif  ! util_print
c
         if (oprint_final_vecs)then
            write(LuOut,*)
            call util_print_centered(
     &           LuOut,'Final MO vectors',40,.true.)
            write(LuOut,*)
            call util_flush(LuOut)
         endif
      endif
c
      if (oprint_final_vecs)then
cso         call ga_print(g_movecs)
cso         if (ipol.eq.2)call ga_print(g_movecs(2))
      endif
c     
c     ECCE printout
c     
      call movecs_ecce(nbf_ao, nmo, 1, nmo(1), dbl_mb(k_eval(1)),
     &                 dbl_mb(k_occ), int_mb(k_ir), 
     &                 g_movecs(1), 'dft', 'alpha')
      if (ipol.eq.2)then ! spin-unrestricted
         call movecs_ecce(nbf_ao, nmo, 1, nmo(2), dbl_mb(k_eval(2)),
     &                    dbl_mb(k_occ+nbf_ao), int_mb(k_ir+nbf_ao), 
     &                    g_movecs(2), 'dft', 'beta')
      endif
      call ecce_print1 ('total energy', mt_dbl, (Etold+Enuc), 1)
      call ecce_print1 ('nuclear repulsion energy', mt_dbl, Enuc, 1)
      call ecce_print1 ('coulomb energy', mt_dbl, Ecoul, 1)
      call ecce_print1 ('exchange energy', mt_dbl, Exc(1), 1)
      if (nexc.gt.1) then
         call ecce_print1 ('correlation energy', mt_dbl, Exc(2), 1)
      endif
c
      if (.not.ma_chop_stack(lbuff)) then
        call ma_summarize_allocated_blocks()
        call util_flush(6)
        call util_flush(0)
        call errquit('dft_scf: cannot pop stack:lbuff',lbuff, MA_ERR)
      endif
c
      if (.not.ma_pop_stack(lrwork))
     &   call errquit('dft_scf: cannot pop stacklrwork',0, MA_ERR)
      if (.not.ma_pop_stack(lwork))
     &   call errquit('dft_scf: cannot pop stack:lwork',0, MA_ERR)
      if (.not.ma_pop_stack(lw))
     &   call errquit('dft_scf: cannot pop stack:lw',0, MA_ERR)
      if (.not.ma_pop_stack(la))
cso
     &   call errquit('dft_scf: cannot pop stack:la',0, MA_ERR)
      if (.not.ma_pop_stack(l_ir))
     &   call errquit('dft_scf: cannot pop stack:l_ir',0, MA_ERR)
c     
      if (ipol.gt.1)then
         if (.not. ga_destroy(g_fockt)) call errquit
     &      ('dft_scf: could not destroy g_fockt', 0, GA_ERR)
      endif
      if (.not. ga_destroy(g_tmp)) call errquit
     &   ('dft_scf: could not destroy g_tmp', 0, GA_ERR)
c
      call fock_2e_tidy(rtdb)
c     
      if (converged)then
         call ecce_print_module_exit('dft', 'ok')
      else
         call ecce_print_module_exit('dft', 'failed')
      endif
c     
c     eval deallocation moved here from inside iteration loop
c     
      if (.not.ma_pop_stack(l_eval))
     &   call errquit('dft_scf: cannot pop stack:l_eval',0, MA_ERR)
      if (CDFIT)then
         if (.not.ma_pop_stack(l_3cwhat))
     &      call errquit('dft_scf: cannot pop stack:l_3cwhat',0, MA_ERR)
         if (.not.ma_pop_stack(l_3cERI))
     &      call errquit('dft_scf: cannot pop stack:l_3cERI',0, MA_ERR)
      endif
      if (.not.ma_pop_stack(l_occ))
     &   call errquit('dft_scf: cannot pop stack:l_occ',0, MA_ERR)
      if (.not.ma_pop_stack(lrdens_atom))
     &   call errquit('dft_scf: cannot pop stack:lrdens_atom',0, MA_ERR)
      if (.not.ma_pop_stack(lcetobfr))
     &   call errquit('dft_scf: cannot pop stack:lcetobfr',0, MA_ERR)
      if (.not.ma_pop_stack(lcntobfr))
     &   call errquit('dft_scf: cannot pop stack:lcntobfr',0, MA_ERR)
      if (.not.ma_pop_stack(lcntoce))
     &   call errquit('dft_scf: cannot pop stack:lcntoce',0, MA_ERR)
c
      if(.not.ga_destroy(g_moso(1)))     
     &     call errquit('dft_scf_so: error destroy Movecs Re',0, GA_ERR)
      if(.not.ga_destroy(g_moso(2)))     
     &     call errquit('dft_scf_so: error destroy Movecs Im',0, GA_ERR)
      if(.not.ga_destroy(g_fockso(1)))     
     &     call errquit('dft_scf_so: error destroy Fock Re',0, GA_ERR)
      if(.not.ga_destroy(g_fockso(2)))     
     &     call errquit('dft_scf_so: error destroy Fock Im',0, GA_ERR)
      if(.not.ga_destroy(g_densso(1)))     
     &     call errquit('dft_scf_so: error destroy DenMx Re',0, GA_ERR)
      if(.not.ga_destroy(g_densso(2)))     
     &     call errquit('dft_scf_so: error destroy DenMx Im',0, GA_ERR)
      if(.not.ga_destroy(g_tmp_ri))     
     &     call errquit('dft_scf_so: error destroy old re',0, GA_ERR)
      if(.not.ga_destroy(g_old(1)))     
     &     call errquit('dft_scf_so: error destroy old im',0, GA_ERR)
      if(.not.ga_destroy(g_old(2)))     
     &     call errquit('dft_scf_so: error destroy Tmp ReIm',0, GA_ERR)
      if(.not.ga_destroy(g_so(1)))     
     &     call errquit('dft_scf_so: error destroy so z',0, GA_ERR)
      if(.not.ga_destroy(g_so(2)))     
     &     call errquit('dft_scf_so: error destroy so y',0, GA_ERR)
      if(.not.ga_destroy(g_so(3)))     
     &     call errquit('dft_scf_so: error destroy so x',0, GA_ERR)
c
c     == deallocate zora arrays ==
      if (do_zora) then
c
c      == spin-free parts ==
       if (.not. ga_destroy(g_zora_sf(1))) call errquit(
     &   'dft_scf_so: ga_destroy failed zora sf 1',0, GA_ERR)
       if (.not. ga_destroy(g_zora_sf(2))) call errquit(
     &   'dft_scf_so: ga_destroy failed zora sf 2',0, GA_ERR)
c
       if (.not. ga_destroy(g_zora_scale_sf(1))) call errquit(
     &   'dft_scf_so: ga_destroy failed zora scale sf 1',0, GA_ERR)
       if (.not. ga_destroy(g_zora_scale_sf(2))) call errquit(
     &   'dft_scf_so: ga_destroy failed zora scale sf 2',0, GA_ERR)
c
c      == spin-orbit parts ==
       if(.not.ga_destroy(g_zora_so(1))) call errquit(
     &   'dft_scf_so: ga_destroy failed zora so z',0, GA_ERR)
       if(.not.ga_destroy(g_zora_so(2))) call errquit(
     &   'dft_scf_so: ga_destroy failed zora so y',0, GA_ERR)
       if(.not.ga_destroy(g_zora_so(3))) call errquit(
     &   'dft_scf_so: ga_destroy failed zora so x',0, GA_ERR)
c
       if(.not.ga_destroy(g_zora_scale_so(1))) call errquit(
     &   'dft_scf_so: ga_destroy failed zora so scale z',0, GA_ERR)
       if(.not.ga_destroy(g_zora_scale_so(2))) call errquit(
     &   'dft_scf_so: ga_destroy failed zora so scale y',0, GA_ERR)
       if(.not.ga_destroy(g_zora_scale_so(3))) call errquit(
     &   'dft_scf_so: ga_destroy failed zora so scale x',0, GA_ERR)
c
      end if  !do_zora
c
      dft_scf_so = converged
c
c !!! BGJ
      if (.not. rtdb_get(rtdb, 'bgj:poliz', mt_log,
     &     1, do_poliz)) then
         do_poliz = .false.
      endif
      if (do_poliz) then
c         write(*,*)'*** dft_scf: calling cphf_poliz'
         if (.not. cphf_poliz(rtdb)) ! Never executed.
     $        call errquit(' cphf_poliz: failed from dft_scf !',0,
     &       CALC_ERR)
      endif
c !!! BGJ
      return
c     
 1111 format(15x,'Core Energy:              ',f20.10)
c     
      end

