!{\src2tex{textfont=tt}}
!!****f* ABINIT/cgwf3
!! NAME
!! cgwf3
!!
!!
!! FUNCTION
!! Update one single wavefunction (cwavef), non self-consistently.
!! Uses a conjugate-gradient algorithm.
!! Try to keep close to the formulas in PRB55, 10337 (1997), for the
!! non-self-consistent case, except that we are computing here
!! the second-derivative of the total energy, and not E(2). There
!! is a factor of 2 between the two quantities ...
!! The wavefunction that is generated is always orthogonal to cgq .
!! It is orthogonal to the active Hilbert space, and will be complemented
!! by contributions from the active space in the calling routine, if needed.
!!
!! COPYRIGHT
!! Copyright (C) 1999-2007 ABINIT group (XG, DRH, XW)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!!  band=which particular band we are converging.
!!  cgq(2,mcgq)=wavefunction coefficients for ALL bands.
!!  cplex=1 if vlocal1 is real, 2 if vlocal1 is complex
!!  cwave0(2,npw*nspinor)=GS wavefunction at k, in reciprocal space
!!  dimekb=first dimension of ekb (see ekb_typ)
!!  dimffnlk=second dimension of ffnl (1+number of derivatives)
!!  dimffnl1=second dimension of ffnl1 and ffnlkq (1+number of derivatives)
!!  dkinpw(npw)=derivative of the (modified) kinetic energy for each plane wave at k (Hartree)
!!  eig0nk=0-order eigenvalue for the present wavefunction
!!  ekb_typ(dimekb,1)=
!!  ->Norm conserving : (Real) Kleinman-Bylander energies (hartree)
!!                             for the displaced atom
!!          for number of basis functions (l,n) (lnmax)
!!          dimekb=lnmax
!!    ->PAW : (Real, symmetric) Frozen part of Dij coefficients
!!                               to connect projectors
!!                               for the displaced atom
!!          for number of basis functions (l,m,n) (lmnmax)
!!          dimekb=lmnmax*(lmnmax+1)/2
!!  ffnlk(npw,dimffnlk,lmnmax,1)=nonloc form factors at k, for the displaced atom.
!!  ffnlkq(npw1,dimffnl1,lmnmax,1)=nonloc form fact at k+q for the displaced atom
!!  ffnl1(npw1,dimffnl1,lmnmax,ntypat)=nonloc form factors at k+q
!!  filstat=name of the status file
!!  grad_berry(2,mpw1,dtefield%nband_occ) = the gradient of the Berry phase term
!!  gbound(2*mgfft+8,2)=G sphere boundary at k
!!  gs_hamkq <type(gs_hamiltonian_type)>=all data for the Hamiltonian at k+q
!!  icgq=shift to be applied on the location of data in the array cgq
!!  idir=direction of the perturbation
!!  indlmn_typ(6,lmnmax,1)=indlmn info for the displaced atom
!!  ipert=type of the perturbation
!!  kg_k(3,npw)=coordinates of planewaves in basis sphere at k.
!!  kg1_k(3,npw1)=coordinates of planewaves in basis sphere at k+q.
!!  kinpw1(npw1)=(modified) kinetic energy for each plane wave at k+q (Hartree)
!!  kpg_k(npw,nkpg)= (k+G) components at k (only if useylm=1)
!!  kpg1_k(npw1,nkpg1)= (k+G) components at k+q (only if useylm=1)
!!  kpt(3)=coordinates of k point.
!!  lmnmax= max number of (l,n)   comp. over all type of psps
!!  matblk=dimension of the array ph3d
!!  mcgq=second dimension of the cgq array
!!  mgfft=maximum size of 1D FFTs
!!  mpi_enreg=informations about MPI parallelization
!!  mpw1=maximum number of planewave for first-order wavefunctions
!!  mpsang= 1+maximum angular momentum for nonlocal pseudopotentials
!!  mpssoang= 1+maximum (spin*angular momentum) for nonlocal pseudopotentials
!!  natom=number of atoms in cell.
!!  nband=number of bands.
!!  nbdbuf=number of buffer bands for the minimisation
!!  nkpg,nkpg1=second dimensions of kpg_k and kpg1_k (0 if useylm=0)
!!  nline=number of line minimizations per band.
!!  npw=number of planewaves in basis sphere at given k.
!!  npw1=number of planewaves in basis sphere at k+q
!!  nspinor=number of spinorial components of the wavefunctions
!!  ntypat=number of types of atoms in cell.
!!  n4,n5,n6 dimensions of vlocal and vlocal1
!!  ortalg=governs the choice of the algorithm for orthogonalisation.
!!  ph3d(2,npw1,matblk)=3-dim structure factors, for each atom and plane wave.
!!  prtvol=control print volume and debugging output
!!  pspso_typ(1)=spin-orbit info for the displaced atom
!!  qphon(3)=reduced coordinates for the phonon wavelength
!!  quit= if 1, proceeds to smooth ending of the job.
!!  sciss=scissor shift (Ha)
!!  tolwfr=tolerance on largest wf residual
!!  vlocal(n4,n5,n6)= GS local pot in real space, on the augmented fft grid
!!  vlocal1(cplex*n4,n5,n6)= RF local pot in real space, on the augmented fft grid
!!
!! OUTPUT
!!  eig1_k(2*nband**2)=matrix of first-order eigenvalues (hartree)
!!   ( eig1(:,ii,jj)=<C0 ii|H1|C0 jj> )
!!  ghc(2,npw1*nspinor)=<G|H0|C1 band,k>,
!!  gvnlc(2,npw1*nspinor)=<G|Vnl|C1 band,k>
!!  gvnl1(2,npw1*nspinor)=<G|Vnl1|C0 band,k>
!!  resid=wf residual for current band
!!
!! SIDE EFFECTS
!!  Input/Output:
!!  cwavef(2,npw1*nspinor)=RF wavefunction at k,q, in reciprocal space (updated)
!!  wfraug(2,n4,n5,n6)=is a dummy array
!!
!! NOTES
!!
!!
!! PARENTS
!!      vtowfk3
!!
!! CHILDREN
!!      dotprod_g,fourwf,getghc,leave_new,nonlop,precon,projbd,sqnorm_g,status
!!      timab,wrtout
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

subroutine cgwf3(band,berryopt,cgq,cplex,cwavef,cwave0,dimekb,dimffnlk,dimffnl1,dkinpw,eig0nk,eig1_k,&
& ekb_typ,ffnlk,ffnlkq,ffnl1,filstat,gbound,ghc,grad_berry,&
& gs_hamkq,gvnlc,gvnl1,icgq,idir,indlmn_typ,&
& ipert,kg_k,kg1_k,kinpw1,kpg_k,kpg1_k,&
& kpt,lmnmax,matblk,mcgq,mgfft,mpi_enreg,mpsang,mpssoang, mpw1,natom,nband,nbdbuf,&
& nkpg,nkpg1,nline,npw,npw1,nspinor,ntypat,n4,n5,n6,ortalg,ph3d,prtvol,pspso_typ,&
& qphon,quit,resid,sciss,tolwfr,vlocal,vlocal1,wfraug)

 use defs_basis
 use defs_datatypes

!This section has been created automatically by the script Abilint (TD). Do not modify these by hand.
#ifdef HAVE_FORTRAN_INTERFACES
 use interfaces_01manage_mpi
 use interfaces_11util
 use interfaces_12ffts
 use interfaces_12spacepar
 use interfaces_13nonlocal
 use interfaces_14wfs
#endif
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: band,berryopt,cplex,dimekb,dimffnl1,dimffnlk,icgq,idir
 integer,intent(in) :: ipert,lmnmax,matblk,mcgq,mgfft,mpsang,mpssoang,mpw1,n4
 integer,intent(in) :: n5,n6,natom,nband,nbdbuf,nkpg,nkpg1,nline,npw,npw1
 integer,intent(in) :: nspinor,ntypat,ortalg,prtvol,quit
 real(dp),intent(in) :: eig0nk,sciss,tolwfr
 real(dp),intent(out) :: resid
 character(len=fnlen),intent(in) :: filstat
 type(MPI_type),intent(inout) :: mpi_enreg
 type(gs_hamiltonian_type),intent(in) :: gs_hamkq
!arrays
 integer,intent(in) :: gbound(2*mgfft+8,2),indlmn_typ(6,lmnmax,1),kg1_k(3,npw1)
 integer,intent(in) :: kg_k(3,npw),pspso_typ(1)
 real(dp),intent(in) :: cgq(2,mcgq),dkinpw(npw),ekb_typ(dimekb,1)
 real(dp),intent(in) :: ffnl1(npw1,dimffnl1,lmnmax,ntypat)
 real(dp),intent(in) :: ffnlk(npw,dimffnlk,lmnmax,1)
 real(dp),intent(in) :: ffnlkq(npw1,dimffnl1,lmnmax,1),grad_berry(2,mpw1,nband)
 real(dp),intent(in) :: kinpw1(npw1),kpg1_k(npw1,nkpg1),kpg_k(npw,nkpg),kpt(3)
 real(dp),intent(in) :: qphon(3)
 real(dp),intent(inout) :: cwave0(2,npw*nspinor),cwavef(2,npw1*nspinor)
 real(dp),intent(inout) :: ph3d(2,npw1,matblk),vlocal(n4,n5,n6)
 real(dp),intent(inout) :: vlocal1(cplex*n4,n5,n6),wfraug(2,n4,n5,n6)
 real(dp),intent(out) :: eig1_k(2*nband**2),ghc(2,npw1*nspinor)
 real(dp),intent(out) :: gvnl1(2,npw1*nspinor),gvnlc(2,npw1*nspinor)

!Local variables-------------------------------
!scalars
 integer,parameter :: level=19,test=0,tim_getghc=2
 integer,save :: nskip=0
 integer :: choice,cpopt,i1,i2,i3,iband,iexit,igs,ii,iline,index,ipw,ipw1,ipws,isign
 integer :: ispinor,istr,istwf_k,matblk_der,n1,n2,n3,natom_der,nnlout
 integer :: ntypat_der,nvloc,paw_opt,print,shift1,shift2,shift3,signs
 integer :: tim_fourwf,tim_nonlop,tim_projbd
 real(dp) :: arg,cgwftol,chc,costh,d2edt2,d2te,d2teold,dedt,deltae,deold,dotgg
 real(dp) :: dotgp,doti,dotr,dum,eshift,gamma,ghc2,prod1,prod2,root,sinth,swap
 real(dp) :: tan2th,theta,u1h0me0u1,weight,wfim,wfre,xnorm
 character(len=500) :: message
!arrays
 integer :: atindx1_der(1),atindx_der(1),nattyp_der(1),nloalg_der(5)
 real(dp) :: dummy(2,1),nonlop_dum(1,1),phkxredin(2,1),phkxredout(2,1),tsec(2)
 real(dp) :: xred_der(3)
 real(dp),allocatable :: conjgr(:,:),cwave0_sp(:,:),cwaveq(:,:),direc(:,:)
 real(dp),allocatable :: dummy0(:,:),enlout(:),gh1(:,:),gh1_sp(:,:)
 real(dp),allocatable :: gh_direc(:,:),gresid(:,:),gsc_dummy(:,:)
 real(dp),allocatable :: gvnl_direc(:,:),pcon(:),ph1d_der(:,:),ph3din(:,:,:)
 real(dp),allocatable :: ph3dout(:,:,:),scprod(:,:)
 type(cprj_type) :: cprj_dum(1)

! *********************************************************************

!DEBUG Keep this debugging feature !
!write(6,*)' cgwf3 : enter'
!ENDDEBUG

 call timab(122,1,tsec)

 if(prtvol<0 .or. test*prtvol/=0)then
  call status(0,filstat,iexit,level,'enter         ')
 end if

!Structured debugging if prtvol==-level
 if(prtvol==-level)then
  write(message,'(80a,a,a)') ('=',ii=1,80),ch10,' cgwf3 : enter '
  call wrtout(06,message,'PERS')
 end if

!Tell us what is going on:
 if(prtvol>=10)then
  write(message, '(a,i6,2x,a,i3,a)' ) &
&   ' --- cgwf3 is called for band',band,'for',nline,' lines'
  call  wrtout(06,message,'PERS')
 end if

 eshift=eig0nk-sciss
 print=0
 if(prtvol==-level)print=1
 n1=gs_hamkq%ngfft(1) ; n2=gs_hamkq%ngfft(2) ; n3=gs_hamkq%ngfft(3)

!For the time being, non-collinear potential is not allowed in RF
 nvloc=1

 istwf_k=gs_hamkq%istwf_k

 allocate(gh1(2,npw1*nspinor))
 allocate(gvnl_direc(2,npw1*nspinor),gh_direc(2,npw1*nspinor))
 allocate(pcon(npw1))

!*************************************************************************
!
! Apply H(1) to phi(0)

!Phonon perturbation
 if(ipert <= natom) then

!DEBUG
! write(6,*)' cgwf3 : call fourwf 1st'
!ENDDEBUG

! First, apply the local potential operator
  weight=1.0_dp ; tim_fourwf=4
  call fourwf(cplex,vlocal1,cwave0,gh1,wfraug,gbound,gs_hamkq%gbound,&
&  istwf_k,kg_k,kg1_k,mgfft,mpi_enreg,1,gs_hamkq%ngfft,&
&  npw,npw1,n4,n5,n6,2,tim_fourwf,weight)

  if(nspinor==2)then
   allocate(cwave0_sp(2,npw),gh1_sp(2,npw1))
!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(cwave0,cwave0_sp,npw)
   do ipw=1,npw
    cwave0_sp(1,ipw)=cwave0(1,ipw+npw)
    cwave0_sp(2,ipw)=cwave0(2,ipw+npw)
   end do
!$OMP END PARALLEL DO
   call fourwf(cplex,vlocal1,cwave0_sp,gh1_sp,wfraug,gbound,gs_hamkq%gbound,&
&   istwf_k,kg_k,kg1_k,mgfft,mpi_enreg,1,gs_hamkq%ngfft,&
&   npw,npw1,n4,n5,n6,2,tim_fourwf,weight)
!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(gh1,gh1_sp,npw1)
   do ipw=1,npw1
    gh1(1,ipw+npw1)=gh1_sp(1,ipw)
    gh1(2,ipw+npw1)=gh1_sp(2,ipw)
   end do
!$OMP END PARALLEL DO
   deallocate(cwave0_sp,gh1_sp)
  end if


! Compute dVnonlocal/d(atomic displacement) |C(n,k)>
  signs=2 ; choice=2 ; nnlout=3 ; natom_der=1 ; nattyp_der(1)=1 ; ntypat_der=1
  cpopt=-1 ; paw_opt=0 ; allocate(enlout(nnlout))
  matblk_der=1
  xred_der(:)=gs_hamkq%xred(:,ipert)
  atindx_der(1)=1
  atindx1_der(1)=1

! Store at the right place the 1d phases
  allocate(ph1d_der(2,(2*n1+1)+(2*n2+1)+(2*n3+1)))
  shift1=(gs_hamkq%atindx(ipert)-1)*(2*n1+1)
  ph1d_der(:,1:2*n1+1)=gs_hamkq%ph1d(:,1+shift1:2*n1+1+shift1)
  shift2=(gs_hamkq%atindx(ipert)-1)*(2*n2+1)+natom*(2*n1+1)
  ph1d_der(:,1+2*n1+1:2*n2+1+2*n1+1)=gs_hamkq%ph1d(:,1+shift2:2*n2+1+shift2)
  shift3=(gs_hamkq%atindx(ipert)-1)*(2*n3+1)+natom*(2*n1+1+2*n2+1)
  ph1d_der(:,1+2*n1+1+2*n2+1:2*n3+1+2*n2+1+2*n1+1)=&
&  gs_hamkq%ph1d(:,1+shift3:2*n3+1+shift3)

! Will compute the 3D phase factors inside nonlop
  allocate(ph3din(2,npw,1),ph3dout(2,npw1,1))
  nloalg_der(:)=gs_hamkq%nloalg(:)
  nloalg_der(1)=-abs(gs_hamkq%nloalg(1))
  nloalg_der(4)=1

! Compute here phkxred for kpt and kpq
  arg=two_pi*( kpt(1)*gs_hamkq%xred(1,ipert)+ &
&              kpt(2)*gs_hamkq%xred(2,ipert)+ &
&              kpt(3)*gs_hamkq%xred(3,ipert)   )
  phkxredin(1,1)=cos(arg)  ; phkxredin(2,1)=sin(arg)
  arg=two_pi * ( gs_hamkq%kpoint(1) * gs_hamkq%xred(1,ipert) +&
&                gs_hamkq%kpoint(2) * gs_hamkq%xred(2,ipert) +&
&                gs_hamkq%kpoint(3) * gs_hamkq%xred(3,ipert)   )
  phkxredout(1,1)=cos(arg) ; phkxredout(2,1)=sin(arg)

  tim_nonlop=7
  call nonlop(atindx1_der,choice,cpopt,cprj_dum,dimekb,ntypat_der,dimffnlk,dimffnl1,&
&             ekb_typ,enlout,ffnlk,ffnlkq,gs_hamkq%gmet,&
&             gs_hamkq%gprimd,idir,indlmn_typ,istwf_k,kg_k,kg1_k,kpg_k,kpg1_k,kpt,&
&             gs_hamkq%kpoint,dum,lmnmax,matblk_der,mgfft,mpi_enreg,mpsang,&
&             mpssoang,natom_der,nattyp_der,gs_hamkq%ngfft,nkpg,nkpg1,nloalg_der,&
&             nnlout,npw,npw1,nspinor,ntypat_der,paw_opt,phkxredin,&
&             phkxredout,ph1d_der,ph3din,ph3dout,pspso_typ,&
&             signs,nonlop_dum,nonlop_dum,tim_nonlop,&
&             gs_hamkq%ucvol,gs_hamkq%useylm,cwave0,gvnl1)
  deallocate(enlout,ph1d_der,ph3din,ph3dout)

 else if(ipert==natom+1)then

! In the case of ddk operator, the non-local step (to which the kinetic
! part is assimilated) is the only contribution
! (no local contribution, also because no self-consistency)
!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(gh1,npw1,nspinor)
  do ipw=1,npw1*nspinor
   gh1(:,ipw)=0.0_dp
  end do
!$OMP END PARALLEL DO

! Non-local (remember, q=0, so can take all RF data)
  choice=5 ; nnlout=1 ; signs=2 ; tim_nonlop=8
  cpopt=-1 ; paw_opt=0 ; allocate(enlout(nnlout))
  call nonlop(gs_hamkq%atindx1,choice,cpopt,cprj_dum,gs_hamkq%dimekb1,gs_hamkq%dimekb2,&
&             dimffnl1,dimffnl1,gs_hamkq%ekb,enlout,ffnl1,ffnl1,&
&             gs_hamkq%gmet,gs_hamkq%gprimd,idir,gs_hamkq%indlmn,istwf_k,&
&             kg1_k,kg1_k,kpg1_k,kpg1_k,gs_hamkq%kpoint,gs_hamkq%kpoint,dum,lmnmax,matblk,&
&             mgfft,mpi_enreg,mpsang,mpssoang,natom,gs_hamkq%nattyp,&
&             gs_hamkq%ngfft,nkpg1,nkpg1,gs_hamkq%nloalg,nnlout,npw1,npw1,&
&             nspinor,ntypat,paw_opt,gs_hamkq%phkxred,gs_hamkq%phkxred,gs_hamkq%ph1d,&
&             ph3d,ph3d,gs_hamkq%pspso,signs,&
&             nonlop_dum,nonlop_dum,tim_nonlop,gs_hamkq%ucvol,&
&             gs_hamkq%useylm,cwave0,gvnl1)
  deallocate(enlout)

! Kinetic contribution. Remember that npw=npw1 for ddk perturbation
  do ispinor=1,nspinor
!$OMP PARALLEL DO PRIVATE(ipw,ipws) &
!$OMP&SHARED(cwave0,ispinor,gvnl1,dkinpw,kinpw1,npw,nspinor)
   do ipw=1,npw
    ipws=ipw+npw*(ispinor-1)
    if(kinpw1(ipw)<huge(0.0_dp)*1.d-11)then
     gvnl1(1,ipws)=gvnl1(1,ipws)+dkinpw(ipw)*cwave0(1,ipws)
     gvnl1(2,ipws)=gvnl1(2,ipws)+dkinpw(ipw)*cwave0(2,ipws)
    else
     gvnl1(1,ipws)=0.0_dp
     gvnl1(2,ipws)=0.0_dp
    end if
   end do
!$OMP END PARALLEL DO
  end do

 else if(ipert==natom+2)then

! Apply the local SCF potential operator to cwave0
  weight=1.0_dp ; tim_fourwf=4
  call fourwf(cplex,vlocal1,cwave0,gh1,wfraug,gbound,gs_hamkq%gbound,&
&  istwf_k,kg_k,kg1_k,mgfft,mpi_enreg,1,gs_hamkq%ngfft,&
&  npw,npw1,n4,n5,n6,2,tim_fourwf,weight)

  if(nspinor==2)then
   allocate(cwave0_sp(2,npw),gh1_sp(2,npw1))
!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(cwave0,cwave0_sp,npw)
   do ipw=1,npw
    cwave0_sp(1,ipw)=cwave0(1,ipw+npw)
    cwave0_sp(2,ipw)=cwave0(2,ipw+npw)
   end do
!$OMP END PARALLEL DO
   call fourwf(cplex,vlocal1,cwave0_sp,gh1_sp,wfraug,gbound,gs_hamkq%gbound,&
&   istwf_k,kg_k,kg1_k,mgfft,mpi_enreg,1,gs_hamkq%ngfft,&
&   npw,npw1,n4,n5,n6,2,tim_fourwf,weight)
!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(gh1,gh1_sp,npw1)
   do ipw=1,npw1
    gh1(1,ipw+npw1)=gh1_sp(1,ipw)
    gh1(2,ipw+npw1)=gh1_sp(2,ipw)
   end do
!$OMP END PARALLEL DO
   deallocate(cwave0_sp,gh1_sp)
  end if

! gvnl1 was already initialized in the calling routine, by reading a ddk file

!section for strain perturbation
 else if(ipert==natom+3 .or. ipert==natom+4)then
  if(ipert==natom+3) then
   istr=idir
  else
   istr = idir+3
  end if

! Apply the local SCF potential operator to cwave0
  weight=1.0_dp ; tim_fourwf=4
  call fourwf(cplex,vlocal1,cwave0,gh1,wfraug,gbound,gs_hamkq%gbound,&
&  istwf_k,kg_k,kg1_k,mgfft,mpi_enreg,1,gs_hamkq%ngfft,&
&  npw,npw1,n4,n5,n6,2,tim_fourwf,weight)

  if(nspinor==2)then
   allocate(cwave0_sp(2,npw),gh1_sp(2,npw1))
!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(cwave0,cwave0_sp,npw)
   do ipw=1,npw
    cwave0_sp(1,ipw)=cwave0(1,ipw+npw)
    cwave0_sp(2,ipw)=cwave0(2,ipw+npw)
   end do
!$OMP END PARALLEL DO
   call fourwf(cplex,vlocal1,cwave0_sp,gh1_sp,wfraug,gbound,gs_hamkq%gbound,&
&   istwf_k,kg_k,kg1_k,mgfft,mpi_enreg,1,gs_hamkq%ngfft,npw,npw1,n4,n5,n6,2,tim_fourwf,weight)
!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(gh1,gh1_sp,npw1)
   do ipw=1,npw1
    gh1(1,ipw+npw1)=gh1_sp(1,ipw)
    gh1(2,ipw+npw1)=gh1_sp(2,ipw)
   end do
!$OMP END PARALLEL DO
   deallocate(cwave0_sp,gh1_sp)
  end if

! Non-local (remember, q=0, so can take all RF data)
! copied from d/dk above; changes may be needed; tim_nonlop may need changing
  signs=2 ; choice=3 ; nnlout=6 ; tim_nonlop=8
  cpopt=-1 ; paw_opt=0 ; allocate(enlout(nnlout))
  call nonlop(gs_hamkq%atindx1,choice,cpopt,cprj_dum,gs_hamkq%dimekb1,gs_hamkq%dimekb2,&
&             dimffnl1,dimffnl1,gs_hamkq%ekb,enlout,ffnl1,ffnl1,&
&             gs_hamkq%gmet,gs_hamkq%gprimd,istr,gs_hamkq%indlmn,istwf_k,&
&             kg1_k,kg1_k,kpg1_k,kpg1_k,gs_hamkq%kpoint,gs_hamkq%kpoint,dum,lmnmax,matblk,&
&             mgfft,mpi_enreg,mpsang,mpssoang,natom,gs_hamkq%nattyp,&
&             gs_hamkq%ngfft,nkpg1,nkpg1,gs_hamkq%nloalg,nnlout,npw1,npw1,&
&             nspinor,ntypat,paw_opt,gs_hamkq%phkxred,gs_hamkq%phkxred,gs_hamkq%ph1d,&
&             ph3d,ph3d,gs_hamkq%pspso,signs,&
&             nonlop_dum,nonlop_dum,tim_nonlop,gs_hamkq%ucvol,&
&             gs_hamkq%useylm,cwave0,gvnl1)
  deallocate(enlout)

!DEBUG
! write(6,'(a,2i6,2d24.16)') ('cgwf3:gvnl1',band,ii,&
!&  gvnl1(1,ii),gvnl1(2,ii),ii=1,npw1,39)
!ENDDEBUG

! Kinetic contribution. Remember that npw=npw1 for strain perturbation

  do ispinor=1,nspinor
!$OMP PARALLEL DO PRIVATE(ipw,ipws) &
!$OMP&SHARED(cwave0,ispinor,gvnl1,dkinpw,kinpw1,npw,nspinor)
   do ipw=1,npw
    ipws=ipw+npw*(ispinor-1)
    if(kinpw1(ipw)<huge(0.0_dp)*1.d-11)then
     gvnl1(1,ipws)=gvnl1(1,ipws)+dkinpw(ipw)*cwave0(1,ipws)
     gvnl1(2,ipws)=gvnl1(2,ipws)+dkinpw(ipw)*cwave0(2,ipws)
    else
     gvnl1(1,ipws)=0.0_dp
     gvnl1(2,ipws)=0.0_dp
    end if
   end do
!$OMP END PARALLEL DO
  end do

! end section for strain perturbation
 end if

!This is the result of the application of H(1) to the 0-order wf.
!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(gh1,gvnl1,npw1,nspinor)
 do ipw=1,npw1*nspinor
  gh1(1,ipw)=gh1(1,ipw)+gvnl1(1,ipw)
  gh1(2,ipw)=gh1(2,ipw)+gvnl1(2,ipw)
 end do

!DEBUG
! write(6,'(a,2i6,2d24.16)') ('cgwf3:gh1',band,ii,&
!&  gh1(1,ii),gh1(2,ii),ii=1,npw1,39)
!ENDDEBUG
!$OMP END PARALLEL DO

!DEBUG
!write(6,*)' cgwf3 : eshift=',eshift
!write(6,*)' cgwf3 : debug, ipw, gh1(:,ipw), gvnl1(:,ipw)'
!do ipw=1,npw1*nspinor
! write(6, '(i4,4es16.6)' )ipw,gh1(:,ipw),gvnl1(:,ipw)
!end do
!stop
!ENDDEBUG

! End of application of H(1) to phi(0)
!*************************************************************************

!Projecting out all bands (this could be avoided).
 allocate(dummy0(2,0),scprod(2,nband))
 tim_projbd=2
 call projbd(cgq,gh1,-1,icgq,0,istwf_k,mcgq,mpi_enreg,0,nband,npw1*nspinor,&
&            ortalg,print,dummy0,scprod,tim_projbd,0)
!The array eig1_k contains  <u_(iband,k+q)^(0)|H_(k+q,k)^(1)|u_(band,k)^(0)>
 do iband=1,nband
  eig1_k(2*iband-1 +(band-1)*2*nband)=scprod(1,iband)
  eig1_k(2*iband   +(band-1)*2*nband)=scprod(2,iband)
 end do

!Filter the wavefunctions for large modified kinetic energy (see routine mkkin.f)
 do ispinor=1,nspinor
  ipws=(ispinor-1)*npw1
!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(cwavef,gh1,kinpw1,ipws,npw1)
  do ipw=1+ipws,npw1+ipws
   if(kinpw1(ipw-ipws)>huge(0.0_dp)*1.d-11)then
    gh1(1,ipw)=0.0_dp
    gh1(2,ipw)=0.0_dp
    cwavef(1,ipw)=0.0_dp
    cwavef(2,ipw)=0.0_dp
   end if
  end do
!$OMP END PARALLEL DO
 end do

!Project out all bands from cwavef (this is needed when there are some
!partially or unoccupied states)
 tim_projbd=2
 call projbd(cgq,cwavef,-1,icgq,0,istwf_k,mcgq,mpi_enreg,0,nband,npw1*nspinor,&
&            ortalg,print,dummy0,scprod,tim_projbd,0)

!Treat the case of buffer bands
 if(band>max(1,nband-nbdbuf))then
!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(cwavef,ghc,gvnlc,npw1,nspinor)
  do ipw=1,npw1*nspinor
   cwavef(1,ipw)=zero
   cwavef(2,ipw)=zero
   ghc(1,ipw)  =zero
   ghc(2,ipw)  =zero
   gvnlc(1,ipw)=zero
   gvnlc(2,ipw)=zero
  end do
!$OMP END PARALLEL DO
! A small negative residual will be associated with these
  resid=-0.1_dp
! Number of one-way 3D ffts skipped
  nskip=nskip+nline

 else
! If not a buffer band, perform the optimisation

  allocate(conjgr(2,npw1*nspinor),direc(2,npw1*nspinor))
  allocate(gresid(2,npw1*nspinor))
  allocate(cwaveq(2,npw1*nspinor))

  cwaveq(:,:)=cgq(:,1+npw1*nspinor*(band-1)+icgq:npw1*nspinor*band+icgq)

  dotgp=1.0_dp

!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(conjgr,npw1,nspinor)
  do ipw=1,npw1*nspinor
   conjgr(1,ipw)=0.0_dp
   conjgr(2,ipw)=0.0_dp
  end do
!$OMP END PARALLEL DO

  if(prtvol<0 .or. test*prtvol/=0)then
   call status(0,filstat,iexit,level,'call getghc(1)')
  end if

! Here apply H(0) at k+q to input orthogonalized 1st-order wfs
  call getghc(cwavef,dimffnl1,ffnl1,filstat,&
&  ghc,gsc_dummy,gs_hamkq,gvnlc,kg1_k,kinpw1,lmnmax,matblk,&
&  mgfft,mpi_enreg,mpsang,mpssoang,natom,1,npw1,nspinor,ntypat,nvloc,n4,n5,n6,&
&  ph3d,prtvol,0,tim_getghc,vlocal)

!DEBUG
!  call sqnorm_g(prod1,istwf_k,mpi_enreg,npw1*nspinor,ghc)
!  call sqnorm_g(prod2,istwf_k,mpi_enreg,npw1*nspinor,cwavef)
!  write(6,*)' cgwf3: ghc2,cwavef2=',prod1,prod2
!  stop
!ENDDEBUG

! In cgwf3, ghc also includes the eigenvalue shift
!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(cwavef,eshift,ghc,npw1,nspinor)
  do ipw=1,npw1*nspinor
   ghc(1,ipw)=ghc(1,ipw)-eshift*cwavef(1,ipw)
   ghc(2,ipw)=ghc(2,ipw)-eshift*cwavef(2,ipw)
  end do
!$OMP END PARALLEL DO

  if(prtvol<0 .or. test*prtvol/=0)then
   call status(0,filstat,iexit,level,'after getghc(1')
  end if

! Initialize resid, in case of nline==0
  resid=zero

!----------------------------------
!    Begin main do-loop           !
!----------------------------------

  do iline=1,nline

!DEBUG
!  write(6,*)' cgwf3 : iline=',iline
!ENDDEBUG

   if(prtvol<0 .or. test*prtvol/=0)then
    call status(iline,filstat,iexit,level,'loop iline    ')
   end if

!DEBUG
!  call sqnorm_g(prod1,istwf_k,mpi_enreg,npw1*nspinor,ghc)
!  call sqnorm_g(prod2,istwf_k,mpi_enreg,npw1*nspinor,gh1)
!  write(6,*)' cgwf3: prod1,prod2=',prod1,prod2
!ENDDEBUG

!  Compute residual
!  Note that gresid (=steepest-descent vector, Eq.(26))
!  is precomputed to garantee cancellation of errors
!  and allow residuals to reach values as small as 1.0d-24 or better.


if (berryopt == 4) then
!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(ghc,gh1,npw1,nspinor,gresid,grad_berry,band)
 do ipw=1,npw1*nspinor
  gresid(1,ipw)=-ghc(1,ipw)-gh1(1,ipw)+grad_berry(2,ipw,band)
  gresid(2,ipw)=-ghc(2,ipw)-gh1(2,ipw)-grad_berry(1,ipw,band)
 end do
!$OMP END PARALLEL DO
else
!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(ghc,gh1,npw1,nspinor,gresid)
   do ipw=1,npw1*nspinor
    gresid(1,ipw)=-ghc(1,ipw)-gh1(1,ipw)
    gresid(2,ipw)=-ghc(2,ipw)-gh1(2,ipw)
   end do
!$OMP END PARALLEL DO
end if




!  Project all bands from gresid into direc

   if(prtvol<0 .or. test*prtvol/=0)then
    call status(iline,filstat,iexit,level,'projbd(1)     ')
   end if

!  The following projection over the subspace orthogonal to occupied bands
!  is not optional in the RF case, unlike the GS case.
!  However, the order of operations could be changed, so that
!  as to make it only applied at the beginning, to H(1) psi(0),
!  so, THIS IS TO BE REEXAMINED
   print=0
   if(prtvol==-level)print=1
   tim_projbd=2
   call projbd(cgq,gresid,-1,icgq,0,istwf_k,mcgq,mpi_enreg,0,nband,npw1*nspinor,&
&              ortalg,print,dummy0,scprod,tim_projbd,0)

!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(direc,npw1,nspinor,gresid)
   do ipw=1,npw1*nspinor
    direc(1,ipw)=gresid(1,ipw)
    direc(2,ipw)=gresid(2,ipw)
   end do
!$OMP END PARALLEL DO

!  Compute second-order derivative of the energy using a variational
!  expression
   call dotprod_g(prod1,doti,istwf_k,mpi_enreg,npw1*nspinor,1,cwavef,gresid)
   call dotprod_g(prod2,doti,istwf_k,mpi_enreg,npw1*nspinor,1,cwavef,gh1)
   d2te=2.0_dp*(-prod1+prod2)

!DEBUG
!  write(6,*)' cgwf3: prod1,prod2=',prod1,prod2
!ENDDEBUG

!  Compute <u_m(1)|H(0)-e_m(0)|u_m(1)>, that should be positive,
!  except when the eigenvalue eig_mk(0) is higher than
!  the lowest non-treated eig_mk+q(0). For insulators, this
!  has no influence, but for metallic occupations,
!  the conjugate gradient algorithm breaks down. The solution adopted here
!  is very crude, and rely upon the fact that occupancies of such
!  levels should be smaller and smaller with increasing nband, so that
!  a convergence study will give the right result.
!  The same trick is also used later.
   u1h0me0u1=-prod1-prod2
   if(u1h0me0u1<0.0_dp)then
!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(cwavef,ghc,gvnlc,npw1,nspinor)
    do ipw=1,npw1*nspinor
     cwavef(1,ipw)=0.0_dp
     cwavef(2,ipw)=0.0_dp
     ghc(1,ipw)  =0.0_dp
     ghc(2,ipw)  =0.0_dp
     gvnlc(1,ipw)=0.0_dp
     gvnlc(2,ipw)=0.0_dp
    end do
!$OMP END PARALLEL DO
!   A negative residual will be the signal of this problem ...
    resid=-1.0_dp
    write(message, '(a)' )&
&    ' cgwf3: problem of minimisation (likely metallic), set resid to -1'
    call wrtout(06,message,'PERS')
!   Number of one-way 3D ffts skipped
    nskip=nskip+(nline-iline+1)
!   Exit from the loop on iline
    exit
   end if

!  Compute residual
   call sqnorm_g(resid,istwf_k,mpi_enreg,npw1*nspinor,gresid)

   if( prtvol==-level .or. test*prtvol/=0 )then
    write(message,'(a,a,i3,f14.6,a,a,4es12.4)') ch10,&
&      ' cgwf3 : iline,eshift     =',iline,eshift,ch10,&
&      '         resid,prod1,prod2,d2te=',resid,prod1,prod2,d2te
    call wrtout(06,message,'PERS')
   end if

!  If residual sufficiently small stop line minimizations
   if (resid<tolwfr) then
    if(prtvol>=10)then
     write(message, '(a,i4,a,i2,a,es12.4)' ) &
&     ' cgwf3: band',band,' converged after ',iline,&
&     ' line minimizations : resid =',resid
     call wrtout(06,message,'PERS')
    end if
!   Number of two-way 3D ffts skipped
    nskip=nskip+(nline-iline+1)
!   Exit from the loop on iline
    exit
   end if

!  If user require exiting the job, stop line minimisations
   if (quit==1) then
    write(message, '(a,i4)' ) &
&     ' cgwf3: user require exiting => skip update of band ',band
    call wrtout(06,message,'PERS')
!   Number of two-way 3D ffts skipped
    nskip=nskip+(nline-iline+1)
!   Exit from the loop on iline
    exit
   end if

!  Check that d2te is decreasing on succeeding lines:
   if (iline/=1) then
    if (d2te>d2teold+1.d-12) then
     write(message, '(a,a,a,i8,a,1p,e14.6,a1,3x,a,1p,e14.6,a1)')&
&      ' cgwf3: WARNING -',ch10,&
&      '  New trial energy at line',iline,' = ',d2te,ch10,&
&      '  is higher than former:',d2teold,ch10
     call wrtout(06,message,'PERS')
    end if
   end if
   d2teold=d2te

   if(prtvol<0 .or. test*prtvol/=0)then
    call status(iline,filstat,iexit,level,'call precon   ')
   end if

!DEBUG Keep this debugging feature !
!  call sqnorm_g(dotr,istwf_k,mpi_enreg,npw1*nspinor,direc)
!  write(6,*)' cgwf3 : before precon, direc**2=',dotr
!  call sqnorm_g(dotr,istwf_k,mpi_enreg,npw1*nspinor,cwaveq)
!  write(6,*)' cgwf3 : before precon, cwaveq**2=',dotr
!ENDDEBUG

! Precondition each component of direc
   call precon(cwaveq,0.0_dp,istwf_k,kinpw1,mpi_enreg,npw1,nspinor,0,pcon,direc)

!DEBUG Keep this debugging feature !
!  call sqnorm_g(dotr,istwf_k,mpi_enreg,npw1*nspinor,direc)
!  write(6,*)' cgwf3 : after precon, direc**2=',dotr
!ENDDEBUG

   if(prtvol<0 .or. test*prtvol/=0)then
    call status(0,filstat,iexit,level,'prjbd(2)      ')
   end if

!  Projecting again out all bands.
   tim_projbd=2
   call projbd(cgq,direc,-1,icgq,0,istwf_k,mcgq,mpi_enreg,0,nband,npw1*nspinor,&
&              ortalg,print,dummy0,scprod,tim_projbd,0)

!DEBUG Keep this debugging feature !
!  call sqnorm_g(dotr,istwf_k,mpi_enreg,npw1*nspinor,direc)
!  write(6,*)' cgwf3 : after projbd, direc**2=',dotr
!ENDDEBUG

!  Compute the conjugate gradient, not normalized

!  get dot of direction vector with residual vector
   call dotprod_g(dotgg,doti,istwf_k,mpi_enreg,npw1*nspinor,1,direc,gresid)

   gamma=dotgg/dotgp
   dotgp=dotgg
!  h = g + gamma * h
!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(conjgr,direc,gamma,npw1,nspinor)
   do ipw=1,npw1*nspinor
    conjgr(1,ipw)=direc(1,ipw)+gamma*conjgr(1,ipw)
    conjgr(2,ipw)=direc(2,ipw)+gamma*conjgr(2,ipw)
   end do
!$OMP END PARALLEL DO

   if(prtvol==-level .or. test*prtvol/=0)then
    write(message,'(a,2es16.6)') 'cgwf3: dotgg,gamma =',dotgg,gamma
    call wrtout(06,message,'PERS')
   end if

   if(prtvol==-level)then
    write(message,'(a)') &
&    'cgwf3: conjugate direction has been found'
    call wrtout(06,message,'PERS')
   end if

!---------------
!  The search direction vector (in conjgr) has been found
!  Now compute first and second derivative of energy
!    along the search direction
!---------------

!  Compute dedt, Eq.(29), with an additional factor of 2 for the difference
!  between E(2) and the 2DTE
   call dotprod_g(dedt,doti,istwf_k,mpi_enreg,npw1*nspinor,1,conjgr,gresid)
   dedt=-2.0_dp*2.0_dp*dedt

   if(prtvol<0 .or. test*prtvol/=0)then
    call status(iline,filstat,iexit,level,'call getghc(2)')
   end if

   call getghc(conjgr,dimffnl1,ffnl1,filstat,gh_direc,gsc_dummy,gs_hamkq,gvnl_direc,&
&   kg1_k,kinpw1,lmnmax,matblk,mgfft,mpi_enreg,mpsang,mpssoang,&
&   natom,1,npw1,nspinor,ntypat,nvloc,n4,n5,n6,&
&   ph3d,prtvol,0,tim_getghc,vlocal)

!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(conjgr,eshift,gamma,gh_direc,npw1,nspinor)
   do ipw=1,npw1*nspinor
    gh_direc(1,ipw)=gh_direc(1,ipw)-eshift*conjgr(1,ipw)
    gh_direc(2,ipw)=gh_direc(2,ipw)-eshift*conjgr(2,ipw)
   end do
!$OMP END PARALLEL DO

!  compute d2edt2, Eq.(30), with an additional factor of 2 for the difference
!  between E(2) and the 2DTE, and neglect of local fields (SC terms)
   call dotprod_g(d2edt2,doti,istwf_k,mpi_enreg,npw1*nspinor,1,conjgr,gh_direc)
   d2edt2=2.0_dp*2.0_dp*d2edt2

   if(prtvol<0 .or. test*prtvol/=0)then
    call status(iline,filstat,iexit,level,'after getghc(2')
   end if

   if(prtvol==-level .or. test*prtvol/=0)then
    write(message,'(a,2es14.6)') 'cgwf3: dedt,d2edt2=',dedt,d2edt2
    call wrtout(06,message,'PERS')
   end if

!---------------
!  Compute the mixing factor, see Eq.(31) of PRB55, 10337 (1997)
!---------------


   if(d2edt2<0.0_dp)then
!   This may happen when the eigenvalue eig_mk(0) is higher than
!   the lowest non-treated eig_mk+q(0). The solution adopted here
!   is very crude, and rely upon the fact that occupancies of such
!   levels should be smaller and smaller with increasing nband, so that
!   a convergence study will give the right result.
    theta=0.0_dp
!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(cwavef,ghc,gvnlc,npw1,nspinor)
    do ipw=1,npw1*nspinor
     cwavef(1,ipw)=0.0_dp
     cwavef(2,ipw)=0.0_dp
     ghc(1,ipw)  =0.0_dp
     ghc(2,ipw)  =0.0_dp
     gvnlc(1,ipw)=0.0_dp
     gvnlc(2,ipw)=0.0_dp
    end do
!$OMP END PARALLEL DO
!   A negative residual will be the signal of this problem ...
    resid=-2.0_dp
    write(message, '(a)' )&
&    ' cgwf3: problem of minimisation (likely metallic), set resid to -2'
    call wrtout(06,message,'PERS')
   else
!   Here, the value of theta that gives the minimum
    theta=-dedt/d2edt2
!DEBUG
!   write(6,*)' cgwf3: dedt,d2edt2=',dedt,d2edt2
!ENDDEBUG
   end if

!  Check that result is above machine precision
   if (1.0_dp+theta==1.0_dp) then
    write(message, '(a,es16.4)' ) ' cgwf3: converged with theta=',theta
    call wrtout(06,message,'PERS')
!   Number of one-way 3D ffts skipped
    nskip=nskip+2*(nline-iline)
!   Exit from the loop on iline
    exit
   end if

!----------
!  Mixing factor has been found
!  Generate new wavefunction, new H|wf>, new Vnl|wf>
!----------

!$OMP PARALLEL DO PRIVATE(ipw) &
!$OMP&SHARED(conjgr,cwavef,ghc,gh_direc,gvnlc,gvnl_direc,npw1,nspinor,theta)
   do ipw=1,npw1*nspinor
    cwavef(1,ipw)=cwavef(1,ipw)+conjgr(1,ipw)*theta
    cwavef(2,ipw)=cwavef(2,ipw)+conjgr(2,ipw)*theta
    ghc(1,ipw)  =ghc(1,ipw)  + gh_direc(1,ipw)*theta
    ghc(2,ipw)  =ghc(2,ipw)  + gh_direc(2,ipw)*theta
    gvnlc(1,ipw)=gvnlc(1,ipw)+ gvnl_direc(1,ipw)*theta
    gvnlc(2,ipw)=gvnlc(2,ipw)+ gvnl_direc(2,ipw)*theta
   end do
!$OMP END PARALLEL DO

!----------
!  Check reduction in trial energy deltae Eq.(28)
!----------

   deltae=0.5_dp*d2edt2*theta**2+theta*dedt

   if (iline==1) then
    deold=deltae
!  Use a lower value for comparison with RESPFN
!   cgwftol=tol14
    cgwftol=0.01_dp   ! Value used until v3.3
   else if (abs(deltae)<cgwftol*abs(deold) .and. iline/=nline ) then
!  else if (abs(deltae)<0.005_dp*abs(deold) .and. iline/=nline ) then
    if(prtvol>=10)then
     write(message, '(a,i4,1x,a,1p,e12.4,a,e12.4,a)' ) &
&     ' cgwf3: line',iline,&
&     ' deltae=',deltae,' < cgwftol*',deold,' =>skip lines'
     call wrtout(06,message,'PERS')
    end if
!   Number of one-way 3D ffts skipped
    nskip=nskip+2*(nline-iline)
!   Exit from the loop on iline
    exit
   end if

! End main loop, on iline. Note that there are three "exit" instruction
! inside the loop.
  end do ! iline

  deallocate(conjgr,cwaveq,direc,gresid)

!End condition of not being a buffer band
 end if

!At the end of the treatment of a set of bands, write the number
!of one-way 3D ffts skipped
#if !defined MPI
 if(band==nband .and. prtvol>=10)then
  write(message, '(a,i8)' )&
&  ' cgwf3: number of one-way 3D ffts skipped in cgwf3 until now =',nskip
  call wrtout(06,message,'PERS')
 end if
#endif

 if(prtvol<0 .or. test*prtvol/=0)then
  call status(0,filstat,iexit,level,'after iline   ')
 end if

 deallocate(dummy0,gh_direc,gh1,gvnl_direc,pcon,scprod)

!Structured debugging : if prtvol=-level, stop here.
 if(prtvol==-level)then
  write(message,'(a1,a,a1,a,i2,a)') ch10,&
&   ' cgwf3 : exit ',&
&   ch10,'  prtvol=-',level,', debugging mode => stop '
  call wrtout(06,message,'PERS')
  call leave_new('PERS')
 end if

 call timab(122,2,tsec)

 if(prtvol<0 .or. test*prtvol/=0)then
  call status(0,filstat,iexit,level,'exit          ')
 end if

!DEBUG
!write(6,*)' cgwf3 : exit'
!ENDDEBUG

end subroutine cgwf3
!!***
