subroutine pgmres ( n, im, rhs, sol, vv, eps, maxits, iout, &
  aa, ja, ia, alu, jlu, ju, ierr )

!*****************************************************************************80
!
!! PGMRES is an ILUT - Preconditioned GMRES solver.
!
!  Discussion:
!
!    This is a simple version of the ILUT preconditioned GMRES algorithm.
!    The L and U factors held in ALU, JLU, JU (as produced by ILUT, ILU0
!    or MILU0) are applied through LUSOL0 as a right preconditioner, and
!    the matrix itself is applied through OPE.
!
!    The stopping criterion is based simply on reducing the residual norm
!    by the factor EPS relative to the initial residual norm.
!
!    The Hessenberg matrix and the Givens rotations are kept in local
!    arrays dimensioned by KMAX.  A requested Krylov dimension IM larger
!    than KMAX is therefore reduced to KMAX instead of overrunning those
!    arrays; to use a larger subspace, change the parameter KMAX.
!
!    USAGE: first call ILUT or ILU0 or MILU0 to set up the preconditioner
!    and then call PGMRES.
!
!  Modified:
!
!    07 January 2004
!
!  Author:
!
!    Youcef Saad
!
!  Parameters:
!
!    Input, integer N, the order of the matrix.
!
!    Input, integer IM, the size of the Krylov subspace.  Values above
!    KMAX (50) are treated as KMAX.  At least one Arnoldi step is always
!    taken per cycle.
!
!    Input/output, real RHS(N), on input, the right hand side vector.
!    On output, the information in this vector has been destroyed
!    (it is reused as a work vector).
!
!    Input/output, real SOL(N), on input an initial guess to the solution,
!    on output the approximate solution.
!
!    Workspace, real VV(N,IM+1), used to store the Arnoldi basis.
!
!    Input, real EPS, tolerance for the stopping criterion.  The process
!    is stopped as soon as
!      || current residual || / || initial residual || <= EPS
!    where ||.|| is the Euclidean norm.
!
!    Input, integer MAXITS, the maximum number of iterations allowed.
!    The count is tested only at the end of a restart cycle.
!
!    Input, integer IOUT, output unit number for printing intermediate
!    results.  If IOUT <= 0 nothing is printed.
!
!    Input, real AA(*), integer JA(*), IA(N+1), the matrix in CSR
!    Compressed Sparse Row format.
!
!    Input, real ALU(*), integer JLU(*), the L and U factors stored in
!    Modified Sparse Row format.
!
!    Input, integer JU(*), pointers to the beginning of each row of U
!    in ALU, JLU.
!
!    Output, integer IERR, error flag.
!     0, successful return.
!     1, convergence not achieved within MAXITS iterations.
!    -1, a residual norm of exactly zero was computed at the start of a
!        cycle (the initial guess seems to be the exact solution).
!
  implicit none

  integer, parameter :: kmax = 50
  character ( len = * ), parameter :: fmt_res = &
    "(' its =', i4, ' res. norm =', G14.6)"

  integer n

  real ( kind = 8 ) aa(*)
  real ( kind = 8 ) alu(*)
  real ( kind = 8 ) c(kmax)
  real ( kind = 8 ) ddot
  real ( kind = 8 ) eps
  real ( kind = 8 ) eps1
  real ( kind = 8 ), parameter :: epsmac = 1.0D-16
  real ( kind = 8 ) gam
  real ( kind = 8 ) hh(kmax+1,kmax)
  integer i
  integer i1
  integer ia(n+1)
  integer ierr
  integer ii
  integer im
  integer iout
  integer its
  integer j
  integer ja(*)
  integer jj
  integer jlu(*)
  integer ju(*)
  integer k
  integer k1
  integer m
  integer maxits
  real ( kind = 8 ) rhs(n)
  real ( kind = 8 ) ro
  real ( kind = 8 ) rs(kmax+1)
  real ( kind = 8 ) s(kmax)
  real ( kind = 8 ) sol(n)
  real ( kind = 8 ) t
  real ( kind = 8 ) vv(n,*)
!
!  Effective Krylov dimension: never let the Arnoldi index run past the
!  local arrays HH, C, S and RS.
!
  m = min ( im, kmax )
  its = 0
!
!  Compute initial residual vector  r = rhs - A * sol  into VV(:,1).
!
  call ope ( n, sol, vv, aa, ja, ia )

  vv(1:n,1) = rhs(1:n) - vv(1:n,1)
!
!  Outer (restart) loop.
!
  restart: do

    ro = sqrt ( ddot ( n, vv, 1, vv, 1 ) )

    if ( 0 < iout .and. its == 0 ) then
      write ( iout, fmt_res ) its, ro
    end if

    if ( ro == 0.0D+00 ) then
      ierr = -1
      exit restart
    end if

    t = 1.0D+00 / ro
    vv(1:n,1) = vv(1:n,1) * t
!
!  The absolute tolerance is fixed once, from the initial residual.
!
    if ( its == 0 ) then
      eps1 = eps * ro
    end if
!
!  Initialize first term of RHS of Hessenberg system.
!
    rs(1) = ro
    i = 0
!
!  Arnoldi process, one new basis vector per pass.
!
    arnoldi: do

      i = i + 1
      its = its + 1
      i1 = i + 1
!
!  Right preconditioning: RHS is used as scratch for  M^{-1} * v_i.
!
      call lusol0 ( n, vv(1,i), rhs, alu, jlu, ju )
      call ope ( n, rhs, vv(1,i1), aa, ja, ia )
!
!  Modified Gram - Schmidt.
!
      do j = 1, i
        t = ddot ( n, vv(1,j), 1, vv(1,i1), 1 )
        hh(j,i) = t
        call daxpy ( n, -t, vv(1,j), 1, vv(1,i1), 1 )
      end do

      t = sqrt ( ddot ( n, vv(1,i1), 1, vv(1,i1), 1 ) )
      hh(i1,i) = t

      if ( t /= 0.0D+00 ) then
        t = 1.0D+00 / t
        vv(1:n,i1) = vv(1:n,i1) * t
      end if
!
!  Update factorization of HH: perform the previous plane rotations
!  on the I-th column of H (the loop is empty when I = 1).
!
      do k = 2, i
        k1 = k - 1
        t = hh(k1,i)
        hh(k1,i) = c(k1) * t + s(k1) * hh(k,i)
        hh(k,i) = -s(k1) * t + c(k1) * hh(k,i)
      end do

      gam = sqrt ( hh(i,i)**2 + hh(i1,i)**2 )
!
!  If GAMMA is zero then any small value will do.
!  It will affect only residual estimate.
!
      if ( gam == 0.0D+00 ) then
        gam = epsmac
      end if
!
!  Get the next plane rotation.
!
      c(i) = hh(i,i) / gam
      s(i) = hh(i1,i) / gam
      rs(i1) = -s(i) * rs(i)
      rs(i) = c(i) * rs(i)
!
!  Determine residual norm and test for convergence.
!
      hh(i,i) = c(i) * hh(i,i) + s(i) * hh(i1,i)
      ro = abs ( rs(i1) )

      if ( 0 < iout ) then
        write ( iout, fmt_res ) its, ro
      end if

      if ( .not. ( i < m .and. eps1 < ro ) ) then
        exit arnoldi
      end if

    end do arnoldi
!
!  Now compute solution.  First solve upper triangular system.
!
    rs(i) = rs(i) / hh(i,i)

    do ii = 2, i
      k = i - ii + 1
      k1 = k + 1
      t = rs(k)
      do j = k1, i
        t = t - hh(k,j) * rs(j)
      end do
      rs(k) = t / hh(k,k)
    end do
!
!  Form linear combination of V(*,i)'s to get solution.
!
    t = rs(1)
    rhs(1:n) = vv(1:n,1) * t

    do j = 2, i
      t = rs(j)
      rhs(1:n) = rhs(1:n) + t * vv(1:n,j)
    end do
!
!  Call preconditioner.  LUSOL0 is deliberately called in place here:
!  it copies Y(I) to X(I) before using it, so passing RHS for both is
!  harmless in practice.
!
    call lusol0 ( n, rhs, rhs, alu, jlu, ju )

    sol(1:n) = sol(1:n) + rhs(1:n)
!
!  Restart outer loop when necessary.
!
    if ( ro <= eps1 ) then
      ierr = 0
      exit restart
    end if

    if ( maxits < its ) then
      ierr = 1
      exit restart
    end if
!
!  Else compute residual vector and continue.
!
    do j = 1, i
      jj = i1 - j + 1
      rs(jj-1) = -s(jj-1) * rs(jj)
      rs(jj) = c(jj-1) * rs(jj)
    end do
!
!  Accumulate the new residual into VV(:,1).  For J = 1 the coefficient
!  is reduced by one because VV(:,1) is also the accumulation target.
!
    do j = 1, i1
      t = rs(j)
      if ( j == 1 ) then
        t = t - 1.0D+00
      end if
      call daxpy ( n, t, vv(1,j), 1,  vv, 1 )
    end do

  end do restart

  return
end subroutine pgmres




subroutine lusol0 ( n, y, x, alu, jlu, ju )

!*****************************************************************************80
!
!! LUSOL0 performs a forward followed by a backward solve
!  for an LU matrix as produced by ILUT.
!
!  Discussion:
!
!    The factors are read from ALU, JLU in Modified Sparse Row layout:
!    for row I the strictly lower entries occupy positions
!    JLU(I) : JU(I)-1, the strictly upper entries occupy positions
!    JU(I) : JLU(I+1)-1, and ALU(I) multiplies the result of the
!    backward sweep (it holds the inverted diagonal of U).
!
!  Modified:
!
!    07 January 2004
!
!  Author:
!
!    Youcef Saad
!
!  Parameters:
!
!    Input, integer N, the order of the matrix.
!
!    Input, real Y(N), the right hand side of the linear system.
!
!    Output, real X(N), the solution.
!
!    Input, real ALU(*), integer JLU(*), the L and U factors.
!
!    Input, integer JU(*), pointers to the start of the U part of each row.
!
  implicit none

  integer n

  real ( kind = 8 ) alu(*)
  integer jlu(*)
  integer ju(*)
  integer p
  integer row
  real ( kind = 8 ) x(n)
  real ( kind = 8 ) y(n)
!
!  Forward sweep with the unit lower triangular factor.
!
  do row = 1, n

    x(row) = y(row)

    do p = jlu(row), ju(row) - 1
      x(row) = x(row) - alu(p) * x(jlu(p))
    end do

  end do
!
!  Backward sweep with the upper triangular factor, last row first.
!
  do row = n, 1, -1

    do p = ju(row), jlu(row+1) - 1
      x(row) = x(row) - alu(p) * x(jlu(p))
    end do

    x(row) = alu(row) * x(row)

  end do

  return
end subroutine lusol0



subroutine ope ( n, x, y, a, ja, ia )

!*****************************************************************************80
!
!! OPE computes the sparse matrix-vector product Y = A * X.
!
!  Discussion:
!
!    Row I of the matrix occupies positions IA(I) : IA(I+1)-1 of A and JA.
!    Each Y(I) is reset to zero and then accumulated entry by entry in
!    storage order.
!
!  Modified:
!
!    07 January 2004
!
!  Author:
!
!    Youcef Saad
!
!  Parameters:
!
!    Input, integer N, the order of the matrix.
!
!    Input, real X(N), the vector to be multiplied.
!
!    Output, real Y(N), the product A * X.
!
!    Input, real A(*), integer JA(*), IA(N+1), the matrix in CSR
!    Compressed Sparse Row format.
!
  implicit none

  integer n

  real ( kind = 8 ) a(*)
  integer ia(n+1)
  integer ja(*)
  integer nz
  integer row
  real ( kind = 8 ) x(n)
  real ( kind = 8 ) y(n)

  do row = 1, n

    y(row) = 0.0D+00

    do nz = ia(row), ia(row+1) - 1
      y(row) = y(row) + a(nz) * x(ja(nz))
    end do

  end do

  return
end subroutine ope



subroutine ilut ( n, a, ja, ia, lfil, tol, alu, jlu, ju, iwk, wu, wl, jr, &
  jwl, jwu, ierr )

!*****************************************************************************80
!
!! ILUT is an ILUT preconditioner.
!
!  Discussion:
!
!    This routine carries out incomplete LU factorization with a dual
!    truncation mechanism.  Sorting is done for both L and U.
!
!    The dual drop-off strategy works as follows:
!
!    1) Thresholding in L and U as set by TOL.  Any element whose size
!       is less than some tolerance (relative to the norm of the current
!       row in U) is dropped.
!
!    2) Keeping only the largest lenl0+lfil elements in L and the
!       largest lenu0+lfil elements in U, where lenl0 is the initial
!       number of nonzero elements in a given row of the lower part of A
!       and lenu0 is similarly defined.
!
!    Flexibility: one can use tol=0 to get a strategy based on keeping the
!    largest elements in each row of L and U.  Taking tol /= 0 but lfil=n
!    will give the usual threshold strategy (however, fill-in is then
!    unpredictable).
!
!    A must have all nonzero diagonal elements.
!
!    The work array WU serves two purposes at once: positions 1:LENU
!    hold the U part of the row being processed, while position N+2-I
!    holds the average absolute value of the finished row I of U.  This
!    is why WU needs N+1 entries.
!
!  Modified:
!
!    07 January 2004
!
!  Author:
!
!    Youcef Saad
!
!  Parameters:
!
!    Input, integer N, the order of the matrix.
!
!    Input, real A(*), integer JA(*), IA(N+1), the matrix in CSR
!    Compressed Sparse Row format.
!
!    Input, integer LFIL, the fill-in parameter.  Each row of L and
!    each row of U will have a maximum of LFIL elements in addition to
!    the original number of nonzero elements.  LFIL must be >= 0.
!
!    Input, real TOL, the relative drop tolerance.
!
!    Output, real ALU(*), integer JLU(*), the matrix stored in Modified
!    Sparse Row (MSR) format containing the L and U factors together.
!    The diagonal (stored in ALU(1:N)) is inverted.  Each I-th row of the
!    ALU, JLU matrix contains the I-th row of L (excluding the unit
!    diagonal) followed by the I-th row of U.
!
!    Output, integer JU(*), array of length N containing the pointers to
!    the beginning of each row of U in the matrix ALU, JLU.
!
!    Input, integer IWK, the length of the arrays ALU and JLU.
!
!    Workspace, real WU(N+1), real WL(N).
!
!    Workspace, integer JR(*), JWL(N), JWU(N); JR is used over 1:N.
!
!    Output, integer IERR, error flag.
!      0, successful return.
!     >0, no diagonal entry was found in row number IERR (zero pivot).
!     -1, the elimination process generated a row in L or U whose
!         length exceeds N; the input matrix may be wrong.
!     -2, the matrix L overflows the arrays ALU, JLU.
!     -3, the matrix U overflows the arrays ALU, JLU.
!     -4, illegal value for LFIL.
!     -5, a diagonal entry of U is exactly zero.
!
  implicit none

  integer n

  real ( kind = 8 ) a(*)
  real ( kind = 8 ) alu(*)
  real ( kind = 8 ) fact
  integer ia(n+1)
  integer idiag
  integer ierr
  integer ii
  integer iwk
  integer j
  integer j1
  integer j2
  integer ja(*)
  integer jj
  integer jlu(*)
  integer jpos
  integer jr(*)
  integer jrow
  integer ju(*)
  integer ju0
  integer jwl(n)
  integer jwu(n)
  integer k
  integer lenl
  integer lenl0
  integer lenu
  integer lenu0
  integer lfil
  integer nkeep
  integer nl
  real ( kind = 8 ) s
  real ( kind = 8 ) t
  real ( kind = 8 ) tnorm
  real ( kind = 8 ) tol
  real ( kind = 8 ) wl(n)
  real ( kind = 8 ) wu(n+1)

  if ( lfil < 0 ) then
    ierr = -4
    return
  end if
!
!  Initialize JU0 (points to next element to be added to ALU, JLU)
!  and pointer.
!
  ju0 = n + 2
  jlu(1) = ju0
!
!  Integer double pointer array: JR(col) is the position of column col
!  in the current working row (in WL/JWL or WU/JWU), or 0 if absent.
!
  jr(1:n) = 0
!
!  The main loop.
!
  do ii = 1, n

    j1 = ia(ii)
    j2 = ia(ii+1) - 1
    lenu = 0
    lenl = 0
!
!  TNORM is the average absolute value of the entries of row II.
!  An empty row is left with TNORM = 0 (avoiding 0/0); it is reported
!  further down as a missing diagonal.
!
    tnorm = 0.0D+00
    do k = j1, j2
      tnorm = tnorm + abs ( a(k) )
    end do

    if ( j1 <= j2 ) then
      tnorm = tnorm / real ( j2-j1+1, kind = 8 )
    end if
!
!  Unpack L-part and U-part of row of A in arrays WL, WU.
!
    do j = j1, j2

      k = ja(j)
      t = a(j)

      if ( tol * tnorm <= abs ( t ) ) then

        if ( k < ii ) then
          lenl = lenl + 1
          jwl(lenl) = k
          wl(lenl) = t
          jr(k) = lenl
        else
          lenu = lenu + 1
          jwu(lenu) = k
          wu(lenu) = t
          jr(k) = lenu
        end if

      end if

    end do

    lenl0 = lenl
    lenu0 = lenu
    jj = 0
    nl = 0
!
!  Eliminate previous rows.
!
    eliminate: do

      jj = jj + 1

      if ( lenl < jj ) then
        exit eliminate
      end if
!
!  In order to do the elimination in the correct order we need to
!  exchange the current row number with the one that has
!  smallest column number, among JJ, JJ+1, ..., LENL.
!
      jrow = jwl(jj)
      k = jj
!
!  Determine the smallest column index.
!
      do j = jj+1, lenl
        if ( jwl(j) < jrow ) then
          jrow = jwl(j)
          k = j
        end if
      end do
!
!  Exchange in JWL.
!
      j = jwl(jj)
      jwl(jj) = jrow
      jwl(k) = j
!
!  Exchange in JR.
!
      jr(jrow) = jj
      jr(j) = k
!
!  Exchange in WL.
!
      s = wl(k)
      wl(k) = wl(jj)
      wl(jj) = s

      if ( ii <= jrow ) then
        exit eliminate
      end if
!
!  Get the multiplier for row to be eliminated: JROW.
!
      fact = wl(jj) * alu(jrow)
      jr(jrow) = 0
!
!  Drop the multiplier if it is small relative to the stored norm of
!  row JROW of U.
!
      if ( abs ( fact ) * wu(n+2-jrow) <= tol * tnorm ) then
        cycle eliminate
      end if
!
!  Combine current row and row JROW.
!
      do k = ju(jrow), jlu(jrow+1)-1

        s = fact * alu(k)
        j = jlu(k)
        jpos = jr(j)
!
!  If fill-in element and small disregard.
!
        if ( abs ( s ) < tol * tnorm .and. jpos == 0 ) then
          cycle
        end if

        if ( ii <= j ) then
!
!  Dealing with upper part.
!
          if ( jpos == 0 ) then
!
!  This is a fill-in element.
!
            lenu = lenu + 1

            if ( n < lenu ) then
              ierr = -1
              return
            end if

            jwu(lenu) = j
            jr(j) = lenu
            wu(lenu) = - s

          else
!
!  No fill-in element.
!
            wu(jpos) = wu(jpos) - s

          end if

        else
!
!  Dealing with lower part.
!
          if ( jpos == 0 ) then
!
!  This is a fill-in element.
!
            lenl = lenl + 1

            if ( n < lenl ) then
              ierr = -1
              return
            end if

            jwl(lenl) = j
            jr(j) = lenl
            wl(lenl) = - s

          else
!
!  No fill-in element.
!
            wl(jpos) = wl(jpos) - s

          end if

        end if

      end do
!
!  Keep the multiplier; kept multipliers are compacted at the front
!  of WL, JWL (NL never exceeds JJ).
!
      nl = nl + 1
      wl(nl) = fact
      jwl(nl) = jrow

    end do eliminate
!
!  Update the L matrix.
!
    nkeep = min ( nl, lenl0 + lfil )

    call bsort2 ( wl, jwl, nl, nkeep )

    do k = 1, nkeep

      if ( iwk < ju0 ) then
        ierr = -2
        return
      end if

      alu(ju0) = wl(k)
      jlu(ju0) = jwl(k)
      ju0 = ju0 + 1

    end do
!
!  Save pointer to beginning of row II of U.
!
    ju(ii) = ju0
!
!  Reset double pointer JR to zero (L-part - except first
!  JJ-1 elements which have already been reset).
!
    do k = jj, lenl
      jr(jwl(k)) = 0
    end do
!
!  Be sure that the diagonal element is first in WU and JWU.
!
    idiag = jr(ii)

    if ( idiag == 0 ) then
      ierr = ii
      return
    end if

    if ( idiag /= 1 ) then
!
!  Swap positions 1 and IDIAG.  The value must go to WU(1); writing
!  it anywhere else would leave a non-diagonal entry as the pivot.
!
      s = wu(1)
      wu(1) = wu(idiag)
      wu(idiag) = s

      j = jwu(1)
      jwu(1) = jwu(idiag)
      jwu(idiag) = j

    end if

    nkeep = min ( lenu, lenu0 + lfil )
!
!  Sort the off-diagonal part only.  With a single (diagonal) entry
!  there is nothing to sort, so the call is skipped.
!
    if ( 1 < lenu ) then
      call bsort2 ( wu(2), jwu(2), lenu-1, nkeep )
    end if
!
!  Update the U-matrix.
!
    t = 0.0D+00

    do k = 2, nkeep

      if ( iwk < ju0 ) then
        ierr = -3
        return
      end if

      jlu(ju0) = jwu(k)
      alu(ju0) = wu(k)
      t = t + abs ( wu(k) )
      ju0 = ju0 + 1

    end do
!
!  Save norm in WU (backwards). Norm is in fact average absolute value.
!
    wu(n+2-ii) = t / real ( nkeep + 1, kind = 8 )
!
!  Store inverse of diagonal element of U.
!
    if ( wu(1) == 0.0D+00 ) then
      ierr = -5
      return
    end if

    alu(ii) = 1.0D+00 / wu(1)
!
!  Update pointer to beginning of next row of U.
!
    jlu(ii+1) = ju0
!
!  Reset double pointer JR to zero (U-part).
!
    do k = 1, lenu
      jr(jwu(k)) = 0
    end do

  end do

  ierr = 0

  return
end subroutine ilut


subroutine bsort2 ( w, ind, n, ncut )

!*****************************************************************************80
!
!! BSORT2 returns the NCUT largest elements of an array, using bubble sort.
!
!  Discussion:
!
!    Each pass sweeps from the end of W towards the front and swaps
!    adjacent entries whenever the later one is larger in modulus, so
!    that after pass P the P largest entries (in modulus) sit in
!    W(1:P).  IND is permuted in step with W.
!
!    One pass is always made.  Further passes are made while the last
!    pass performed a swap and fewer than NCUT passes have been done.
!
!    (Ought to be replaced by a more efficient sort especially
!    if NCUT is not that small).
!
!  Modified:
!
!    07 January 2004
!
!  Author:
!
!    Youcef Saad
!
!  Parameters:
!
!    Input/output, real W(N), the values to be partially sorted.
!
!    Input/output, integer IND(*), reordered in the same way as W.
!
!    Input, integer N, the number of entries in W and IND.
!
!    Input, integer NCUT, the number of leading entries wanted.
!
  implicit none

  integer n

  integer front
  integer ind(*)
  integer itmp
  integer ncut
  integer pos
  logical swapped
  real ( kind = 8 ) w(n)
  real ( kind = 8 ) wtmp

  front = 0

  do

    front = front + 1
    swapped = .false.
!
!  Compare the pair (POS-1, POS), moving towards the front.
!
    do pos = n, front + 1, -1

      if ( abs ( w(pos-1) ) < abs ( w(pos) ) ) then

        wtmp = w(pos-1)
        w(pos-1) = w(pos)
        w(pos) = wtmp

        itmp = ind(pos-1)
        ind(pos-1) = ind(pos)
        ind(pos) = itmp

        swapped = .true.

      end if

    end do
!
!  Stop when the sweep found nothing to do, or enough leading
!  entries are in place.
!
    if ( .not. swapped ) then
      exit
    end if

    if ( ncut <= front ) then
      exit
    end if

  end do

  return
end subroutine bsort2



subroutine coocsr ( nrow, nnz, a, ir, jc, ao, jao, iao )

!*****************************************************************************80
!
!! COOCSR converts COO to CSR.
!
!  Discussion:
!
!    This routine converts a matrix that is stored in COO coordinate format
!    a, ir, jc into a CSR row general sparse ao, jao, iao format.
!
!    The conversion counts the entries of each row, turns the counts
!    into row start positions, scatters the entries to their rows in
!    input order, and finally shifts the (by then advanced) row
!    pointers back by one position.
!
!  Modified:
!
!    07 January 2004
!
!  Author:
!
!    Youcef Saad
!
!  Parameters:
!
!    Input, integer NROW, the row dimension of the matrix.
!
!    Input, integer NNZ, the number of nonzero elements in the matrix.
!
!    Input, real A(*), integer IR(*), JC(*), the matrix in coordinate
!    format.  A(K), IR(K), JC(K) store the NNZ nonzero elements of the
!    matrix, with A(K) the value of the element, IR(K) its row number
!    and JC(K) its column number.  The order of the elements is
!    arbitrary.  This routine only reads these arrays.
!
!    Output, real AO(*), JAO(*), IAO(NROW+1), the matrix in CSR
!    Compressed Sparse Row format.
!
  implicit none

  integer nrow

  real ( kind = 8 ) a(*)
  real ( kind = 8 ) ao(*)
  integer cnt
  integer col
  integer dest
  integer iao(nrow+1)
  integer ir(*)
  integer jao(*)
  integer jc(*)
  integer k
  integer next
  integer nnz
  integer row
  real ( kind = 8 ) val
!
!  Determine the row lengths.
!
  iao(1:nrow+1) = 0

  do k = 1, nnz
    iao(ir(k)) = iao(ir(k)) + 1
  end do
!
!  Turn the lengths into the starting position of each row.
!
  next = 1
  do row = 1, nrow + 1
    cnt = iao(row)
    iao(row) = next
    next = next + cnt
  end do
!
!  Go through the structure once more.  Fill in output matrix,
!  advancing the pointer of the receiving row after each entry.
!
  do k = 1, nnz
    row = ir(k)
    col = jc(k)
    val = a(k)
    dest = iao(row)
    ao(dest) = val
    jao(dest) = col
    iao(row) = dest + 1
  end do
!
!  Shift back IAO: each pointer now marks the start of the next row.
!
  iao(2:nrow+1) = iao(1:nrow)
  iao(1) = 1

  return
end subroutine coocsr


subroutine coocsr_inplace ( n, nnz, job, a, ja, ia, iwk )

!*****************************************************************************80
!
!! COOCSR_INPLACE converts COO to CSR in place.
!
!  Discussion:
!
!    This routine converts a matrix stored in coordinate format into
!    the CSR format.  The conversion is done in place in that the arrays
!    a,ja,ia of the result are overwritten onto the original arrays.
!
!    The entries of the output matrix are not sorted (the column
!    indices in each row are not in increasing order).
!
!    The method is a "chasing" process: an element is picked up, put at
!    the next free slot of its row, and the element that was displaced
!    from that slot is handled next.  Processed slots are marked by a
!    negative row index in IA.
!
!    Only the counts of rows 1 to N-1 are needed to compute the row
!    start positions, so IWK is never referenced beyond entry N.
!
!  Modified:
!
!    07 January 2004
!
!  Author:
!
!    Youcef Saad
!
!  Parameters:
!
!    Input, integer N, the row dimension of the matrix.
!
!    Input, integer NNZ, the number of nonzero elements in A.
!    If NNZ <= 0 an empty structure (all row pointers equal to 1) is
!    returned.
!
!    Input, integer JOB.  When JOB = 1, the real values in A are
!    filled.  Otherwise A is not touched and the structure of the
!    array only (i.e. JA, IA)  is obtained.
!
!    Input/output, real A(*).  On input, the NNZ matrix numeric values,
!    stored in the COO format.  On output, the numeric values, stored
!    in CSR format.
!
!    Input/output, integer JA(*).  On input, the NNZ column positions of
!    the corresponding elements in A.  On output, the CSR column indices.
!
!    Input/output, integer IA(*).  On input, the NNZ row positions of the
!    corresponding elements in A.  On output, the N+1 CSR row pointers.
!    The array must therefore hold at least max ( NNZ, N+1 ) entries.
!
!    Workspace, integer IWK(N).
!
  implicit none

  integer n
  integer nnz

  real ( kind = 8 ) a(*)
  integer i
  integer ia(*)
  integer inext
  integer init
  integer ipos
  integer iwk(n)
  integer j
  integer ja(*)
  integer jnext
  integer job
  integer k
  real ( kind = 8 ) t
  real ( kind = 8 ) tnext
  logical values

  values = ( job == 1 )
!
!  Nothing to move: every row is empty.
!
  if ( nnz <= 0 ) then
    ia(1:n+1) = 1
    return
  end if
!
!  Find pointer array for resulting matrix.  IWK(I+1) receives the
!  number of entries of row I; the count of the last row is not needed.
!
  iwk(1:n) = 0

  do k = 1, nnz
    i = ia(k)
    if ( i < n ) then
      iwk(i+1) = iwk(i+1) + 1
    end if
  end do

  iwk(1) = 1
  do i = 2, n
    iwk(i) = iwk(i-1) + iwk(i)
  end do
!
!  K counts the elements put in place so far; INIT is the position
!  where the current chasing cycle started.
!
  init = 1
  k = 0

  cycles: do
!
!  Pick up the element that starts a new cycle and mark its slot.
!
    if ( values ) then
      t = a(init)
    end if

    i = ia(init)
    j = ja(init)
    ia(init) = -1

    chase: do

      k = k + 1
!
!  Current row number is I.  Determine where to go.
!
      ipos = iwk(i)
!
!  Save the chased element.
!
      if ( values ) then
        tnext = a(ipos)
      end if

      inext = ia(ipos)
      jnext = ja(ipos)
!
!  Then occupy its location.
!
      if ( values ) then
        a(ipos) = t
      end if

      ja(ipos) = j
!
!  Update pointer information for next element to come in row I.
!
      iwk(i) = ipos + 1
!
!  A negative row index means the displaced slot had already been
!  emptied: this cycle is closed.
!
      if ( inext < 0 ) then
        exit chase
      end if
!
!  Otherwise the displaced element is the next one to be chased.
!
      if ( values ) then
        t = tnext
      end if

      i = inext
      j = jnext
      ia(ipos) = -1

      if ( nnz <= k ) then
        exit cycles
      end if

    end do chase
!
!  Look for the next element that has not been moved yet.
!
    do

      init = init + 1

      if ( nnz < init ) then
        exit cycles
      end if

      if ( 0 <= ia(init) ) then
        exit
      end if

    end do

  end do cycles
!
!  After the chase IWK(I) points one past the end of row I, i.e. to
!  the start of row I+1.
!
  ia(1) = 1
  ia(2:n+1) = iwk(1:n)

  return
end subroutine coocsr_inplace






subroutine daxpy ( n, da, dx, incx, dy, incy )

!*****************************************************************************80
!
!! DAXPY computes constant times a vector plus a vector.
!
!  Discussion:
!
!    The update DY = DY + DA * DX is applied to N entries, visited in
!    the order given by the increments.  For a negative increment the
!    traversal starts at position (1-N)*INC+1 and moves towards 1.
!
!    Nothing is done when N <= 0 or when DA is exactly zero.
!
!  Author:
!
!    Jack Dongarra
!
!  Reference:
!
!    Dongarra, Moler, Bunch, Stewart,
!    LINPACK User's Guide,
!    SIAM, 1979.
!
!    Lawson, Hanson, Kincaid, Krogh,
!    Basic Linear Algebra Subprograms for Fortran Usage,
!    Algorithm 539,
!    ACM Transactions on Mathematical Software,
!    Volume 5, Number 3, September 1979, pages 308-323.
!
!  Parameters:
!
!    Input, integer N, the number of elements in DX and DY.
!
!    Input, real ( kind = 8 ) DA, the multiplier of DX.
!
!    Input, real ( kind = 8 ) DX(*), the first vector.
!
!    Input, integer INCX, the increment between successive entries of DX.
!
!    Input/output, real ( kind = 8 ) DY(*), the second vector.
!    On output, DY(*) has been replaced by DY(*) + DA * DX(*).
!
!    Input, integer INCY, the increment between successive entries of DY.
!
  implicit none

  real ( kind = 8 ) da
  real ( kind = 8 ) dx(*)
  real ( kind = 8 ) dy(*)
  integer incx
  integer incy
  integer kx
  integer ky
  integer n
  integer step

  if ( n <= 0 ) then
    return
  end if

  if ( da == 0.0D+00 ) then
    return
  end if

  if ( incx == 1 .and. incy == 1 ) then
!
!  Both increments equal to 1: walk the two vectors in lock step.
!
    do step = 1, n
      dy(step) = dy(step) + da * dx(step)
    end do

  else
!
!  General increments.
!
    kx = 1
    if ( incx < 0 ) then
      kx = ( 1 - n ) * incx + 1
    end if

    ky = 1
    if ( incy < 0 ) then
      ky = ( 1 - n ) * incy + 1
    end if

    do step = 1, n
      dy(ky) = dy(ky) + da * dx(kx)
      kx = kx + incx
      ky = ky + incy
    end do

  end if

  return
end subroutine daxpy


function ddot ( n, dx, incx, dy, incy ) result ( dot )

!*****************************************************************************80
!
!! DDOT returns the inner product of two strided double precision vectors.
!
!  Discussion:
!
!    When both increments are 1, the MOD ( N, 5 ) leading entries are
!    accumulated one at a time and the remainder is accumulated in
!    groups of five (hand-unrolled loop).
!
!    For any other combination of increments a general strided loop is
!    used.  A negative increment makes the traversal of that vector start
!    at entry ( 1 - N ) * INC + 1 and step backward from there.
!
!    If N is not positive, the result is zero and the vectors are not
!    referenced.
!
!  Author:
!
!    Jack Dongarra
!
!  Reference:
!
!    Dongarra, Moler, Bunch, Stewart,
!    LINPACK User's Guide,
!    SIAM, 1979.
!
!  Parameters:
!
!    Input, integer N, the number of entries taken from each vector.
!
!    Input, real ( kind = 8 ) DX(*), the first vector.
!
!    Input, integer INCX, the stride between entries used from DX.
!
!    Input, real ( kind = 8 ) DY(*), the second vector.
!
!    Input, integer INCY, the stride between entries used from DY.
!
!    Output, real ( kind = 8 ) DDOT, the sum of the products of the
!    corresponding entries of DX and DY.
!
  implicit none

  integer, intent ( in ) :: n
  real ( kind = 8 ), intent ( in ) :: dx(*)
  integer, intent ( in ) :: incx
  real ( kind = 8 ), intent ( in ) :: dy(*)
  integer, intent ( in ) :: incy
  real ( kind = 8 ) :: dot

  integer :: k
  integer :: kx
  integer :: ky
  integer :: lead

  dot = 0.0D+00

  if ( n <= 0 ) then
    return
  end if

  if ( incx == 1 .and. incy == 1 ) then
!
!  Contiguous vectors: peel off the entries that do not fill a group
!  of five, then process the rest five at a time.
!
    lead = mod ( n, 5 )

    do k = 1, lead
      dot = dot + dx(k) * dy(k)
    end do

    do k = lead + 1, n, 5
      dot = dot + dx(k  ) * dy(k  ) &
                + dx(k+1) * dy(k+1) &
                + dx(k+2) * dy(k+2) &
                + dx(k+3) * dy(k+3) &
                + dx(k+4) * dy(k+4)
    end do

  else
!
!  General strides: choose the starting entry of each vector so that a
!  negative increment walks the same N entries in reverse.
!
    kx = 1
    if ( incx < 0 ) then
      kx = ( - n + 1 ) * incx + 1
    end if

    ky = 1
    if ( incy < 0 ) then
      ky = ( - n + 1 ) * incy + 1
    end if

    do k = 1, n
      dot = dot + dx(kx) * dy(ky)
      kx = kx + incx
      ky = ky + incy
    end do

  end if

  return
end function ddot



!----------------------------------------------------------------------c
      subroutine ilut0(n,a,ja,ia,lfil,droptol,alu,jlu,ju,iwk,w,jw,ierr)
!-----------------------------------------------------------------------
!                      *** ILUT preconditioner ***
!      incomplete LU factorization with dual truncation mechanism
!-----------------------------------------------------------------------
!     Author: Yousef Saad *May, 5, 1990, Latest revision, August 1996
!     (this variant calls bsort2 where the original called qsplit)
!-----------------------------------------------------------------------
! Purpose
! -------
! Computes, row by row, an incomplete LU factorization of the CSR matrix
! (a,ja,ia) and stores L and U together in alu/jlu.
!
! On entry
! --------
! n       = integer. Row dimension of the matrix.
! a,ja,ia = matrix in Compressed Sparse Row format. Every row must
!           contain at least one nonzero value (see ierr = -5).
! lfil    = integer, must be >= 0 (see ierr = -4). Fill-in limit:
!             * L-part of a row: at most min(lenl,lfil) multipliers are
!               stored.
!             * U-part of a row: with lenu counting the diagonal plus the
!               surviving off-diagonals, min(lenu,lfil)-1 off-diagonal
!               entries are stored. NOTE: this is one less than
!               "lfil entries excluding the diagonal" whenever the row
!               has at least lfil surviving off-diagonals.
! droptol = real(kind=8). Drop tolerance (see "Dropping" below).
! iwk     = integer. Usable length of alu and jlu.
!
! On return
! ---------
! alu,jlu = L and U in one structure: alu(1:n) holds the INVERTED
!           diagonal of U; jlu(1:n+1) holds row pointers (jlu(1) = n+2);
!           from position n+2 on, each row stores its L-part (unit
!           diagonal not stored) followed by its U-part off-diagonals,
!           with column indices in jlu.
! ju      = integer array of length n; ju(i) is the position in alu/jlu
!           where the U-part of row i begins.
! ierr    = integer status:
!             0  --> successful return.
!            -1  --> the L- or U-part of a working row grew beyond n
!                    entries (input matrix is probably wrong).
!            -2  --> alu/jlu too short while storing an L-part.
!            -3  --> alu/jlu too short for the U-part of a row.
!            -4  --> illegal (negative) lfil.
!            -5  --> a row whose entries are all zero (or an empty row).
!           This routine never returns ierr > 0: a zero pivot is not an
!           error here, it is replaced by (1.0d-4 + droptol)*tnorm.
!           On any nonzero ierr the contents of alu, jlu, ju are partial.
!
! Work arrays
! -----------
! w       = real work array of length n+1.
! jw      = integer work array of length 2*n.
!           While row ii is processed, w/jw(1:lenl) hold the L-part and
!           w/jw(ii:ii+lenu-1) hold the U-part (diagonal first);
!           jw(n+j) is the position of column j in that working row, or
!           0 when column j is not present.
!
! Dropping
! --------
!  1) A multiplier fact = w(jj)*alu(jrow) is discarded when
!     abs(fact) <= droptol. An off-diagonal U entry is kept only if its
!     magnitude exceeds droptol*tnorm, tnorm being the mean absolute
!     value of the entries of the corresponding row of A.
!  2) The lfil limit described above is then applied through bsort2.
!     NOTE(review): bsort2 is defined elsewhere in this file; it is
!     presumably expected to bring the requested number of
!     largest-magnitude entries to the front -- confirm there.
!-----------------------------------------------------------------------
      implicit none
      integer, intent(in) :: n
      integer, intent(in) :: lfil
      integer, intent(in) :: iwk
      integer, intent(in) :: ja(*)
      integer, intent(in) :: ia(n+1)
      real(kind=8), intent(in) :: a(*)
      real(kind=8), intent(in) :: droptol
      real(kind=8), intent(inout) :: alu(*)
      real(kind=8), intent(inout) :: w(n+1)
      integer, intent(inout) :: jlu(*)
      integer, intent(inout) :: ju(n)
      integer, intent(inout) :: jw(2*n)
      integer, intent(out) :: ierr
!
!     locals
!
!     Relative size of the value substituted for an exactly zero pivot.
!     Written as a double precision constant so that no single
!     precision rounding error enters the factorization.
      real(kind=8), parameter :: pivot_shift = 1.0d-4
      integer :: next, irow, kbeg, kend, k, col, pos
      integer :: nlow, nupp, nkeep, cur, kmin, prow
      real(kind=8) :: tnorm, fact, s
!
      if (lfil < 0) then
         ierr = -4
         return
      end if
!-----------------------------------------------------------------------
!     next points to the next free slot of alu/jlu; slots 1..n+1 are
!     reserved for the inverted diagonal (alu) and row pointers (jlu).
!-----------------------------------------------------------------------
      next = n + 2
      jlu(1) = next
!
!     no column is present in the working row yet.
!
      do k = 1, n
         jw(n+k) = 0
      end do
!-----------------------------------------------------------------------
!     main loop over the rows of A.
!-----------------------------------------------------------------------
      rows: do irow = 1, n
         kbeg = ia(irow)
         kend = ia(irow+1) - 1
!
!     mean absolute value of the row, used to scale the U drop test.
!
         tnorm = 0.0d0
         do k = kbeg, kend
            tnorm = tnorm + abs(a(k))
         end do
         if (tnorm == 0.0d0) then
            ierr = -5
            return
         end if
         tnorm = tnorm / real(kend-kbeg+1, kind=8)
!
!     unpack the row into the working arrays. The diagonal always
!     occupies position irow (value 0 if it is absent from A).
!
         nupp = 1
         nlow = 0
         jw(irow) = irow
         w(irow) = 0.0d0
         jw(n+irow) = irow
!
         do k = kbeg, kend
            col = ja(k)
            if (col < irow) then
               nlow = nlow + 1
               jw(nlow) = col
               w(nlow) = a(k)
               jw(n+col) = nlow
            else if (col == irow) then
               w(irow) = a(k)
            else
               nupp = nupp + 1
               pos = irow + nupp - 1
               jw(pos) = col
               w(pos) = a(k)
               jw(n+col) = pos
            end if
         end do
!
!     eliminate previous rows. nlow may grow inside this loop because
!     of fill-in, so the termination test is re-evaluated every pass.
!     nkeep counts the multipliers retained so far; they are stored at
!     the front of w/jw, which is safe because nkeep never exceeds cur.
!
         cur = 0
         nkeep = 0
         eliminate: do
            cur = cur + 1
            if (cur > nlow) exit eliminate
!
!     elimination must proceed by increasing column index: select the
!     smallest column among jw(cur:nlow) and move it to position cur.
!
            prow = jw(cur)
            kmin = cur
            do k = cur + 1, nlow
               if (jw(k) < prow) then
                  prow = jw(k)
                  kmin = k
               end if
            end do
!
            if (kmin /= cur) then
               col = jw(cur)
               jw(cur) = jw(kmin)
               jw(kmin) = col
               jw(n+prow) = cur
               jw(n+col) = kmin
               s = w(cur)
               w(cur) = w(kmin)
               w(kmin) = s
            end if
!
!     column prow leaves the working row.
!
            jw(n+prow) = 0
!
!     multiplier for row prow (alu(prow) is the inverted pivot).
!
            fact = w(cur)*alu(prow)
            if (abs(fact) <= droptol) cycle eliminate
!
!     subtract fact times the U-part of row prow from the working row.
!
            do k = ju(prow), jlu(prow+1) - 1
               s = fact*alu(k)
               col = jlu(k)
               pos = jw(n+col)
               if (pos /= 0) then
!     column already present: plain update.
                  w(pos) = w(pos) - s
               else if (col >= irow) then
!     fill-in in the U-part.
                  nupp = nupp + 1
                  if (nupp > n) then
                     ierr = -1
                     return
                  end if
                  pos = irow + nupp - 1
                  jw(pos) = col
                  jw(n+col) = pos
                  w(pos) = -s
               else
!     fill-in in the L-part.
                  nlow = nlow + 1
                  if (nlow > n) then
                     ierr = -1
                     return
                  end if
                  jw(nlow) = col
                  jw(n+col) = nlow
                  w(nlow) = -s
               end if
            end do
!
!     keep this multiplier.
!
            nkeep = nkeep + 1
            w(nkeep) = fact
            jw(nkeep) = prow
         end do eliminate
!
!     clear the position markers of the U-part (the L-part markers were
!     cleared one by one during elimination).
!
         do k = 1, nupp
            jw(n+jw(irow+k-1)) = 0
         end do
!
!     L-part: apply the lfil limit and store.
!
         nlow = nkeep
         nkeep = min(nlow, lfil)
         call bsort2 (w, jw, nlow, nkeep)
!
         do k = 1, nkeep
            if (next > iwk) then
               ierr = -2
               return
            end if
            alu(next) = w(k)
            jlu(next) = jw(k)
            next = next + 1
         end do
!
!     the U-part of this row starts here.
!
         ju(irow) = next
!
!     U-part: drop small off-diagonals (compacting in place), then apply
!     the lfil limit. nupp counts the diagonal as well.
!
         nkeep = 0
         do k = 1, nupp - 1
            if (abs(w(irow+k)) > droptol*tnorm) then
               nkeep = nkeep + 1
               w(irow+nkeep) = w(irow+k)
               jw(irow+nkeep) = jw(irow+k)
            end if
         end do
         nupp = nkeep + 1
         nkeep = min(nupp, lfil)
         call bsort2 (w(irow+1), jw(irow+1), nupp-1, nkeep)
!
!     store the first nkeep-1 off-diagonal entries of the U-part.
!
         if (nkeep + next > iwk) then
            ierr = -3
            return
         end if
         do k = irow + 1, irow + nkeep - 1
            jlu(next) = jw(k)
            alu(next) = w(k)
            next = next + 1
         end do
!
!     store the inverse of the pivot; an exactly zero pivot is replaced
!     by a small multiple of the row scale instead of failing.
!
         if (w(irow) == 0.0d0) w(irow) = (pivot_shift + droptol)*tnorm
         alu(irow) = 1.0d0/w(irow)
!
!     pointer to the beginning of the next row.
!
         jlu(irow+1) = next
      end do rows
!
      ierr = 0
      return
!----------------end-of-ilut0-------------------------------------------
      end subroutine ilut0
