! f90 include file
!
! modified for MUESLI Fortran library
! (C) É. Canot -- IPR/CNRS -- 20 Oct 2017
!_______________________________________________________________________
!
   subroutine lmdif( fcn, m, n, x, fvec, ftol, xtol, gtol, maxfev,      &
                     epsfcn, diag, mode, factor, info, nfev, fjac,      &
                     ipvt, qtf, wa1, wa2, wa3, wa4, max_iter, print,    &
                     print_using_transf, f_inv_transf, iter,            &
                     residue_hist, x_hist, box_constrained, lbounds,    &
                     ubounds, identifiability, sensitiv_ratio,          &
                     sing_jac_tol, print_sing_val )

      use mod_mfdebug, only: mf_message_level, MF_NUMERICAL_CHECK,      &
                             muesli_trace
      use mod_mfaux, only: mf_save_and_disable_fpe, mf_restore_fpe,     &
                           func_ptr
      use mod_mfarray, only: PrintMessage, mfArray
      use mod_core, only: mf, msDisplay, msSilentRelease, msAssign,     &
                          msFormat
      use mod_ops, only: operator(/)
      use mod_datafun, only: mfMax
      use mod_matfun, only: mfNull, mfSVD

      interface
         subroutine fcn( m, n, p, fvec, flag )
            integer,          intent(in) :: m, n
            double precision, intent(in) :: p(n)
            double precision             :: fvec(m)
            integer                      :: flag
         end subroutine fcn
      end interface
      integer,          intent(in)  :: m, n, maxfev, mode, max_iter
      double precision, intent(in)  :: ftol, xtol, gtol, factor
      double precision, intent(in)  :: epsfcn(n)
      logical,          intent(in)  :: print, print_using_transf
      type(func_ptr)                :: f_inv_transf(n)

      double precision              :: x(n), diag(n),                   &
                                       wa1(n), wa2(n), wa3(n), wa4(m)

      integer,          intent(out) :: info, nfev, ipvt(n), iter
      double precision, intent(out) :: fvec(m), fjac(m,n), qtf(n),      &
                                       residue_hist(max_iter),          &
                                       x_hist(max_iter,n)
      logical,          intent(in)  :: box_constrained
      double precision, intent(in)  :: lbounds(*), ubounds(*)
      logical,          intent(in)  :: identifiability
      double precision, intent(out) :: sensitiv_ratio(n)
      double precision, intent(in)  :: sing_jac_tol
      logical,          intent(in)  :: print_sing_val

!     **********
!
!     subroutine lmdif
!
!     The purpose of lmdif is to minimize the sum of the squares of
!     m nonlinear functions in n variables by a modification of
!     the Levenberg-Marquardt algorithm. The user must provide a
!     subroutine which calculates the functions. The jacobian is
!     then calculated by a finite-difference approximation.
!
!     The subroutine statement is
!
!       subroutine lmdif( fcn, m, n, x, fvec, ftol, xtol, gtol, maxfev,
!                         epsfcn, diag, mode, factor, info, nfev, fjac,
!                         ipvt, qtf, wa1, wa2, wa3, wa4, max_iter, print,
!                         print_using_transf, f_inv_transf, iter,
!                         residue_hist, x_hist, box_constrained, lbounds,
!                         ubounds, identifiability, sensitiv_ratio,
!                         sing_jac_tol, print_sing_val )
!
!     where
!
!       fcn is the name of the user-supplied subroutine which
!         calculates the functions. See the interface in the header.
!         The value of flag should not be changed by fcn unless
!         the user wants to terminate execution of lmdif; in this
!         case, set flag to a negative integer.
!
!       m is a positive integer input variable set to the number
!         of functions.
!
!       n is a positive integer input variable set to the number
!         of variables. n must not exceed m.
!
!       x is an array of length n. On input x must contain
!         an initial estimate of the solution vector. On output x
!         contains the final estimate of the solution vector.
!
!       fvec is an output array of length m which contains
!         the functions evaluated at the output x.
!
!       ftol is a nonnegative input variable. Termination
!         occurs when both the actual and predicted relative
!         reductions in the sum of squares are at most ftol.
!         Therefore, ftol measures the relative error desired
!         in the sum of squares.
!
!       xtol is a nonnegative input variable. Termination
!         occurs when the relative error between two consecutive
!         iterates is at most xtol. Therefore, xtol measures the
!         relative error desired in the approximate solution.
!
!       gtol is a nonnegative input variable. Termination
!         occurs when the cosine of the angle between fvec and
!         any column of the jacobian is at most gtol in absolute
!         value. Therefore, gtol measures the orthogonality
!         desired between the function vector and the columns
!         of the jacobian.
!
!       maxfev is a positive integer input variable. Termination
!         occurs when the number of calls to fcn is at least
!         maxfev by the end of an iteration.
!
!       epsfcn is a array of length n. On input, its components are used
!         in determining a suitable step length for the forward-difference
!         approximation. See in the code of fdjac2.
!
!       diag is an array of length n. If mode = 1 (see
!         below), diag is internally set. If mode = 2, diag
!         must contain positive entries that serve as
!         multiplicative scale factors for the variables.
!
!       mode is an integer input variable. If mode = 1, the
!         variables will be scaled internally. If mode = 2,
!         the scaling is specified by the input diag. Other
!         values of mode are equivalent to mode = 1.
!
!       factor is a positive input variable used in determining the
!         initial step bound. This bound is set to the product of
!         factor and the euclidean norm of diag*x if nonzero, or else
!         to factor itself. In most cases factor should lie in the
!         interval (.1,100.). 100. is a generally recommended value.
!
!       info is an integer output variable. If the user has
!         terminated execution, info is set to the (negative)
!         value of flag (see description of fcn); otherwise,
!         info is set as follows.
!
!         info = 0  Improper input parameters.
!
!         info = 1  Both actual and predicted relative reductions
!                   in the sum of squares are at most ftol.
!
!         info = 2  Relative error between two consecutive iterates
!                   is at most xtol.
!
!         info = 3  Conditions for info = 1 and info = 2 both hold.
!
!         info = 4  The cosine of the angle between fvec and any
!                   column of the jacobian is at most gtol in
!                   absolute value.
!                   This means that the gradient is approximatively
!                   null (with respect to gtol): it corresponds to
!                   a min, a max, or a saddle point, so the algorithm
!                   cannot continue.
!
!         info = 5  Number of calls to fcn has reached maxfev.
!
!         info = 6  ftol is too small. No further reduction in
!                   the sum of squares is possible.
!
!         info = 7  xtol is too small. No further improvement in
!                   the approximate solution x is possible.
!
!         info = 8  gtol is too small. fvec is orthogonal to the
!                   columns of the jacobian to machine precision.
!                   This means that the gradient is approximatively
!                   null (with respect to epsmch): it corresponds to
!                   a min, a max, or a saddle point, so the algorithm
!                   cannot continue.
!
!         info = 14 A NaN value has been found in fvec, after calling
!                   fcn().
!
!       nfev is an integer output variable set to the number of
!         calls to fcn.
!
!       fjac is an output m by n array. The upper n by n submatrix
!         of fjac contains an upper triangular matrix r with
!         diagonal elements of nonincreasing magnitude such that
!
!                t     t           t
!               p *(jac *jac)*p = r *r,
!
!         where p is a permutation matrix and jac is the final
!         calculated jacobian. Column j of p is column ipvt(j)
!         (see below) of the identity matrix. The lower trapezoidal
!         part of fjac contains information generated during
!         the computation of r.
!
!       ipvt is an integer output array of length n. ipvt
!         defines a permutation matrix p such that jac*p = q*r,
!         where jac is the final calculated jacobian, q is
!         orthogonal (not stored), and r is upper triangular
!         with diagonal elements of nonincreasing magnitude.
!         Column j of p is column ipvt(j) of the identity matrix.
!
!       qtf is an output array of length n which contains
!         the first n elements of the vector (q transpose)*fvec.
!
!       wa1, wa2, and wa3 are work arrays of length n.
!
!       wa4 is a work array of length m.
!
!       max_iter (integer) is the maximum number of iteration required
!         by the user.
!
!       print (logical)
!
!       iter (integer)
!
!       residue_hist
!
!       x_hist
!
!     subprograms called
!
!       user-supplied ...... fcn
!
!       minpack-supplied ... enorm, fdjac2, lmpar, qrfac
!
!       fortran-supplied ... abs, max, min, sqrt
!
!     Argonne National Laboratory. Minpack project. March 1980.
!     Burton S. Garbow, Kenneth E. Hillstrom, Jorge J. Moré
!
!     Modified by É. Canot (implement constraints on parameters)
!
!     **********
      integer :: i, flag, j, l, count_retry
      double precision :: actred, delta, dirder, epsmch, fnorm, fnorm1, &
                          gnorm, par, pnorm, prered, ratio, sum, temp,  &
                          temp1, temp2, xnorm

      double precision, parameter :: one = 1.0d0, zero = 0.0d0,         &
                          p1 = 1.0d-1, p5 = 5.0d-1, p25 = 2.5d-1,       &
                          p75 = 7.5d-1, p0001 = 1.0d-4

      character(len=5) :: str

      logical :: changed_starting_pt, full_rank
      double precision, allocatable :: fjac_save(:,:)
      type(mfArray) :: nullspace, sigma
      double precision :: tol2

!--------- end of declarations -- execution starts hereafter  ------

      ! epsmch is the machine precision.
      epsmch = epsilon(1.0d0)

      info = 0
      flag = 0

      ! Count calls of fcn only when flag is negative or zero
      ! Take into account the approximation of the jacobian via fcn.
      nfev = 0

      ! Check the input parameters for errors.
      if( n <= 0 .or. m < n .or. ftol < zero .or. xtol < zero           &
          .or. gtol < zero .or. maxfev <= 0 .or. factor <= zero ) then
         ! improper input parameters (should never occur because 'lmdif'
         ! is called by LsqNonLin.
         go to 99
      end if
      if( mode == 2 ) then
         do j = 1, n
            if( diag(j) <= zero ) go to 99
         end do
      end if

      ! Check that the starting point is acceptable, if constraints exist.
      changed_starting_pt = .false.
      if( box_constrained ) then
         do i = 1, n
            if( x(i) < lbounds(i) ) then
               x(i) = lbounds(i)
               changed_starting_pt = .true.
            end if
            if( ubounds(i) < x(i) ) then
               x(i) = ubounds(i)
               changed_starting_pt = .true.
            end if
         end do
         if( changed_starting_pt ) then
            call PrintMessage( "LsqNonLin", "W",                        &
                               "Due to the use of box constraints,",    &
                               "starting point has been moved.")
         end if
      end if

      ! Evaluate the function at the starting point and calculate its norm
      flag = 0

      call mf_restore_fpe( )
      call fcn( m, n, x, fvec, flag )
      call mf_save_and_disable_fpe( )

      nfev = 1

      ! check NaN values
      if( MF_NUMERICAL_CHECK .and. flag >= 0 ) then
         do i = 1, m
            if( isnan(fvec(i)) ) then
               print "(/,A)", "(MUESLI LsqNonLin:) [lmdif] ERROR"
               print "(20X,A)", "A NaN value has been found after calling the", &
                                "user-supplied FCN routine."
               print "(20X,A,I0)", "This occured in fvec(i) for i = ", i
               call muesli_trace( pause="yes" )
               info = 14
               return
            end if
         end do
      end if

      if( flag < 0 ) then
         flag = -10
         go to 99
      end if

      fnorm = enorm(m,fvec)

      ! Initialize Levenberg-Marquardt parameter and iteration counter.
      par = zero
      iter = 1
      residue_hist(iter) = fnorm
      x_hist(iter,1:n) = x(1:n)

      if( print ) then
         print "(A,I0,A,/,A)", '  *** iter = ', iter, ' ***',           &
                               '  parameters ='
         if( print_using_transf ) then
            do i = 1, n
               print "(2X,ES13.6)", f_inv_transf(i)%ptr(x(i))
            end do
         else
            do i = 1, n
               print "(2X,ES13.6)", x(i)
            end do
         end if
         print "(A,ES10.3,/)", '  norm of residue = ', fnorm
      end if
if( iter >= max_iter ) then
   info = 5
   go to 99
end if

      outer:do

         ! Calculate the jacobian matrix.
         ! Precision is: max( epsfcn(j), sqrt(epsmch) ) for each
         ! parameter j.
         ! (flag is now set to 0 inside the loop of fdjac2)
         call fdjac2( fcn, m, n, x, fvec, fjac, flag, epsfcn, wa4,      &
                      box_constrained, lbounds, ubounds )

         nfev = nfev + n

         if( flag > 0 ) then
            flag = -9
            exit outer
         end if

         if( flag < 0 ) then
            flag = -10
            exit outer
         end if

      if( MF_NUMERICAL_CHECK ) then
         ! Save the jacobian (it will be overwritten by qrfac) in case
         ! where we have to compute its nullspace or its SVD (see below)
         if( .not. allocated(fjac_save) ) then
            allocate( fjac_save(m,n) )
         end if
         fjac_save(:,:) = fjac(:,:)
      endif

         ! Compute the QR factorization of the jacobian.
         call qrfac( m, n, fjac, m, .true., ipvt, n, wa1, wa2, wa3 )

         ! If needed, compute identifiability of parameters.
         if( identifiability ) then
            do i = 1, n
               sensitiv_ratio(i) = abs(wa1(i)/wa1(1))
            end do
            ! Retrieve parameters' initial order.
            sensitiv_ratio(ipvt) = sensitiv_ratio(:)
         end if

         if( MF_NUMERICAL_CHECK ) then

            ! In debug mode only, try to detect a singular jacobian by using
            ! the absolute value of the diagonal elements of the R factor.
            if( n > 1 ) then
               full_rank = .true.
               do i = 2, n
                  if( abs(wa1(i)/wa1(1)) < sing_jac_tol ) then
                     full_rank = .false.
                     exit
                  end if
               end do
               if( .not. full_rank ) then
                  write(str,"(I0)") i-1
                  call PrintMessage( "LsqNonLin", "W",                  &
                     "parameters are detected as dependant,",           &
                     "because the jacobian is not full rank.",          &
                     "(for your information: rank=" // trim(str) // ")" )
                  if( mf_message_level >= 2 ) then
                     ! Output of the basis of the nullspace
                     ! (i.e. the relationship(s) between the dependant
                     !  parameter, if any)
                     !######################################################
                     ! The setting of this tolerance is very important!
                     tol2 = max( sing_jac_tol, sqrt(epsmch) )
                     !######################################################
                     call msAssign( nullspace, mfNull( mf(fjac_save),   &
                                                      rational=.true., tol=tol2 ) )
                     print "(A)", " (MUESLI LsqNonLin:) the following"  &
                                 // " matrix shows the linear relationship"
                     print "(21X,A)", "between the dependant parameters:"
                     call msFormat( exponent="sci" )
                     call msDisplay( nullspace, "nullspace" )
                     call msFormat( )
                  end if
               end if
            end if

            if( n > 1 ) then
               if( print ) then
                  if( print_sing_val ) then
                     ! In debug mode only, compute the singular values of the
                     ! Jacobian, via an Singular Value Decomposition
                     call msAssign( sigma, mfSVD( mf(fjac_save) ) )
                     print "(A)", " (MUESLI LsqNonLin:) As requested"   &
                                 // ", hereafter can be found the normalized"
                     print "(A)", "                     singular values" &
                                 // " of the jacobian matrix."
                     call msAssign( sigma, sigma/mfMax(sigma) )
                     call msFormat( exponent="sci" )
                     call msDisplay( sigma, "(normalized) sigma" )
                     call msFormat( )
                     call msSilentRelease( sigma )
                  end if
               end if
            end if

         endif

         ! On the first iteration and if mode is 1, scale according
         ! to the norms of the columns of the initial jacobian.
         if( iter == 1 ) then
            if( mode /= 2 ) then
               do j = 1, n
                  diag(j) = wa2(j)
                  if( wa2(j) == zero ) diag(j) = one
               end do
            end if

            ! On the first iteration, calculate the norm of the scaled x
            ! and initialize the step bound delta.
            do j = 1, n
               wa3(j) = diag(j)*x(j)
            end do
            xnorm = enorm(n,wa3)
            delta = factor*xnorm
            if( delta == zero ) delta = factor
         end if

         ! Form (q transpose)*fvec and store the first n components in qtf
         do i = 1, m
            wa4(i) = fvec(i)
         end do
         do j = 1, n
            if( fjac(j,j) /= zero ) then
               sum = zero
               do i = j, m
                  sum = sum + fjac(i,j)*wa4(i)
               end do
               temp = -sum/fjac(j,j)
               do i = j, m
                  wa4(i) = wa4(i) + fjac(i,j)*temp
               end do
            end if
            fjac(j,j) = wa1(j)
            qtf(j) = wa4(j)
         end do

         ! Compute the norm of the scaled gradient.
         gnorm = zero
         if( fnorm /= zero ) then
            do j = 1, n
               l = ipvt(j)
               if( wa2(l) /= zero ) then
                  sum = zero
                  do i = 1, j
                     sum = sum + fjac(i,j)*(qtf(i)/fnorm)
                  end do
                  gnorm = max( gnorm, abs(sum/wa2(l)) )
               end if
            end do
         end if

         ! Test for convergence of the gradient norm.
         if( gnorm <= gtol ) then
            info = 4
            exit outer
         end if

         ! Rescale if necessary.
         if( mode /= 2 ) then
            do j = 1, n
               diag(j) = max( diag(j), wa2(j) )
            end do
         end if

         inner:do

            count_retry = 0

            ! Determine the Levenberg-Marquardt parameter.
            call lmpar( n, fjac, m, ipvt, diag, qtf, delta, par,        &
                        wa1, wa2, wa3, wa4 )

            ! Store the direction p and x + p. Calculate the norm of p.
            do j = 1, n
               wa1(j) = -wa1(j)
               wa2(j) = x(j) + wa1(j)
               wa3(j) = diag(j)*wa1(j)
            end do
            pnorm = enorm(n,wa3)

            ! On the first iteration, adjust the initial step bound.
            if( iter == 1 ) delta = min(delta,pnorm)

            ! In case of constraints, project the new x on the box.
            if( box_constrained ) then
               do i = 1, n
                  if( wa2(i) < lbounds(i) ) wa2(i) = lbounds(i)
                  if( ubounds(i) < wa2(i) ) wa2(i) = ubounds(i)
               end do
            end if

            ! Evaluate the function at x + p and calculate its norm.
            flag = 0

            call mf_restore_fpe( )
            call fcn( m, n, wa2, wa4, flag )
            call mf_save_and_disable_fpe( )

            nfev = nfev + 1

            ! check NaN values
            if( MF_NUMERICAL_CHECK .and. flag >= 0 ) then
               do i = 1, m
                  if( isnan(wa4(i)) ) then
                     print "(/,A)", "(MUESLI LsqNonLin:) [lmdif] ERROR"
                     print "(20X,A)", "A NaN value has been found after calling the"
                     print "(20X,A)", "user-supplied FCN routine."
                     print "(20X,A,I0)", "This occured in fvec(i) for i = ", i
                     call muesli_trace( pause="yes" )
                     info = 14
                     return
                  end if
               end do
            end if

            if( flag < 0 ) then
               flag = -10
               exit outer
            end if

            fnorm1 = enorm(m,wa4)

            ! Compute the scaled actual reduction.
            actred = -one
            if( p1*fnorm1 < fnorm ) actred = one - (fnorm1/fnorm)**2

            ! Compute the scaled predicted reduction and the scaled
            ! directional derivative.
            do j = 1, n
               wa3(j) = zero
               l = ipvt(j)
               temp = wa1(l)
               do i = 1, j
                  wa3(i) = wa3(i) + fjac(i,j)*temp
               end do
            end do
            temp1 = enorm(n,wa3)/fnorm
            temp2 = (sqrt(par)*pnorm)/fnorm
            prered = temp1**2 + temp2**2/p5
            dirder = -(temp1**2 + temp2**2)

            ! Compute the ratio of the actual to the predicted reduction.
            ratio = zero
            if( prered /= zero ) ratio = actred/prered

            ! Update the step bound.
            if( ratio > p25 ) then
               if( par == zero .or. ratio >= p75 ) then
                  delta = pnorm/p5
                  par = p5*par
               end if
            else
               if( actred >= zero ) temp = p5
               if( actred < zero ) temp = p5*dirder/(dirder + p5*actred)
               if( p1*fnorm1 >= fnorm .or. temp < p1 ) temp = p1
               delta = temp*min(delta,pnorm/p1)
               par = par/temp
            end if

            ! Test for successful iteration.
            if( ratio >= p0001 ) then

               ! Successful iteration. Update x, fvec, and their norms.
               do j = 1, n
                  x(j) = wa2(j)
                  wa2(j) = diag(j)*x(j)
               end do
               do i = 1, m
                  fvec(i) = wa4(i)
               end do
               xnorm = enorm(n,wa2)
               fnorm = fnorm1
               iter = iter + 1
               if( iter > max_iter ) then
                  print *
                  print *, "(MUESLI:) LsqNonLin: internal error"
                  print *, "          in lmdif(), line 630: iter > max_iter"
                  print *, "          cannot store results in log files"
                  print *, "          Please report this bug to: Edouard.Canot@univ-rennes.fr"
                  print *
                  print *, "  pause for debugging purpose"
                  print *, "  [RETURN] to resume..."
                  read *
                  stop
               end if
               residue_hist(iter) = fnorm
               x_hist(iter,1:n) = x(1:n)

               if( print ) then
                  print "(A,I0,A,/,A)", '  *** iter = ', iter, ' ***',  &
                                        '  parameters ='
                  if( print_using_transf ) then
                     do i = 1, n
                        print "(2X,ES13.6)", f_inv_transf(i)%ptr(x(i))
                     end do
                  else
                     do i = 1, n
                        print "(2X,ES13.6)", x(i)
                     end do
                  end if
                  print "(A,ES10.3,/)", '  norm of residue = ', fnorm
               end if

            end if
if( iter >= max_iter ) then
   info = 5
   exit outer
end if

            ! Tests for convergence.
            if( abs(actred) <= ftol .and. prered <= ftol                &
                .and. p5*ratio <= one ) info = 1

            ! modified (EC): Stopping criterion depends on |x| and xtol.
            if( xnorm <= xtol ) then
               ! Take an absolute comparison
               if( delta <= xtol ) then
                  if( print ) then
                     print *, "(LsqNonLin:) terminate with absolute " //&
                              "comparison for xtol"
                  end if
                  info = 2
               end if
            else
               ! Keep the old relative comparison
               if( delta <= xtol*xnorm ) then
                  if( print ) then
                     print *, "(LsqNonLin:) terminate with relative " //&
                              "comparison for xtol"
                  end if
                  info = 2
               end if
            end if

            if( info == 1 .and. info == 2 ) info = 3
            if( info /= 0 ) exit outer

            ! Tests for termination and stringent tolerances.
!###  Added this test for iter, because the next test for nfev is not (y
!     accurate enough...
if( iter >= max_iter ) info = 5
! End of temporary fix
            if( nfev >= maxfev ) info = 5
            if( abs(actred) <= epsmch .and. prered <= epsmch            &
                .and. p5*ratio <= one ) info = 6
            if( delta <= epsmch*xnorm ) info = 7
            if( gnorm <= epsmch ) info = 8
            if( info /= 0 ) exit outer

            ! End of the inner loop. Repeat only if iteration unsuccessful
            if( ratio >= p0001 ) exit inner
         end do inner

      ! End of the outer loop.
      end do outer

   99 continue

      if( MF_NUMERICAL_CHECK ) then
         call msSilentRelease( nullspace )
      endif

      ! Termination, either normal or user imposed.
      if( flag < 0 ) info = flag

   end
