stdlib_lapack_eigv_svd_bidiag_dc.fypp Source File


Source Code

#:include "common.fypp" 
submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_eigv_svd_bidiag_dc
  implicit none


  contains
#:for ik,it,ii in LINALG_INT_KINDS_TYPES

     pure module subroutine stdlib${ii}$_slasd0( n, sqre, d, e, u, ldu, vt, ldvt, smlsiz, iwork,work, info )
     !! Using a divide and conquer approach, SLASD0: computes the singular
     !! value decomposition (SVD) of a real upper bidiagonal N-by-M
     !! matrix B with diagonal D and offdiagonal E, where M = N + SQRE.
     !! The algorithm computes orthogonal matrices U and VT such that
     !! B = U * S * VT. The singular values S are overwritten on D.
     !! A related subroutine, SLASDA, computes only the singular values,
     !! and optionally, the singular vectors in compact form.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldu, ldvt, n, smlsiz, sqre
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(sp), intent(inout) :: d(*), e(*)
           real(sp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*)
        ! =====================================================================
           ! Local Scalars 
           integer(${ik}$) :: i, i1, ic, idxq, idxqc, im1, inode, itemp, iwk, j, lf, ll, lvl, m, ncc,&
                      nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqrei
           real(sp) :: alpha, beta
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -1_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -2_${ik}$
           end if
           m = n + sqre
           if( ldu<n ) then
              info = -6_${ik}$
           else if( ldvt<m ) then
              info = -8_${ik}$
           else if( smlsiz<3_${ik}$ ) then
              info = -9_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLASD0', -info )
              return
           end if
           ! if the input matrix is too small, call stdlib${ii}$_slasdq to find the svd.
           if( n<=smlsiz ) then
              call stdlib${ii}$_slasdq( 'U', sqre, n, m, n, 0_${ik}$, d, e, vt, ldvt, u, ldu, u,ldu, work, &
                        info )
              return
           end if
           ! set up the computation tree.
           inode = 1_${ik}$
           ndiml = inode + n
           ndimr = ndiml + n
           idxq = ndimr + n
           iwk = idxq + n
           call stdlib${ii}$_slasdt( n, nlvl, nd, iwork( inode ), iwork( ndiml ),iwork( ndimr ), smlsiz &
                     )
           ! for the nodes on bottom level of the tree, solve
           ! their subproblems by stdlib${ii}$_slasdq.
           ndb1 = ( nd+1 ) / 2_${ik}$
           ncc = 0_${ik}$
           loop_30: do i = ndb1, nd
           ! ic : center row of each node
           ! nl : number of rows of left  subproblem
           ! nr : number of rows of right subproblem
           ! nlf: starting row of the left   subproblem
           ! nrf: starting row of the right  subproblem
              i1 = i - 1_${ik}$
              ic = iwork( inode+i1 )
              nl = iwork( ndiml+i1 )
              nlp1 = nl + 1_${ik}$
              nr = iwork( ndimr+i1 )
              nrp1 = nr + 1_${ik}$
              nlf = ic - nl
              nrf = ic + 1_${ik}$
              sqrei = 1_${ik}$
              call stdlib${ii}$_slasdq( 'U', sqrei, nl, nlp1, nl, ncc, d( nlf ), e( nlf ),vt( nlf, nlf )&
                        , ldvt, u( nlf, nlf ), ldu,u( nlf, nlf ), ldu, work, info )
              if( info/=0_${ik}$ ) then
                 return
              end if
              itemp = idxq + nlf - 2_${ik}$
              do j = 1, nl
                 iwork( itemp+j ) = j
              end do
              if( i==nd ) then
                 sqrei = sqre
              else
                 sqrei = 1_${ik}$
              end if
              nrp1 = nr + sqrei
              call stdlib${ii}$_slasdq( 'U', sqrei, nr, nrp1, nr, ncc, d( nrf ), e( nrf ),vt( nrf, nrf )&
                        , ldvt, u( nrf, nrf ), ldu,u( nrf, nrf ), ldu, work, info )
              if( info/=0_${ik}$ ) then
                 return
              end if
              itemp = idxq + ic
              do j = 1, nr
                 iwork( itemp+j-1 ) = j
              end do
           end do loop_30
           ! now conquer each subproblem bottom-up.
           loop_50: do lvl = nlvl, 1, -1
              ! find the first node lf and last node ll on the
              ! current level lvl.
              if( lvl==1_${ik}$ ) then
                 lf = 1_${ik}$
                 ll = 1_${ik}$
              else
                 lf = 2_${ik}$**( lvl-1 )
                 ll = 2_${ik}$*lf - 1_${ik}$
              end if
              do i = lf, ll
                 im1 = i - 1_${ik}$
                 ic = iwork( inode+im1 )
                 nl = iwork( ndiml+im1 )
                 nr = iwork( ndimr+im1 )
                 nlf = ic - nl
                 if( ( sqre==0_${ik}$ ) .and. ( i==ll ) ) then
                    sqrei = sqre
                 else
                    sqrei = 1_${ik}$
                 end if
                 idxqc = idxq + nlf - 1_${ik}$
                 alpha = d( ic )
                 beta = e( ic )
                 call stdlib${ii}$_slasd1( nl, nr, sqrei, d( nlf ), alpha, beta,u( nlf, nlf ), ldu, vt( &
                           nlf, nlf ), ldvt,iwork( idxqc ), iwork( iwk ), work, info )
           ! report the possible convergence failure.
                 if( info/=0_${ik}$ ) then
                    return
                 end if
              end do
           end do loop_50
           return
     end subroutine stdlib${ii}$_slasd0

     pure module subroutine stdlib${ii}$_dlasd0( n, sqre, d, e, u, ldu, vt, ldvt, smlsiz, iwork,work, info )
     !! Using a divide and conquer approach, DLASD0: computes the singular
     !! value decomposition (SVD) of a real upper bidiagonal N-by-M
     !! matrix B with diagonal D and offdiagonal E, where M = N + SQRE.
     !! The algorithm computes orthogonal matrices U and VT such that
     !! B = U * S * VT. The singular values S are overwritten on D.
     !! A related subroutine, DLASDA, computes only the singular values,
     !! and optionally, the singular vectors in compact form.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldu, ldvt, n, smlsiz, sqre
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(dp), intent(inout) :: d(*), e(*)
           real(dp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*)
        ! =====================================================================
           ! Local Scalars 
           integer(${ik}$) :: i, i1, ic, idxq, idxqc, im1, inode, itemp, iwk, j, lf, ll, lvl, m, ncc,&
                      nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqrei
           real(dp) :: alpha, beta
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -1_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -2_${ik}$
           end if
           m = n + sqre
           if( ldu<n ) then
              info = -6_${ik}$
           else if( ldvt<m ) then
              info = -8_${ik}$
           else if( smlsiz<3_${ik}$ ) then
              info = -9_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD0', -info )
              return
           end if
           ! if the input matrix is too small, call stdlib${ii}$_dlasdq to find the svd.
           if( n<=smlsiz ) then
              call stdlib${ii}$_dlasdq( 'U', sqre, n, m, n, 0_${ik}$, d, e, vt, ldvt, u, ldu, u,ldu, work, &
                        info )
              return
           end if
           ! set up the computation tree.
           inode = 1_${ik}$
           ndiml = inode + n
           ndimr = ndiml + n
           idxq = ndimr + n
           iwk = idxq + n
           call stdlib${ii}$_dlasdt( n, nlvl, nd, iwork( inode ), iwork( ndiml ),iwork( ndimr ), smlsiz &
                     )
           ! for the nodes on bottom level of the tree, solve
           ! their subproblems by stdlib${ii}$_dlasdq.
           ndb1 = ( nd+1 ) / 2_${ik}$
           ncc = 0_${ik}$
           loop_30: do i = ndb1, nd
           ! ic : center row of each node
           ! nl : number of rows of left  subproblem
           ! nr : number of rows of right subproblem
           ! nlf: starting row of the left   subproblem
           ! nrf: starting row of the right  subproblem
              i1 = i - 1_${ik}$
              ic = iwork( inode+i1 )
              nl = iwork( ndiml+i1 )
              nlp1 = nl + 1_${ik}$
              nr = iwork( ndimr+i1 )
              nrp1 = nr + 1_${ik}$
              nlf = ic - nl
              nrf = ic + 1_${ik}$
              sqrei = 1_${ik}$
              call stdlib${ii}$_dlasdq( 'U', sqrei, nl, nlp1, nl, ncc, d( nlf ), e( nlf ),vt( nlf, nlf )&
                        , ldvt, u( nlf, nlf ), ldu,u( nlf, nlf ), ldu, work, info )
              if( info/=0_${ik}$ ) then
                 return
              end if
              itemp = idxq + nlf - 2_${ik}$
              do j = 1, nl
                 iwork( itemp+j ) = j
              end do
              if( i==nd ) then
                 sqrei = sqre
              else
                 sqrei = 1_${ik}$
              end if
              nrp1 = nr + sqrei
              call stdlib${ii}$_dlasdq( 'U', sqrei, nr, nrp1, nr, ncc, d( nrf ), e( nrf ),vt( nrf, nrf )&
                        , ldvt, u( nrf, nrf ), ldu,u( nrf, nrf ), ldu, work, info )
              if( info/=0_${ik}$ ) then
                 return
              end if
              itemp = idxq + ic
              do j = 1, nr
                 iwork( itemp+j-1 ) = j
              end do
           end do loop_30
           ! now conquer each subproblem bottom-up.
           loop_50: do lvl = nlvl, 1, -1
              ! find the first node lf and last node ll on the
              ! current level lvl.
              if( lvl==1_${ik}$ ) then
                 lf = 1_${ik}$
                 ll = 1_${ik}$
              else
                 lf = 2_${ik}$**( lvl-1 )
                 ll = 2_${ik}$*lf - 1_${ik}$
              end if
              do i = lf, ll
                 im1 = i - 1_${ik}$
                 ic = iwork( inode+im1 )
                 nl = iwork( ndiml+im1 )
                 nr = iwork( ndimr+im1 )
                 nlf = ic - nl
                 if( ( sqre==0_${ik}$ ) .and. ( i==ll ) ) then
                    sqrei = sqre
                 else
                    sqrei = 1_${ik}$
                 end if
                 idxqc = idxq + nlf - 1_${ik}$
                 alpha = d( ic )
                 beta = e( ic )
                 call stdlib${ii}$_dlasd1( nl, nr, sqrei, d( nlf ), alpha, beta,u( nlf, nlf ), ldu, vt( &
                           nlf, nlf ), ldvt,iwork( idxqc ), iwork( iwk ), work, info )
              ! report the possible convergence failure.
                 if( info/=0_${ik}$ ) then
                    return
                 end if
              end do
           end do loop_50
           return
     end subroutine stdlib${ii}$_dlasd0

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lasd0( n, sqre, d, e, u, ldu, vt, ldvt, smlsiz, iwork,work, info )
     !! Using a divide and conquer approach, DLASD0: computes the singular
     !! value decomposition (SVD) of a real upper bidiagonal N-by-M
     !! matrix B with diagonal D and offdiagonal E, where M = N + SQRE.
     !! The algorithm computes orthogonal matrices U and VT such that
     !! B = U * S * VT. The singular values S are overwritten on D.
     !! A related subroutine, DLASDA, computes only the singular values,
     !! and optionally, the singular vectors in compact form.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldu, ldvt, n, smlsiz, sqre
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(${rk}$), intent(inout) :: d(*), e(*)
           real(${rk}$), intent(out) :: u(ldu,*), vt(ldvt,*), work(*)
        ! =====================================================================
           ! Local Scalars 
           integer(${ik}$) :: i, i1, ic, idxq, idxqc, im1, inode, itemp, iwk, j, lf, ll, lvl, m, ncc,&
                      nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqrei
           real(${rk}$) :: alpha, beta
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -1_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -2_${ik}$
           end if
           m = n + sqre
           if( ldu<n ) then
              info = -6_${ik}$
           else if( ldvt<m ) then
              info = -8_${ik}$
           else if( smlsiz<3_${ik}$ ) then
              info = -9_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD0', -info )
              return
           end if
           ! if the input matrix is too small, call stdlib${ii}$_${ri}$lasdq to find the svd.
           if( n<=smlsiz ) then
              call stdlib${ii}$_${ri}$lasdq( 'U', sqre, n, m, n, 0_${ik}$, d, e, vt, ldvt, u, ldu, u,ldu, work, &
                        info )
              return
           end if
           ! set up the computation tree.
           inode = 1_${ik}$
           ndiml = inode + n
           ndimr = ndiml + n
           idxq = ndimr + n
           iwk = idxq + n
           call stdlib${ii}$_${ri}$lasdt( n, nlvl, nd, iwork( inode ), iwork( ndiml ),iwork( ndimr ), smlsiz &
                     )
           ! for the nodes on bottom level of the tree, solve
           ! their subproblems by stdlib${ii}$_${ri}$lasdq.
           ndb1 = ( nd+1 ) / 2_${ik}$
           ncc = 0_${ik}$
           loop_30: do i = ndb1, nd
           ! ic : center row of each node
           ! nl : number of rows of left  subproblem
           ! nr : number of rows of right subproblem
           ! nlf: starting row of the left   subproblem
           ! nrf: starting row of the right  subproblem
              i1 = i - 1_${ik}$
              ic = iwork( inode+i1 )
              nl = iwork( ndiml+i1 )
              nlp1 = nl + 1_${ik}$
              nr = iwork( ndimr+i1 )
              nrp1 = nr + 1_${ik}$
              nlf = ic - nl
              nrf = ic + 1_${ik}$
              sqrei = 1_${ik}$
              call stdlib${ii}$_${ri}$lasdq( 'U', sqrei, nl, nlp1, nl, ncc, d( nlf ), e( nlf ),vt( nlf, nlf )&
                        , ldvt, u( nlf, nlf ), ldu,u( nlf, nlf ), ldu, work, info )
              if( info/=0_${ik}$ ) then
                 return
              end if
              itemp = idxq + nlf - 2_${ik}$
              do j = 1, nl
                 iwork( itemp+j ) = j
              end do
              if( i==nd ) then
                 sqrei = sqre
              else
                 sqrei = 1_${ik}$
              end if
              nrp1 = nr + sqrei
              call stdlib${ii}$_${ri}$lasdq( 'U', sqrei, nr, nrp1, nr, ncc, d( nrf ), e( nrf ),vt( nrf, nrf )&
                        , ldvt, u( nrf, nrf ), ldu,u( nrf, nrf ), ldu, work, info )
              if( info/=0_${ik}$ ) then
                 return
              end if
              itemp = idxq + ic
              do j = 1, nr
                 iwork( itemp+j-1 ) = j
              end do
           end do loop_30
           ! now conquer each subproblem bottom-up.
           loop_50: do lvl = nlvl, 1, -1
              ! find the first node lf and last node ll on the
              ! current level lvl.
              if( lvl==1_${ik}$ ) then
                 lf = 1_${ik}$
                 ll = 1_${ik}$
              else
                 lf = 2_${ik}$**( lvl-1 )
                 ll = 2_${ik}$*lf - 1_${ik}$
              end if
              do i = lf, ll
                 im1 = i - 1_${ik}$
                 ic = iwork( inode+im1 )
                 nl = iwork( ndiml+im1 )
                 nr = iwork( ndimr+im1 )
                 nlf = ic - nl
                 if( ( sqre==0_${ik}$ ) .and. ( i==ll ) ) then
                    sqrei = sqre
                 else
                    sqrei = 1_${ik}$
                 end if
                 idxqc = idxq + nlf - 1_${ik}$
                 alpha = d( ic )
                 beta = e( ic )
                 call stdlib${ii}$_${ri}$lasd1( nl, nr, sqrei, d( nlf ), alpha, beta,u( nlf, nlf ), ldu, vt( &
                           nlf, nlf ), ldvt,iwork( idxqc ), iwork( iwk ), work, info )
              ! report the possible convergence failure.
                 if( info/=0_${ik}$ ) then
                    return
                 end if
              end do
           end do loop_50
           return
     end subroutine stdlib${ii}$_${ri}$lasd0

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slasdt( n, lvl, nd, inode, ndiml, ndimr, msub )
     !! SLASDT creates a tree of subproblems for bidiagonal divide and
     !! conquer.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: lvl, nd
           integer(${ik}$), intent(in) :: msub, n
           ! Array Arguments 
           integer(${ik}$), intent(out) :: inode(*), ndiml(*), ndimr(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, il, ir, llst, maxn, ncrnt, nlvl
           real(sp) :: temp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! find the number of levels on the tree.
           maxn = max( 1_${ik}$, n )
           temp = log( real( maxn,KIND=sp) / real( msub+1,KIND=sp) ) / log( two )
           lvl = int( temp,KIND=${ik}$) + 1_${ik}$
           i = n / 2_${ik}$
           inode( 1_${ik}$ ) = i + 1_${ik}$
           ndiml( 1_${ik}$ ) = i
           ndimr( 1_${ik}$ ) = n - i - 1_${ik}$
           il = 0_${ik}$
           ir = 1_${ik}$
           llst = 1_${ik}$
           do nlvl = 1, lvl - 1
              ! constructing the tree at (nlvl+1)-st level. the number of
              ! nodes created on this level is llst * 2.
              do i = 0, llst - 1
                 il = il + 2_${ik}$
                 ir = ir + 2_${ik}$
                 ncrnt = llst + i
                 ndiml( il ) = ndiml( ncrnt ) / 2_${ik}$
                 ndimr( il ) = ndiml( ncrnt ) - ndiml( il ) - 1_${ik}$
                 inode( il ) = inode( ncrnt ) - ndimr( il ) - 1_${ik}$
                 ndiml( ir ) = ndimr( ncrnt ) / 2_${ik}$
                 ndimr( ir ) = ndimr( ncrnt ) - ndiml( ir ) - 1_${ik}$
                 inode( ir ) = inode( ncrnt ) + ndiml( ir ) + 1_${ik}$
              end do
              llst = llst*2_${ik}$
           end do
           nd = llst*2_${ik}$ - 1_${ik}$
           return
     end subroutine stdlib${ii}$_slasdt

     pure module subroutine stdlib${ii}$_dlasdt( n, lvl, nd, inode, ndiml, ndimr, msub )
     !! DLASDT creates a tree of subproblems for bidiagonal divide and
     !! conquer.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: lvl, nd
           integer(${ik}$), intent(in) :: msub, n
           ! Array Arguments 
           integer(${ik}$), intent(out) :: inode(*), ndiml(*), ndimr(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, il, ir, llst, maxn, ncrnt, nlvl
           real(dp) :: temp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! find the number of levels on the tree.
           maxn = max( 1_${ik}$, n )
           temp = log( real( maxn,KIND=dp) / real( msub+1,KIND=dp) ) / log( two )
           lvl = int( temp,KIND=${ik}$) + 1_${ik}$
           i = n / 2_${ik}$
           inode( 1_${ik}$ ) = i + 1_${ik}$
           ndiml( 1_${ik}$ ) = i
           ndimr( 1_${ik}$ ) = n - i - 1_${ik}$
           il = 0_${ik}$
           ir = 1_${ik}$
           llst = 1_${ik}$
           do nlvl = 1, lvl - 1
              ! constructing the tree at (nlvl+1)-st level. the number of
              ! nodes created on this level is llst * 2.
              do i = 0, llst - 1
                 il = il + 2_${ik}$
                 ir = ir + 2_${ik}$
                 ncrnt = llst + i
                 ndiml( il ) = ndiml( ncrnt ) / 2_${ik}$
                 ndimr( il ) = ndiml( ncrnt ) - ndiml( il ) - 1_${ik}$
                 inode( il ) = inode( ncrnt ) - ndimr( il ) - 1_${ik}$
                 ndiml( ir ) = ndimr( ncrnt ) / 2_${ik}$
                 ndimr( ir ) = ndimr( ncrnt ) - ndiml( ir ) - 1_${ik}$
                 inode( ir ) = inode( ncrnt ) + ndiml( ir ) + 1_${ik}$
              end do
              llst = llst*2_${ik}$
           end do
           nd = llst*2_${ik}$ - 1_${ik}$
           return
     end subroutine stdlib${ii}$_dlasdt

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lasdt( n, lvl, nd, inode, ndiml, ndimr, msub )
     !! DLASDT: creates a tree of subproblems for bidiagonal divide and
     !! conquer.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: lvl, nd
           integer(${ik}$), intent(in) :: msub, n
           ! Array Arguments 
           integer(${ik}$), intent(out) :: inode(*), ndiml(*), ndimr(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, il, ir, llst, maxn, ncrnt, nlvl
           real(${rk}$) :: temp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! find the number of levels on the tree.
           maxn = max( 1_${ik}$, n )
           temp = log( real( maxn,KIND=${rk}$) / real( msub+1,KIND=${rk}$) ) / log( two )
           lvl = int( temp,KIND=${ik}$) + 1_${ik}$
           i = n / 2_${ik}$
           inode( 1_${ik}$ ) = i + 1_${ik}$
           ndiml( 1_${ik}$ ) = i
           ndimr( 1_${ik}$ ) = n - i - 1_${ik}$
           il = 0_${ik}$
           ir = 1_${ik}$
           llst = 1_${ik}$
           do nlvl = 1, lvl - 1
              ! constructing the tree at (nlvl+1)-st level. the number of
              ! nodes created on this level is llst * 2.
              do i = 0, llst - 1
                 il = il + 2_${ik}$
                 ir = ir + 2_${ik}$
                 ncrnt = llst + i
                 ndiml( il ) = ndiml( ncrnt ) / 2_${ik}$
                 ndimr( il ) = ndiml( ncrnt ) - ndiml( il ) - 1_${ik}$
                 inode( il ) = inode( ncrnt ) - ndimr( il ) - 1_${ik}$
                 ndiml( ir ) = ndimr( ncrnt ) / 2_${ik}$
                 ndimr( ir ) = ndimr( ncrnt ) - ndiml( ir ) - 1_${ik}$
                 inode( ir ) = inode( ncrnt ) + ndiml( ir ) + 1_${ik}$
              end do
              llst = llst*2_${ik}$
           end do
           nd = llst*2_${ik}$ - 1_${ik}$
           return
     end subroutine stdlib${ii}$_${ri}$lasdt

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slasd1( nl, nr, sqre, d, alpha, beta, u, ldu, vt, ldvt,idxq, iwork, &
     !! SLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B,
     !! where N = NL + NR + 1 and M = N + SQRE. SLASD1 is called from SLASD0.
     !! A related subroutine SLASD7 handles the case in which the singular
     !! values (and the singular vectors in factored form) are desired.
     !! SLASD1 computes the SVD as follows:
     !! ( D1(in)    0    0       0 )
     !! B = U(in) * (   Z1**T   a   Z2**T    b ) * VT(in)
     !! (   0       0   D2(in)   0 )
     !! = U(out) * ( D(out) 0) * VT(out)
     !! where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M
     !! with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
     !! elsewhere; and the entry b is empty if SQRE = 0.
     !! The left singular vectors of the original matrix are stored in U, and
     !! the transpose of the right singular vectors are stored in VT, and the
     !! singular values are in D.  The algorithm consists of three stages:
     !! The first stage consists of deflating the size of the problem
     !! when there are multiple singular values or when there are zeros in
     !! the Z vector.  For each such occurrence the dimension of the
     !! secular equation problem is reduced by one.  This stage is
     !! performed by the routine SLASD2.
     !! The second stage consists of calculating the updated
     !! singular values. This is done by finding the square roots of the
     !! roots of the secular equation via the routine SLASD4 (as called
     !! by SLASD3). This routine also calculates the singular vectors of
     !! the current problem.
     !! The final stage consists of computing the updated singular vectors
     !! directly using the updated singular values.  The singular vectors
     !! for the current problem are multiplied with the singular vectors
     !! from the overall problem.
               work, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldu, ldvt, nl, nr, sqre
           real(sp), intent(inout) :: alpha, beta
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: idxq(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(sp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: coltyp, i, idx, idxc, idxp, iq, isigma, iu2, ivt2, iz, k, ldq, ldu2, &
                     ldvt2, m, n, n1, n2
           real(sp) :: orgnrm
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLASD1', -info )
              return
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           ! the following values are for bookkeeping purposes only.  they are
           ! integer pointers which indicate the portion of the workspace
           ! used by a particular array in stdlib${ii}$_slasd2 and stdlib${ii}$_slasd3.
           ldu2 = n
           ldvt2 = m
           iz = 1_${ik}$
           isigma = iz + m
           iu2 = isigma + n
           ivt2 = iu2 + ldu2*n
           iq = ivt2 + ldvt2*m
           idx = 1_${ik}$
           idxc = idx + n
           coltyp = idxc + n
           idxp = coltyp + n
           ! scale.
           orgnrm = max( abs( alpha ), abs( beta ) )
           d( nl+1 ) = zero
           do i = 1, n
              if( abs( d( i ) )>orgnrm ) then
                 orgnrm = abs( d( i ) )
              end if
           end do
           call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, orgnrm, one, n, 1_${ik}$, d, n, info )
           alpha = alpha / orgnrm
           beta = beta / orgnrm
           ! deflate singular values.
           call stdlib${ii}$_slasd2( nl, nr, sqre, k, d, work( iz ), alpha, beta, u, ldu,vt, ldvt, work(&
            isigma ), work( iu2 ), ldu2,work( ivt2 ), ldvt2, iwork( idxp ), iwork( idx ),iwork( &
                      idxc ), idxq, iwork( coltyp ), info )
           ! solve secular equation and update singular vectors.
           ldq = k
           call stdlib${ii}$_slasd3( nl, nr, sqre, k, d, work( iq ), ldq, work( isigma ),u, ldu, work( &
           iu2 ), ldu2, vt, ldvt, work( ivt2 ),ldvt2, iwork( idxc ), iwork( coltyp ), work( iz ),&
                     info )
           ! report the possible convergence failure.
           if( info/=0_${ik}$ ) then
              return
           end if
           ! unscale.
           call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, orgnrm, n, 1_${ik}$, d, n, info )
           ! prepare the idxq sorting permutation.
           n1 = k
           n2 = n - k
           call stdlib${ii}$_slamrg( n1, n2, d, 1_${ik}$, -1_${ik}$, idxq )
           return
     end subroutine stdlib${ii}$_slasd1

     pure module subroutine stdlib${ii}$_dlasd1( nl, nr, sqre, d, alpha, beta, u, ldu, vt, ldvt,idxq, iwork, &
     !! DLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B,
     !! where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0.
     !! A related subroutine DLASD7 handles the case in which the singular
     !! values (and the singular vectors in factored form) are desired.
     !! DLASD1 computes the SVD as follows:
     !! ( D1(in)    0    0       0 )
     !! B = U(in) * (   Z1**T   a   Z2**T    b ) * VT(in)
     !! (   0       0   D2(in)   0 )
     !! = U(out) * ( D(out) 0) * VT(out)
     !! where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M
     !! with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
     !! elsewhere; and the entry b is empty if SQRE = 0.
     !! The left singular vectors of the original matrix are stored in U, and
     !! the transpose of the right singular vectors are stored in VT, and the
     !! singular values are in D.  The algorithm consists of three stages:
     !! The first stage consists of deflating the size of the problem
     !! when there are multiple singular values or when there are zeros in
     !! the Z vector.  For each such occurrence the dimension of the
     !! secular equation problem is reduced by one.  This stage is
     !! performed by the routine DLASD2.
     !! The second stage consists of calculating the updated
     !! singular values. This is done by finding the square roots of the
     !! roots of the secular equation via the routine DLASD4 (as called
     !! by DLASD3). This routine also calculates the singular vectors of
     !! the current problem.
     !! The final stage consists of computing the updated singular vectors
     !! directly using the updated singular values.  The singular vectors
     !! for the current problem are multiplied with the singular vectors
     !! from the overall problem.
               work, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldu, ldvt, nl, nr, sqre
           real(dp), intent(inout) :: alpha, beta
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: idxq(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(dp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: coltyp, i, idx, idxc, idxp, iq, isigma, iu2, ivt2, iz, k, ldq, ldu2, &
                     ldvt2, m, n, n1, n2
           real(dp) :: orgnrm
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD1', -info )
              return
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           ! the following values are for bookkeeping purposes only.  they are
           ! integer pointers which indicate the portion of the workspace
           ! used by a particular array in stdlib${ii}$_dlasd2 and stdlib${ii}$_dlasd3.
           ldu2 = n
           ldvt2 = m
           iz = 1_${ik}$
           isigma = iz + m
           iu2 = isigma + n
           ivt2 = iu2 + ldu2*n
           iq = ivt2 + ldvt2*m
           idx = 1_${ik}$
           idxc = idx + n
           coltyp = idxc + n
           idxp = coltyp + n
           ! scale.
           orgnrm = max( abs( alpha ), abs( beta ) )
           d( nl+1 ) = zero
           do i = 1, n
              if( abs( d( i ) )>orgnrm ) then
                 orgnrm = abs( d( i ) )
              end if
           end do
           call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, orgnrm, one, n, 1_${ik}$, d, n, info )
           alpha = alpha / orgnrm
           beta = beta / orgnrm
           ! deflate singular values.
           call stdlib${ii}$_dlasd2( nl, nr, sqre, k, d, work( iz ), alpha, beta, u, ldu,vt, ldvt, work(&
            isigma ), work( iu2 ), ldu2,work( ivt2 ), ldvt2, iwork( idxp ), iwork( idx ),iwork( &
                      idxc ), idxq, iwork( coltyp ), info )
           ! solve secular equation and update singular vectors.
           ldq = k
           call stdlib${ii}$_dlasd3( nl, nr, sqre, k, d, work( iq ), ldq, work( isigma ),u, ldu, work( &
           iu2 ), ldu2, vt, ldvt, work( ivt2 ),ldvt2, iwork( idxc ), iwork( coltyp ), work( iz ),&
                     info )
           ! report the convergence failure.
           if( info/=0_${ik}$ ) then
              return
           end if
           ! unscale.
           call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, orgnrm, n, 1_${ik}$, d, n, info )
           ! prepare the idxq sorting permutation.
           n1 = k
           n2 = n - k
           call stdlib${ii}$_dlamrg( n1, n2, d, 1_${ik}$, -1_${ik}$, idxq )
           return
     end subroutine stdlib${ii}$_dlasd1

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lasd1( nl, nr, sqre, d, alpha, beta, u, ldu, vt, ldvt,idxq, iwork, &
     !! DLASD1: computes the SVD of an upper bidiagonal N-by-M matrix B,
     !! where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0.
     !! A related subroutine DLASD7 handles the case in which the singular
     !! values (and the singular vectors in factored form) are desired.
     !! DLASD1 computes the SVD as follows:
     !! ( D1(in)    0    0       0 )
     !! B = U(in) * (   Z1**T   a   Z2**T    b ) * VT(in)
     !! (   0       0   D2(in)   0 )
     !! = U(out) * ( D(out) 0) * VT(out)
     !! where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M
     !! with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
     !! elsewhere; and the entry b is empty if SQRE = 0.
     !! The left singular vectors of the original matrix are stored in U, and
     !! the transpose of the right singular vectors are stored in VT, and the
     !! singular values are in D.  The algorithm consists of three stages:
     !! The first stage consists of deflating the size of the problem
     !! when there are multiple singular values or when there are zeros in
     !! the Z vector.  For each such occurrence the dimension of the
     !! secular equation problem is reduced by one.  This stage is
     !! performed by the routine DLASD2.
     !! The second stage consists of calculating the updated
     !! singular values. This is done by finding the square roots of the
     !! roots of the secular equation via the routine DLASD4 (as called
     !! by DLASD3). This routine also calculates the singular vectors of
     !! the current problem.
     !! The final stage consists of computing the updated singular vectors
     !! directly using the updated singular values.  The singular vectors
     !! for the current problem are multiplied with the singular vectors
     !! from the overall problem.
               work, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldu, ldvt, nl, nr, sqre
           real(${rk}$), intent(inout) :: alpha, beta
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: idxq(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(${rk}$), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: coltyp, i, idx, idxc, idxp, iq, isigma, iu2, ivt2, iz, k, ldq, ldu2, &
                     ldvt2, m, n, n1, n2
           real(${rk}$) :: orgnrm
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD1', -info )
              return
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           ! the following values are for bookkeeping purposes only.  they are
           ! integer pointers which indicate the portion of the workspace
           ! used by a particular array in stdlib${ii}$_${ri}$lasd2 and stdlib${ii}$_${ri}$lasd3.
           ldu2 = n
           ldvt2 = m
           iz = 1_${ik}$
           isigma = iz + m
           iu2 = isigma + n
           ivt2 = iu2 + ldu2*n
           iq = ivt2 + ldvt2*m
           idx = 1_${ik}$
           idxc = idx + n
           coltyp = idxc + n
           idxp = coltyp + n
           ! scale.
           orgnrm = max( abs( alpha ), abs( beta ) )
           d( nl+1 ) = zero
           do i = 1, n
              if( abs( d( i ) )>orgnrm ) then
                 orgnrm = abs( d( i ) )
              end if
           end do
           call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, orgnrm, one, n, 1_${ik}$, d, n, info )
           alpha = alpha / orgnrm
           beta = beta / orgnrm
           ! deflate singular values.
           call stdlib${ii}$_${ri}$lasd2( nl, nr, sqre, k, d, work( iz ), alpha, beta, u, ldu,vt, ldvt, work(&
            isigma ), work( iu2 ), ldu2,work( ivt2 ), ldvt2, iwork( idxp ), iwork( idx ),iwork( &
                      idxc ), idxq, iwork( coltyp ), info )
           ! solve secular equation and update singular vectors.
           ldq = k
           call stdlib${ii}$_${ri}$lasd3( nl, nr, sqre, k, d, work( iq ), ldq, work( isigma ),u, ldu, work( &
           iu2 ), ldu2, vt, ldvt, work( ivt2 ),ldvt2, iwork( idxc ), iwork( coltyp ), work( iz ),&
                     info )
           ! report the convergence failure.
           if( info/=0_${ik}$ ) then
              return
           end if
           ! unscale.
           call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, orgnrm, n, 1_${ik}$, d, n, info )
           ! prepare the idxq sorting permutation.
           n1 = k
           n2 = n - k
           call stdlib${ii}$_${ri}$lamrg( n1, n2, d, 1_${ik}$, -1_${ik}$, idxq )
           return
     end subroutine stdlib${ii}$_${ri}$lasd1

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slasd2( nl, nr, sqre, k, d, z, alpha, beta, u, ldu, vt,ldvt, dsigma, &
     !! SLASD2 merges the two sets of singular values together into a single
     !! sorted set.  Then it tries to deflate the size of the problem.
     !! There are two ways in which deflation can occur:  when two or more
     !! singular values are close together or if there is a tiny entry in the
     !! Z vector.  For each such occurrence the order of the related secular
     !! equation problem is reduced by one.
     !! SLASD2 is called from SLASD1.
               u2, ldu2, vt2, ldvt2, idxp, idx,idxc, idxq, coltyp, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, k
           integer(${ik}$), intent(in) :: ldu, ldu2, ldvt, ldvt2, nl, nr, sqre
           real(sp), intent(in) :: alpha, beta
           ! Array Arguments 
           integer(${ik}$), intent(out) :: coltyp(*), idx(*), idxc(*), idxp(*)
           integer(${ik}$), intent(inout) :: idxq(*)
           real(sp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*)
           real(sp), intent(out) :: dsigma(*), u2(ldu2,*), vt2(ldvt2,*), z(*)
        ! =====================================================================
           
           ! Local Arrays 
           integer(${ik}$) :: ctot(4_${ik}$), psm(4_${ik}$)
           ! Local Scalars 
           integer(${ik}$) :: ct, i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2
           real(sp) :: c, eps, hlftol, s, tau, tol, z1
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           if( ldu<n ) then
              info = -10_${ik}$
           else if( ldvt<m ) then
              info = -12_${ik}$
           else if( ldu2<n ) then
              info = -15_${ik}$
           else if( ldvt2<m ) then
              info = -17_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLASD2', -info )
              return
           end if
           nlp1 = nl + 1_${ik}$
           nlp2 = nl + 2_${ik}$
           ! generate the first part of the vector z; and move the singular
           ! values in the first part of d one position backward.
           z1 = alpha*vt( nlp1, nlp1 )
           z( 1_${ik}$ ) = z1
           do i = nl, 1, -1
              z( i+1 ) = alpha*vt( i, nlp1 )
              d( i+1 ) = d( i )
              idxq( i+1 ) = idxq( i ) + 1_${ik}$
           end do
           ! generate the second part of the vector z.
           do i = nlp2, m
              z( i ) = beta*vt( i, nlp2 )
           end do
           ! initialize some reference arrays.
           do i = 2, nlp1
              coltyp( i ) = 1_${ik}$
           end do
           do i = nlp2, n
              coltyp( i ) = 2_${ik}$
           end do
           ! sort the singular values into increasing order
           do i = nlp2, n
              idxq( i ) = idxq( i ) + nlp1
           end do
           ! dsigma, idxc, idxc, and the first column of u2
           ! are used as storage space.
           do i = 2, n
              dsigma( i ) = d( idxq( i ) )
              u2( i, 1_${ik}$ ) = z( idxq( i ) )
              idxc( i ) = coltyp( idxq( i ) )
           end do
           call stdlib${ii}$_slamrg( nl, nr, dsigma( 2_${ik}$ ), 1_${ik}$, 1_${ik}$, idx( 2_${ik}$ ) )
           do i = 2, n
              idxi = 1_${ik}$ + idx( i )
              d( i ) = dsigma( idxi )
              z( i ) = u2( idxi, 1_${ik}$ )
              coltyp( i ) = idxc( idxi )
           end do
           ! calculate the allowable deflation tolerance
           eps = stdlib${ii}$_slamch( 'EPSILON' )
           tol = max( abs( alpha ), abs( beta ) )
           tol = eight*eps*max( abs( d( n ) ), tol )
           ! there are 2 kinds of deflation -- first a value in the z-vector
           ! is small, second two (or more) singular values are very close
           ! together (their difference is small).
           ! if the value in the z-vector is small, we simply permute the
           ! array so that the corresponding singular value is moved to the
           ! end.
           ! if two values in the d-vector are close, we perform a two-sided
           ! rotation designed to make one of the corresponding z-vector
           ! entries zero, and then permute the array so that the deflated
           ! singular value is moved to the end.
           ! if there are multiple singular values then the problem deflates.
           ! here the number of equal singular values are found.  as each equal
           ! singular value is found, an elementary reflector is computed to
           ! rotate the corresponding singular subspace so that the
           ! corresponding components of z are zero in this new basis.
           k = 1_${ik}$
           k2 = n + 1_${ik}$
           do j = 2, n
              if( abs( z( j ) )<=tol ) then
                 ! deflate due to small z component.
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = j
                 coltyp( j ) = 4_${ik}$
                 if( j==n )go to 120
              else
                 jprev = j
                 go to 90
              end if
           end do
           90 continue
           j = jprev
           100 continue
           j = j + 1_${ik}$
           if( j>n )go to 110
           if( abs( z( j ) )<=tol ) then
              ! deflate due to small z component.
              k2 = k2 - 1_${ik}$
              idxp( k2 ) = j
              coltyp( j ) = 4_${ik}$
           else
              ! check if singular values are close enough to allow deflation.
              if( abs( d( j )-d( jprev ) )<=tol ) then
                 ! deflation is possible.
                 s = z( jprev )
                 c = z( j )
                 ! find sqrt(a**2+b**2) without overflow or
                 ! destructive underflow.
                 tau = stdlib${ii}$_slapy2( c, s )
                 c = c / tau
                 s = -s / tau
                 z( j ) = tau
                 z( jprev ) = zero
                 ! apply back the givens rotation to the left and right
                 ! singular vector matrices.
                 idxjp = idxq( idx( jprev )+1_${ik}$ )
                 idxj = idxq( idx( j )+1_${ik}$ )
                 if( idxjp<=nlp1 ) then
                    idxjp = idxjp - 1_${ik}$
                 end if
                 if( idxj<=nlp1 ) then
                    idxj = idxj - 1_${ik}$
                 end if
                 call stdlib${ii}$_srot( n, u( 1_${ik}$, idxjp ), 1_${ik}$, u( 1_${ik}$, idxj ), 1_${ik}$, c, s )
                 call stdlib${ii}$_srot( m, vt( idxjp, 1_${ik}$ ), ldvt, vt( idxj, 1_${ik}$ ), ldvt, c,s )
                 if( coltyp( j )/=coltyp( jprev ) ) then
                    coltyp( j ) = 3_${ik}$
                 end if
                 coltyp( jprev ) = 4_${ik}$
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = jprev
                 jprev = j
              else
                 k = k + 1_${ik}$
                 u2( k, 1_${ik}$ ) = z( jprev )
                 dsigma( k ) = d( jprev )
                 idxp( k ) = jprev
                 jprev = j
              end if
           end if
           go to 100
           110 continue
           ! record the last singular value.
           k = k + 1_${ik}$
           u2( k, 1_${ik}$ ) = z( jprev )
           dsigma( k ) = d( jprev )
           idxp( k ) = jprev
           120 continue
           ! count up the total number of the various types of columns, then
           ! form a permutation which positions the four column types into
           ! four groups of uniform structure (although one or more of these
           ! groups may be empty).
           do j = 1, 4
              ctot( j ) = 0_${ik}$
           end do
           do j = 2, n
              ct = coltyp( j )
              ctot( ct ) = ctot( ct ) + 1_${ik}$
           end do
           ! psm(*) = position in submatrix (of types 1 through 4)
           psm( 1_${ik}$ ) = 2_${ik}$
           psm( 2_${ik}$ ) = 2_${ik}$ + ctot( 1_${ik}$ )
           psm( 3_${ik}$ ) = psm( 2_${ik}$ ) + ctot( 2_${ik}$ )
           psm( 4_${ik}$ ) = psm( 3_${ik}$ ) + ctot( 3_${ik}$ )
           ! fill out the idxc array so that the permutation which it induces
           ! will place all type-1 columns first, all type-2 columns next,
           ! then all type-3's, and finally all type-4's, starting from the
           ! second column. this applies similarly to the rows of vt.
           do j = 2, n
              jp = idxp( j )
              ct = coltyp( jp )
              idxc( psm( ct ) ) = j
              psm( ct ) = psm( ct ) + 1_${ik}$
           end do
           ! sort the singular values and corresponding singular vectors into
           ! dsigma, u2, and vt2 respectively.  the singular values/vectors
           ! which were not deflated go into the first k slots of dsigma, u2,
           ! and vt2 respectively, while those which were deflated go into the
           ! last n - k slots, except that the first column/row will be treated
           ! separately.
           do j = 2, n
              jp = idxp( j )
              dsigma( j ) = d( jp )
              idxj = idxq( idx( idxp( idxc( j ) ) )+1_${ik}$ )
              if( idxj<=nlp1 ) then
                 idxj = idxj - 1_${ik}$
              end if
              call stdlib${ii}$_scopy( n, u( 1_${ik}$, idxj ), 1_${ik}$, u2( 1_${ik}$, j ), 1_${ik}$ )
              call stdlib${ii}$_scopy( m, vt( idxj, 1_${ik}$ ), ldvt, vt2( j, 1_${ik}$ ), ldvt2 )
           end do
           ! determine dsigma(1), dsigma(2) and z(1)
           dsigma( 1_${ik}$ ) = zero
           hlftol = tol / two
           if( abs( dsigma( 2_${ik}$ ) )<=hlftol )dsigma( 2_${ik}$ ) = hlftol
           if( m>n ) then
              z( 1_${ik}$ ) = stdlib${ii}$_slapy2( z1, z( m ) )
              if( z( 1_${ik}$ )<=tol ) then
                 c = one
                 s = zero
                 z( 1_${ik}$ ) = tol
              else
                 c = z1 / z( 1_${ik}$ )
                 s = z( m ) / z( 1_${ik}$ )
              end if
           else
              if( abs( z1 )<=tol ) then
                 z( 1_${ik}$ ) = tol
              else
                 z( 1_${ik}$ ) = z1
              end if
           end if
           ! move the rest of the updating row to z.
           call stdlib${ii}$_scopy( k-1, u2( 2_${ik}$, 1_${ik}$ ), 1_${ik}$, z( 2_${ik}$ ), 1_${ik}$ )
           ! determine the first column of u2, the first row of vt2 and the
           ! last row of vt.
           call stdlib${ii}$_slaset( 'A', n, 1_${ik}$, zero, zero, u2, ldu2 )
           u2( nlp1, 1_${ik}$ ) = one
           if( m>n ) then
              do i = 1, nlp1
                 vt( m, i ) = -s*vt( nlp1, i )
                 vt2( 1_${ik}$, i ) = c*vt( nlp1, i )
              end do
              do i = nlp2, m
                 vt2( 1_${ik}$, i ) = s*vt( m, i )
                 vt( m, i ) = c*vt( m, i )
              end do
           else
              call stdlib${ii}$_scopy( m, vt( nlp1, 1_${ik}$ ), ldvt, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2 )
           end if
           if( m>n ) then
              call stdlib${ii}$_scopy( m, vt( m, 1_${ik}$ ), ldvt, vt2( m, 1_${ik}$ ), ldvt2 )
           end if
           ! the deflated singular values and their corresponding vectors go
           ! into the back of d, u, and v respectively.
           if( n>k ) then
              call stdlib${ii}$_scopy( n-k, dsigma( k+1 ), 1_${ik}$, d( k+1 ), 1_${ik}$ )
              call stdlib${ii}$_slacpy( 'A', n, n-k, u2( 1_${ik}$, k+1 ), ldu2, u( 1_${ik}$, k+1 ),ldu )
              call stdlib${ii}$_slacpy( 'A', n-k, m, vt2( k+1, 1_${ik}$ ), ldvt2, vt( k+1, 1_${ik}$ ),ldvt )
           end if
           ! copy ctot into coltyp for referencing in stdlib${ii}$_slasd3.
           do j = 1, 4
              coltyp( j ) = ctot( j )
           end do
           return
     end subroutine stdlib${ii}$_slasd2

     pure module subroutine stdlib${ii}$_dlasd2( nl, nr, sqre, k, d, z, alpha, beta, u, ldu, vt,ldvt, dsigma, &
     !! DLASD2 merges the two sets of singular values together into a single
     !! sorted set.  Then it tries to deflate the size of the problem.
     !! There are two ways in which deflation can occur:  when two or more
     !! singular values are close together or if there is a tiny entry in the
     !! Z vector.  For each such occurrence the order of the related secular
     !! equation problem is reduced by one.
     !! DLASD2 is called from DLASD1.
               u2, ldu2, vt2, ldvt2, idxp, idx,idxc, idxq, coltyp, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, k
           integer(${ik}$), intent(in) :: ldu, ldu2, ldvt, ldvt2, nl, nr, sqre
           real(dp), intent(in) :: alpha, beta
           ! Array Arguments 
           integer(${ik}$), intent(out) :: coltyp(*), idx(*), idxc(*), idxp(*)
           integer(${ik}$), intent(inout) :: idxq(*)
           real(dp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*)
           real(dp), intent(out) :: dsigma(*), u2(ldu2,*), vt2(ldvt2,*), z(*)
        ! =====================================================================
           
           ! Local Arrays 
           integer(${ik}$) :: ctot(4_${ik}$), psm(4_${ik}$)
           ! Local Scalars 
           integer(${ik}$) :: ct, i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2
           real(dp) :: c, eps, hlftol, s, tau, tol, z1
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           if( ldu<n ) then
              info = -10_${ik}$
           else if( ldvt<m ) then
              info = -12_${ik}$
           else if( ldu2<n ) then
              info = -15_${ik}$
           else if( ldvt2<m ) then
              info = -17_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD2', -info )
              return
           end if
           nlp1 = nl + 1_${ik}$
           nlp2 = nl + 2_${ik}$
           ! generate the first part of the vector z; and move the singular
           ! values in the first part of d one position backward.
           z1 = alpha*vt( nlp1, nlp1 )
           z( 1_${ik}$ ) = z1
           do i = nl, 1, -1
              z( i+1 ) = alpha*vt( i, nlp1 )
              d( i+1 ) = d( i )
              idxq( i+1 ) = idxq( i ) + 1_${ik}$
           end do
           ! generate the second part of the vector z.
           do i = nlp2, m
              z( i ) = beta*vt( i, nlp2 )
           end do
           ! initialize some reference arrays.
           do i = 2, nlp1
              coltyp( i ) = 1_${ik}$
           end do
           do i = nlp2, n
              coltyp( i ) = 2_${ik}$
           end do
           ! sort the singular values into increasing order
           do i = nlp2, n
              idxq( i ) = idxq( i ) + nlp1
           end do
           ! dsigma, idxc, idxc, and the first column of u2
           ! are used as storage space.
           do i = 2, n
              dsigma( i ) = d( idxq( i ) )
              u2( i, 1_${ik}$ ) = z( idxq( i ) )
              idxc( i ) = coltyp( idxq( i ) )
           end do
           call stdlib${ii}$_dlamrg( nl, nr, dsigma( 2_${ik}$ ), 1_${ik}$, 1_${ik}$, idx( 2_${ik}$ ) )
           do i = 2, n
              idxi = 1_${ik}$ + idx( i )
              d( i ) = dsigma( idxi )
              z( i ) = u2( idxi, 1_${ik}$ )
              coltyp( i ) = idxc( idxi )
           end do
           ! calculate the allowable deflation tolerance
           eps = stdlib${ii}$_dlamch( 'EPSILON' )
           tol = max( abs( alpha ), abs( beta ) )
           tol = eight*eps*max( abs( d( n ) ), tol )
           ! there are 2 kinds of deflation -- first a value in the z-vector
           ! is small, second two (or more) singular values are very close
           ! together (their difference is small).
           ! if the value in the z-vector is small, we simply permute the
           ! array so that the corresponding singular value is moved to the
           ! end.
           ! if two values in the d-vector are close, we perform a two-sided
           ! rotation designed to make one of the corresponding z-vector
           ! entries zero, and then permute the array so that the deflated
           ! singular value is moved to the end.
           ! if there are multiple singular values then the problem deflates.
           ! here the number of equal singular values are found.  as each equal
           ! singular value is found, an elementary reflector is computed to
           ! rotate the corresponding singular subspace so that the
           ! corresponding components of z are zero in this new basis.
           k = 1_${ik}$
           k2 = n + 1_${ik}$
           do j = 2, n
              if( abs( z( j ) )<=tol ) then
                 ! deflate due to small z component.
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = j
                 coltyp( j ) = 4_${ik}$
                 if( j==n )go to 120
              else
                 jprev = j
                 go to 90
              end if
           end do
           90 continue
           j = jprev
           100 continue
           j = j + 1_${ik}$
           if( j>n )go to 110
           if( abs( z( j ) )<=tol ) then
              ! deflate due to small z component.
              k2 = k2 - 1_${ik}$
              idxp( k2 ) = j
              coltyp( j ) = 4_${ik}$
           else
              ! check if singular values are close enough to allow deflation.
              if( abs( d( j )-d( jprev ) )<=tol ) then
                 ! deflation is possible.
                 s = z( jprev )
                 c = z( j )
                 ! find sqrt(a**2+b**2) without overflow or
                 ! destructive underflow.
                 tau = stdlib${ii}$_dlapy2( c, s )
                 c = c / tau
                 s = -s / tau
                 z( j ) = tau
                 z( jprev ) = zero
                 ! apply back the givens rotation to the left and right
                 ! singular vector matrices.
                 idxjp = idxq( idx( jprev )+1_${ik}$ )
                 idxj = idxq( idx( j )+1_${ik}$ )
                 if( idxjp<=nlp1 ) then
                    idxjp = idxjp - 1_${ik}$
                 end if
                 if( idxj<=nlp1 ) then
                    idxj = idxj - 1_${ik}$
                 end if
                 call stdlib${ii}$_drot( n, u( 1_${ik}$, idxjp ), 1_${ik}$, u( 1_${ik}$, idxj ), 1_${ik}$, c, s )
                 call stdlib${ii}$_drot( m, vt( idxjp, 1_${ik}$ ), ldvt, vt( idxj, 1_${ik}$ ), ldvt, c,s )
                 if( coltyp( j )/=coltyp( jprev ) ) then
                    coltyp( j ) = 3_${ik}$
                 end if
                 coltyp( jprev ) = 4_${ik}$
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = jprev
                 jprev = j
              else
                 k = k + 1_${ik}$
                 u2( k, 1_${ik}$ ) = z( jprev )
                 dsigma( k ) = d( jprev )
                 idxp( k ) = jprev
                 jprev = j
              end if
           end if
           go to 100
           110 continue
           ! record the last singular value.
           k = k + 1_${ik}$
           u2( k, 1_${ik}$ ) = z( jprev )
           dsigma( k ) = d( jprev )
           idxp( k ) = jprev
           120 continue
           ! count up the total number of the various types of columns, then
           ! form a permutation which positions the four column types into
           ! four groups of uniform structure (although one or more of these
           ! groups may be empty).
           do j = 1, 4
              ctot( j ) = 0_${ik}$
           end do
           do j = 2, n
              ct = coltyp( j )
              ctot( ct ) = ctot( ct ) + 1_${ik}$
           end do
           ! psm(*) = position in submatrix (of types 1 through 4)
           psm( 1_${ik}$ ) = 2_${ik}$
           psm( 2_${ik}$ ) = 2_${ik}$ + ctot( 1_${ik}$ )
           psm( 3_${ik}$ ) = psm( 2_${ik}$ ) + ctot( 2_${ik}$ )
           psm( 4_${ik}$ ) = psm( 3_${ik}$ ) + ctot( 3_${ik}$ )
           ! fill out the idxc array so that the permutation which it induces
           ! will place all type-1 columns first, all type-2 columns next,
           ! then all type-3's, and finally all type-4's, starting from the
           ! second column. this applies similarly to the rows of vt.
           do j = 2, n
              jp = idxp( j )
              ct = coltyp( jp )
              idxc( psm( ct ) ) = j
              psm( ct ) = psm( ct ) + 1_${ik}$
           end do
           ! sort the singular values and corresponding singular vectors into
           ! dsigma, u2, and vt2 respectively.  the singular values/vectors
           ! which were not deflated go into the first k slots of dsigma, u2,
           ! and vt2 respectively, while those which were deflated go into the
           ! last n - k slots, except that the first column/row will be treated
           ! separately.
           do j = 2, n
              jp = idxp( j )
              dsigma( j ) = d( jp )
              idxj = idxq( idx( idxp( idxc( j ) ) )+1_${ik}$ )
              if( idxj<=nlp1 ) then
                 idxj = idxj - 1_${ik}$
              end if
              call stdlib${ii}$_dcopy( n, u( 1_${ik}$, idxj ), 1_${ik}$, u2( 1_${ik}$, j ), 1_${ik}$ )
              call stdlib${ii}$_dcopy( m, vt( idxj, 1_${ik}$ ), ldvt, vt2( j, 1_${ik}$ ), ldvt2 )
           end do
           ! determine dsigma(1), dsigma(2) and z(1)
           dsigma( 1_${ik}$ ) = zero
           hlftol = tol / two
           if( abs( dsigma( 2_${ik}$ ) )<=hlftol )dsigma( 2_${ik}$ ) = hlftol
           if( m>n ) then
              z( 1_${ik}$ ) = stdlib${ii}$_dlapy2( z1, z( m ) )
              if( z( 1_${ik}$ )<=tol ) then
                 c = one
                 s = zero
                 z( 1_${ik}$ ) = tol
              else
                 c = z1 / z( 1_${ik}$ )
                 s = z( m ) / z( 1_${ik}$ )
              end if
           else
              if( abs( z1 )<=tol ) then
                 z( 1_${ik}$ ) = tol
              else
                 z( 1_${ik}$ ) = z1
              end if
           end if
           ! move the rest of the updating row to z.
           call stdlib${ii}$_dcopy( k-1, u2( 2_${ik}$, 1_${ik}$ ), 1_${ik}$, z( 2_${ik}$ ), 1_${ik}$ )
           ! determine the first column of u2, the first row of vt2 and the
           ! last row of vt.
           call stdlib${ii}$_dlaset( 'A', n, 1_${ik}$, zero, zero, u2, ldu2 )
           u2( nlp1, 1_${ik}$ ) = one
           if( m>n ) then
              do i = 1, nlp1
                 vt( m, i ) = -s*vt( nlp1, i )
                 vt2( 1_${ik}$, i ) = c*vt( nlp1, i )
              end do
              do i = nlp2, m
                 vt2( 1_${ik}$, i ) = s*vt( m, i )
                 vt( m, i ) = c*vt( m, i )
              end do
           else
              call stdlib${ii}$_dcopy( m, vt( nlp1, 1_${ik}$ ), ldvt, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2 )
           end if
           if( m>n ) then
              call stdlib${ii}$_dcopy( m, vt( m, 1_${ik}$ ), ldvt, vt2( m, 1_${ik}$ ), ldvt2 )
           end if
           ! the deflated singular values and their corresponding vectors go
           ! into the back of d, u, and v respectively.
           if( n>k ) then
              call stdlib${ii}$_dcopy( n-k, dsigma( k+1 ), 1_${ik}$, d( k+1 ), 1_${ik}$ )
              call stdlib${ii}$_dlacpy( 'A', n, n-k, u2( 1_${ik}$, k+1 ), ldu2, u( 1_${ik}$, k+1 ),ldu )
              call stdlib${ii}$_dlacpy( 'A', n-k, m, vt2( k+1, 1_${ik}$ ), ldvt2, vt( k+1, 1_${ik}$ ),ldvt )
           end if
           ! copy ctot into coltyp for referencing in stdlib${ii}$_dlasd3.
           do j = 1, 4
              coltyp( j ) = ctot( j )
           end do
           return
     end subroutine stdlib${ii}$_dlasd2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lasd2( nl, nr, sqre, k, d, z, alpha, beta, u, ldu, vt,ldvt, dsigma, &
     !! DLASD2: merges the two sets of singular values together into a single
     !! sorted set.  Then it tries to deflate the size of the problem.
     !! There are two ways in which deflation can occur:  when two or more
     !! singular values are close together or if there is a tiny entry in the
     !! Z vector.  For each such occurrence the order of the related secular
     !! equation problem is reduced by one.
     !! DLASD2 is called from DLASD1.
               u2, ldu2, vt2, ldvt2, idxp, idx,idxc, idxq, coltyp, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, k
           integer(${ik}$), intent(in) :: ldu, ldu2, ldvt, ldvt2, nl, nr, sqre
           real(${rk}$), intent(in) :: alpha, beta
           ! Array Arguments 
           integer(${ik}$), intent(out) :: coltyp(*), idx(*), idxc(*), idxp(*)
           integer(${ik}$), intent(inout) :: idxq(*)
           real(${rk}$), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*)
           real(${rk}$), intent(out) :: dsigma(*), u2(ldu2,*), vt2(ldvt2,*), z(*)
        ! =====================================================================
           
           ! Local Arrays 
           integer(${ik}$) :: ctot(4_${ik}$), psm(4_${ik}$)
           ! Local Scalars 
           integer(${ik}$) :: ct, i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2
           real(${rk}$) :: c, eps, hlftol, s, tau, tol, z1
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           if( ldu<n ) then
              info = -10_${ik}$
           else if( ldvt<m ) then
              info = -12_${ik}$
           else if( ldu2<n ) then
              info = -15_${ik}$
           else if( ldvt2<m ) then
              info = -17_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD2', -info )
              return
           end if
           nlp1 = nl + 1_${ik}$
           nlp2 = nl + 2_${ik}$
           ! generate the first part of the vector z; and move the singular
           ! values in the first part of d one position backward.
           z1 = alpha*vt( nlp1, nlp1 )
           z( 1_${ik}$ ) = z1
           do i = nl, 1, -1
              z( i+1 ) = alpha*vt( i, nlp1 )
              d( i+1 ) = d( i )
              idxq( i+1 ) = idxq( i ) + 1_${ik}$
           end do
           ! generate the second part of the vector z.
           do i = nlp2, m
              z( i ) = beta*vt( i, nlp2 )
           end do
           ! initialize some reference arrays.
           do i = 2, nlp1
              coltyp( i ) = 1_${ik}$
           end do
           do i = nlp2, n
              coltyp( i ) = 2_${ik}$
           end do
           ! sort the singular values into increasing order
           do i = nlp2, n
              idxq( i ) = idxq( i ) + nlp1
           end do
           ! dsigma, idxc, idxc, and the first column of u2
           ! are used as storage space.
           do i = 2, n
              dsigma( i ) = d( idxq( i ) )
              u2( i, 1_${ik}$ ) = z( idxq( i ) )
              idxc( i ) = coltyp( idxq( i ) )
           end do
           call stdlib${ii}$_${ri}$lamrg( nl, nr, dsigma( 2_${ik}$ ), 1_${ik}$, 1_${ik}$, idx( 2_${ik}$ ) )
           do i = 2, n
              idxi = 1_${ik}$ + idx( i )
              d( i ) = dsigma( idxi )
              z( i ) = u2( idxi, 1_${ik}$ )
              coltyp( i ) = idxc( idxi )
           end do
           ! calculate the allowable deflation tolerance
           eps = stdlib${ii}$_${ri}$lamch( 'EPSILON' )
           tol = max( abs( alpha ), abs( beta ) )
           tol = eight*eps*max( abs( d( n ) ), tol )
           ! there are 2 kinds of deflation -- first a value in the z-vector
           ! is small, second two (or more) singular values are very close
           ! together (their difference is small).
           ! if the value in the z-vector is small, we simply permute the
           ! array so that the corresponding singular value is moved to the
           ! end.
           ! if two values in the d-vector are close, we perform a two-sided
           ! rotation designed to make one of the corresponding z-vector
           ! entries zero, and then permute the array so that the deflated
           ! singular value is moved to the end.
           ! if there are multiple singular values then the problem deflates.
           ! here the number of equal singular values are found.  as each equal
           ! singular value is found, an elementary reflector is computed to
           ! rotate the corresponding singular subspace so that the
           ! corresponding components of z are zero in this new basis.
           k = 1_${ik}$
           k2 = n + 1_${ik}$
           do j = 2, n
              if( abs( z( j ) )<=tol ) then
                 ! deflate due to small z component.
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = j
                 coltyp( j ) = 4_${ik}$
                 if( j==n )go to 120
              else
                 jprev = j
                 go to 90
              end if
           end do
           90 continue
           j = jprev
           100 continue
           j = j + 1_${ik}$
           if( j>n )go to 110
           if( abs( z( j ) )<=tol ) then
              ! deflate due to small z component.
              k2 = k2 - 1_${ik}$
              idxp( k2 ) = j
              coltyp( j ) = 4_${ik}$
           else
              ! check if singular values are close enough to allow deflation.
              if( abs( d( j )-d( jprev ) )<=tol ) then
                 ! deflation is possible.
                 s = z( jprev )
                 c = z( j )
                 ! find sqrt(a**2+b**2) without overflow or
                 ! destructive underflow.
                 tau = stdlib${ii}$_${ri}$lapy2( c, s )
                 c = c / tau
                 s = -s / tau
                 z( j ) = tau
                 z( jprev ) = zero
                 ! apply back the givens rotation to the left and right
                 ! singular vector matrices.
                 idxjp = idxq( idx( jprev )+1_${ik}$ )
                 idxj = idxq( idx( j )+1_${ik}$ )
                 if( idxjp<=nlp1 ) then
                    idxjp = idxjp - 1_${ik}$
                 end if
                 if( idxj<=nlp1 ) then
                    idxj = idxj - 1_${ik}$
                 end if
                 call stdlib${ii}$_${ri}$rot( n, u( 1_${ik}$, idxjp ), 1_${ik}$, u( 1_${ik}$, idxj ), 1_${ik}$, c, s )
                 call stdlib${ii}$_${ri}$rot( m, vt( idxjp, 1_${ik}$ ), ldvt, vt( idxj, 1_${ik}$ ), ldvt, c,s )
                 if( coltyp( j )/=coltyp( jprev ) ) then
                    coltyp( j ) = 3_${ik}$
                 end if
                 coltyp( jprev ) = 4_${ik}$
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = jprev
                 jprev = j
              else
                 k = k + 1_${ik}$
                 u2( k, 1_${ik}$ ) = z( jprev )
                 dsigma( k ) = d( jprev )
                 idxp( k ) = jprev
                 jprev = j
              end if
           end if
           go to 100
           110 continue
           ! record the last singular value.
           k = k + 1_${ik}$
           u2( k, 1_${ik}$ ) = z( jprev )
           dsigma( k ) = d( jprev )
           idxp( k ) = jprev
           120 continue
           ! count up the total number of the various types of columns, then
           ! form a permutation which positions the four column types into
           ! four groups of uniform structure (although one or more of these
           ! groups may be empty).
           do j = 1, 4
              ctot( j ) = 0_${ik}$
           end do
           do j = 2, n
              ct = coltyp( j )
              ctot( ct ) = ctot( ct ) + 1_${ik}$
           end do
           ! psm(*) = position in submatrix (of types 1 through 4)
           psm( 1_${ik}$ ) = 2_${ik}$
           psm( 2_${ik}$ ) = 2_${ik}$ + ctot( 1_${ik}$ )
           psm( 3_${ik}$ ) = psm( 2_${ik}$ ) + ctot( 2_${ik}$ )
           psm( 4_${ik}$ ) = psm( 3_${ik}$ ) + ctot( 3_${ik}$ )
           ! fill out the idxc array so that the permutation which it induces
           ! will place all type-1 columns first, all type-2 columns next,
           ! then all type-3's, and finally all type-4's, starting from the
           ! second column. this applies similarly to the rows of vt.
           do j = 2, n
              jp = idxp( j )
              ct = coltyp( jp )
              idxc( psm( ct ) ) = j
              psm( ct ) = psm( ct ) + 1_${ik}$
           end do
           ! sort the singular values and corresponding singular vectors into
           ! dsigma, u2, and vt2 respectively.  the singular values/vectors
           ! which were not deflated go into the first k slots of dsigma, u2,
           ! and vt2 respectively, while those which were deflated go into the
           ! last n - k slots, except that the first column/row will be treated
           ! separately.
           do j = 2, n
              jp = idxp( j )
              dsigma( j ) = d( jp )
              idxj = idxq( idx( idxp( idxc( j ) ) )+1_${ik}$ )
              if( idxj<=nlp1 ) then
                 idxj = idxj - 1_${ik}$
              end if
              call stdlib${ii}$_${ri}$copy( n, u( 1_${ik}$, idxj ), 1_${ik}$, u2( 1_${ik}$, j ), 1_${ik}$ )
              call stdlib${ii}$_${ri}$copy( m, vt( idxj, 1_${ik}$ ), ldvt, vt2( j, 1_${ik}$ ), ldvt2 )
           end do
           ! determine dsigma(1), dsigma(2) and z(1)
           dsigma( 1_${ik}$ ) = zero
           hlftol = tol / two
           if( abs( dsigma( 2_${ik}$ ) )<=hlftol )dsigma( 2_${ik}$ ) = hlftol
           if( m>n ) then
              z( 1_${ik}$ ) = stdlib${ii}$_${ri}$lapy2( z1, z( m ) )
              if( z( 1_${ik}$ )<=tol ) then
                 c = one
                 s = zero
                 z( 1_${ik}$ ) = tol
              else
                 c = z1 / z( 1_${ik}$ )
                 s = z( m ) / z( 1_${ik}$ )
              end if
           else
              if( abs( z1 )<=tol ) then
                 z( 1_${ik}$ ) = tol
              else
                 z( 1_${ik}$ ) = z1
              end if
           end if
           ! move the rest of the updating row to z.
           call stdlib${ii}$_${ri}$copy( k-1, u2( 2_${ik}$, 1_${ik}$ ), 1_${ik}$, z( 2_${ik}$ ), 1_${ik}$ )
           ! determine the first column of u2, the first row of vt2 and the
           ! last row of vt.
           call stdlib${ii}$_${ri}$laset( 'A', n, 1_${ik}$, zero, zero, u2, ldu2 )
           u2( nlp1, 1_${ik}$ ) = one
           if( m>n ) then
              do i = 1, nlp1
                 vt( m, i ) = -s*vt( nlp1, i )
                 vt2( 1_${ik}$, i ) = c*vt( nlp1, i )
              end do
              do i = nlp2, m
                 vt2( 1_${ik}$, i ) = s*vt( m, i )
                 vt( m, i ) = c*vt( m, i )
              end do
           else
              call stdlib${ii}$_${ri}$copy( m, vt( nlp1, 1_${ik}$ ), ldvt, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2 )
           end if
           if( m>n ) then
              call stdlib${ii}$_${ri}$copy( m, vt( m, 1_${ik}$ ), ldvt, vt2( m, 1_${ik}$ ), ldvt2 )
           end if
           ! the deflated singular values and their corresponding vectors go
           ! into the back of d, u, and v respectively.
           if( n>k ) then
              call stdlib${ii}$_${ri}$copy( n-k, dsigma( k+1 ), 1_${ik}$, d( k+1 ), 1_${ik}$ )
              call stdlib${ii}$_${ri}$lacpy( 'A', n, n-k, u2( 1_${ik}$, k+1 ), ldu2, u( 1_${ik}$, k+1 ),ldu )
              call stdlib${ii}$_${ri}$lacpy( 'A', n-k, m, vt2( k+1, 1_${ik}$ ), ldvt2, vt( k+1, 1_${ik}$ ),ldvt )
           end if
           ! copy ctot into coltyp for referencing in stdlib${ii}$_${ri}$lasd3.
           do j = 1, 4
              coltyp( j ) = ctot( j )
           end do
           return
     end subroutine stdlib${ii}$_${ri}$lasd2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slasd3( nl, nr, sqre, k, d, q, ldq, dsigma, u, ldu, u2,ldu2, vt, ldvt,&
     !! SLASD3 finds all the square roots of the roots of the secular
     !! equation, as defined by the values in D and Z.  It makes the
     !! appropriate calls to SLASD4 and then updates the singular
     !! vectors by matrix multiplication.
     !! This code makes very mild assumptions about floating point
     !! arithmetic. It will work on machines with a guard digit in
     !! add/subtract, or on those binary machines without guard digits
     !! which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
     !! It could conceivably fail on hexadecimal or decimal machines
     !! without guard digits, but we know of none.
     !! SLASD3 is called from SLASD1.
                vt2, ldvt2, idxc, ctot, z,info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldq, ldu, ldu2, ldvt, ldvt2, nl, nr, sqre
           ! Array Arguments 
           integer(${ik}$), intent(in) :: ctot(*), idxc(*)
           real(sp), intent(out) :: d(*), q(ldq,*), u(ldu,*), vt(ldvt,*)
           real(sp), intent(inout) :: dsigma(*), vt2(ldvt2,*), z(*)
           real(sp), intent(in) :: u2(ldu2,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: ctemp, i, j, jc, ktemp, m, n, nlp1, nlp2, nrp1
           real(sp) :: rho, temp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           nlp1 = nl + 1_${ik}$
           nlp2 = nl + 2_${ik}$
           if( ( k<1_${ik}$ ) .or. ( k>n ) ) then
              info = -4_${ik}$
           else if( ldq<k ) then
              info = -7_${ik}$
           else if( ldu<n ) then
              info = -10_${ik}$
           else if( ldu2<n ) then
              info = -12_${ik}$
           else if( ldvt<m ) then
              info = -14_${ik}$
           else if( ldvt2<m ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLASD3', -info )
              return
           end if
           ! quick return if possible
           if( k==1_${ik}$ ) then
              d( 1_${ik}$ ) = abs( z( 1_${ik}$ ) )
              call stdlib${ii}$_scopy( m, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, vt( 1_${ik}$, 1_${ik}$ ), ldvt )
              if( z( 1_${ik}$ )>zero ) then
                 call stdlib${ii}$_scopy( n, u2( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, u( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              else
                 do i = 1, n
                    u( i, 1_${ik}$ ) = -u2( i, 1_${ik}$ )
                 end do
              end if
              return
           end if
           ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can
           ! be computed with high relative accuracy (barring over/underflow).
           ! this is a problem on machines without a guard digit in
           ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2).
           ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i),
           ! which on any of these machines zeros out the bottommost
           ! bit of dsigma(i) if it is 1; this makes the subsequent
           ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation
           ! occurs. on binary machines with a guard digit (almost all
           ! machines) it does not change dsigma(i) at all. on hexadecimal
           ! and decimal machines with a guard digit, it slightly
           ! changes the bottommost bits of dsigma(i). it does not account
           ! for hexadecimal or decimal machines without guard digits
           ! (we know of none). we use a subroutine call to compute
           ! 2*dsigma(i) to prevent optimizing compilers from eliminating
           ! this code.
           do i = 1, k
              dsigma( i ) = stdlib${ii}$_slamc3( dsigma( i ), dsigma( i ) ) - dsigma( i )
           end do
           ! keep a copy of z.
           call stdlib${ii}$_scopy( k, z, 1_${ik}$, q, 1_${ik}$ )
           ! normalize z.
           rho = stdlib${ii}$_snrm2( k, z, 1_${ik}$ )
           call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, rho, one, k, 1_${ik}$, z, k, info )
           rho = rho*rho
           ! find the new singular values.
           do j = 1, k
              call stdlib${ii}$_slasd4( k, j, dsigma, z, u( 1_${ik}$, j ), rho, d( j ),vt( 1_${ik}$, j ), info )
                        
              ! if the zero finder fails, report the convergence failure.
              if( info/=0_${ik}$ ) then
                 return
              end if
           end do
           ! compute updated z.
           do i = 1, k
              z( i ) = u( i, k )*vt( i, k )
              do j = 1, i - 1
                 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j ) ) /( dsigma( i &
                           )+dsigma( j ) ) )
              end do
              do j = i, k - 1
                 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j+1 ) ) /( dsigma( &
                           i )+dsigma( j+1 ) ) )
              end do
              z( i ) = sign( sqrt( abs( z( i ) ) ), q( i, 1_${ik}$ ) )
           end do
           ! compute left singular vectors of the modified diagonal matrix,
           ! and store related information for the right singular vectors.
           do i = 1, k
              vt( 1_${ik}$, i ) = z( 1_${ik}$ ) / u( 1_${ik}$, i ) / vt( 1_${ik}$, i )
              u( 1_${ik}$, i ) = negone
              do j = 2, k
                 vt( j, i ) = z( j ) / u( j, i ) / vt( j, i )
                 u( j, i ) = dsigma( j )*vt( j, i )
              end do
              temp = stdlib${ii}$_snrm2( k, u( 1_${ik}$, i ), 1_${ik}$ )
              q( 1_${ik}$, i ) = u( 1_${ik}$, i ) / temp
              do j = 2, k
                 jc = idxc( j )
                 q( j, i ) = u( jc, i ) / temp
              end do
           end do
           ! update the left singular vector matrix.
           if( k==2_${ik}$ ) then
              call stdlib${ii}$_sgemm( 'N', 'N', n, k, k, one, u2, ldu2, q, ldq, zero, u,ldu )
              go to 100
           end if
           if( ctot( 1_${ik}$ )>0_${ik}$ ) then
              call stdlib${ii}$_sgemm( 'N', 'N', nl, k, ctot( 1_${ik}$ ), one, u2( 1_${ik}$, 2_${ik}$ ), ldu2,q( 2_${ik}$, 1_${ik}$ ), ldq,&
                         zero, u( 1_${ik}$, 1_${ik}$ ), ldu )
              if( ctot( 3_${ik}$ )>0_${ik}$ ) then
                 ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
                 call stdlib${ii}$_sgemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( &
                           ktemp, 1_${ik}$ ), ldq, one, u( 1_${ik}$, 1_${ik}$ ), ldu )
              end if
           else if( ctot( 3_${ik}$ )>0_${ik}$ ) then
              ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
              call stdlib${ii}$_sgemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( ktemp, &
                        1_${ik}$ ), ldq, zero, u( 1_${ik}$, 1_${ik}$ ), ldu )
           else
              call stdlib${ii}$_slacpy( 'F', nl, k, u2, ldu2, u, ldu )
           end if
           call stdlib${ii}$_scopy( k, q( 1_${ik}$, 1_${ik}$ ), ldq, u( nlp1, 1_${ik}$ ), ldu )
           ktemp = 2_${ik}$ + ctot( 1_${ik}$ )
           ctemp = ctot( 2_${ik}$ ) + ctot( 3_${ik}$ )
           call stdlib${ii}$_sgemm( 'N', 'N', nr, k, ctemp, one, u2( nlp2, ktemp ), ldu2,q( ktemp, 1_${ik}$ ), &
                     ldq, zero, u( nlp2, 1_${ik}$ ), ldu )
           ! generate the right singular vectors.
           100 continue
           do i = 1, k
              temp = stdlib${ii}$_snrm2( k, vt( 1_${ik}$, i ), 1_${ik}$ )
              q( i, 1_${ik}$ ) = vt( 1_${ik}$, i ) / temp
              do j = 2, k
                 jc = idxc( j )
                 q( i, j ) = vt( jc, i ) / temp
              end do
           end do
           ! update the right singular vector matrix.
           if( k==2_${ik}$ ) then
              call stdlib${ii}$_sgemm( 'N', 'N', k, m, k, one, q, ldq, vt2, ldvt2, zero,vt, ldvt )
                        
              return
           end if
           ktemp = 1_${ik}$ + ctot( 1_${ik}$ )
           call stdlib${ii}$_sgemm( 'N', 'N', k, nlp1, ktemp, one, q( 1_${ik}$, 1_${ik}$ ), ldq,vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, &
                     zero, vt( 1_${ik}$, 1_${ik}$ ), ldvt )
           ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
           if( ktemp<=ldvt2 )call stdlib${ii}$_sgemm( 'N', 'N', k, nlp1, ctot( 3_${ik}$ ), one, q( 1_${ik}$, ktemp ),&
                     ldq, vt2( ktemp, 1_${ik}$ ), ldvt2, one, vt( 1_${ik}$, 1_${ik}$ ),ldvt )
           ktemp = ctot( 1_${ik}$ ) + 1_${ik}$
           nrp1 = nr + sqre
           if( ktemp>1_${ik}$ ) then
              do i = 1, k
                 q( i, ktemp ) = q( i, 1_${ik}$ )
              end do
              do i = nlp2, m
                 vt2( ktemp, i ) = vt2( 1_${ik}$, i )
              end do
           end if
           ctemp = 1_${ik}$ + ctot( 2_${ik}$ ) + ctot( 3_${ik}$ )
           call stdlib${ii}$_sgemm( 'N', 'N', k, nrp1, ctemp, one, q( 1_${ik}$, ktemp ), ldq,vt2( ktemp, nlp2 )&
                     , ldvt2, zero, vt( 1_${ik}$, nlp2 ), ldvt )
           return
     end subroutine stdlib${ii}$_slasd3

     pure module subroutine stdlib${ii}$_dlasd3( nl, nr, sqre, k, d, q, ldq, dsigma, u, ldu, u2,ldu2, vt, ldvt,&
     !! DLASD3 finds all the square roots of the roots of the secular
     !! equation, as defined by the values in D and Z.  It makes the
     !! appropriate calls to DLASD4 and then updates the singular
     !! vectors by matrix multiplication.
     !! This code makes very mild assumptions about floating point
     !! arithmetic. It will work on machines with a guard digit in
     !! add/subtract, or on those binary machines without guard digits
     !! which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
     !! It could conceivably fail on hexadecimal or decimal machines
     !! without guard digits, but we know of none.
     !! DLASD3 is called from DLASD1.
                vt2, ldvt2, idxc, ctot, z,info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldq, ldu, ldu2, ldvt, ldvt2, nl, nr, sqre
           ! Array Arguments 
           integer(${ik}$), intent(in) :: ctot(*), idxc(*)
           real(dp), intent(out) :: d(*), q(ldq,*), u(ldu,*), vt(ldvt,*)
           real(dp), intent(inout) :: dsigma(*), vt2(ldvt2,*), z(*)
           real(dp), intent(in) :: u2(ldu2,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: ctemp, i, j, jc, ktemp, m, n, nlp1, nlp2, nrp1
           real(dp) :: rho, temp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           nlp1 = nl + 1_${ik}$
           nlp2 = nl + 2_${ik}$
           if( ( k<1_${ik}$ ) .or. ( k>n ) ) then
              info = -4_${ik}$
           else if( ldq<k ) then
              info = -7_${ik}$
           else if( ldu<n ) then
              info = -10_${ik}$
           else if( ldu2<n ) then
              info = -12_${ik}$
           else if( ldvt<m ) then
              info = -14_${ik}$
           else if( ldvt2<m ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD3', -info )
              return
           end if
           ! quick return if possible
           if( k==1_${ik}$ ) then
              d( 1_${ik}$ ) = abs( z( 1_${ik}$ ) )
              call stdlib${ii}$_dcopy( m, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, vt( 1_${ik}$, 1_${ik}$ ), ldvt )
              if( z( 1_${ik}$ )>zero ) then
                 call stdlib${ii}$_dcopy( n, u2( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, u( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              else
                 do i = 1, n
                    u( i, 1_${ik}$ ) = -u2( i, 1_${ik}$ )
                 end do
              end if
              return
           end if
           ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can
           ! be computed with high relative accuracy (barring over/underflow).
           ! this is a problem on machines without a guard digit in
           ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2).
           ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i),
           ! which on any of these machines zeros out the bottommost
           ! bit of dsigma(i) if it is 1; this makes the subsequent
           ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation
           ! occurs. on binary machines with a guard digit (almost all
           ! machines) it does not change dsigma(i) at all. on hexadecimal
           ! and decimal machines with a guard digit, it slightly
           ! changes the bottommost bits of dsigma(i). it does not account
           ! for hexadecimal or decimal machines without guard digits
           ! (we know of none). we use a subroutine call to compute
           ! 2*dsigma(i) to prevent optimizing compilers from eliminating
           ! this code.
           do i = 1, k
              dsigma( i ) = stdlib${ii}$_dlamc3( dsigma( i ), dsigma( i ) ) - dsigma( i )
           end do
           ! keep a copy of z.
           call stdlib${ii}$_dcopy( k, z, 1_${ik}$, q, 1_${ik}$ )
           ! normalize z.
           rho = stdlib${ii}$_dnrm2( k, z, 1_${ik}$ )
           call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, rho, one, k, 1_${ik}$, z, k, info )
           rho = rho*rho
           ! find the new singular values.
           do j = 1, k
              call stdlib${ii}$_dlasd4( k, j, dsigma, z, u( 1_${ik}$, j ), rho, d( j ),vt( 1_${ik}$, j ), info )
                        
              ! if the zero finder fails, report the convergence failure.
              if( info/=0_${ik}$ ) then
                 return
              end if
           end do
           ! compute updated z.
           do i = 1, k
              z( i ) = u( i, k )*vt( i, k )
              do j = 1, i - 1
                 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j ) ) /( dsigma( i &
                           )+dsigma( j ) ) )
              end do
              do j = i, k - 1
                 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j+1 ) ) /( dsigma( &
                           i )+dsigma( j+1 ) ) )
              end do
              z( i ) = sign( sqrt( abs( z( i ) ) ), q( i, 1_${ik}$ ) )
           end do
           ! compute left singular vectors of the modified diagonal matrix,
           ! and store related information for the right singular vectors.
           do i = 1, k
              vt( 1_${ik}$, i ) = z( 1_${ik}$ ) / u( 1_${ik}$, i ) / vt( 1_${ik}$, i )
              u( 1_${ik}$, i ) = negone
              do j = 2, k
                 vt( j, i ) = z( j ) / u( j, i ) / vt( j, i )
                 u( j, i ) = dsigma( j )*vt( j, i )
              end do
              temp = stdlib${ii}$_dnrm2( k, u( 1_${ik}$, i ), 1_${ik}$ )
              q( 1_${ik}$, i ) = u( 1_${ik}$, i ) / temp
              do j = 2, k
                 jc = idxc( j )
                 q( j, i ) = u( jc, i ) / temp
              end do
           end do
           ! update the left singular vector matrix.
           if( k==2_${ik}$ ) then
              call stdlib${ii}$_dgemm( 'N', 'N', n, k, k, one, u2, ldu2, q, ldq, zero, u,ldu )
              go to 100
           end if
           if( ctot( 1_${ik}$ )>0_${ik}$ ) then
              call stdlib${ii}$_dgemm( 'N', 'N', nl, k, ctot( 1_${ik}$ ), one, u2( 1_${ik}$, 2_${ik}$ ), ldu2,q( 2_${ik}$, 1_${ik}$ ), ldq,&
                         zero, u( 1_${ik}$, 1_${ik}$ ), ldu )
              if( ctot( 3_${ik}$ )>0_${ik}$ ) then
                 ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
                 call stdlib${ii}$_dgemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( &
                           ktemp, 1_${ik}$ ), ldq, one, u( 1_${ik}$, 1_${ik}$ ), ldu )
              end if
           else if( ctot( 3_${ik}$ )>0_${ik}$ ) then
              ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
              call stdlib${ii}$_dgemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( ktemp, &
                        1_${ik}$ ), ldq, zero, u( 1_${ik}$, 1_${ik}$ ), ldu )
           else
              call stdlib${ii}$_dlacpy( 'F', nl, k, u2, ldu2, u, ldu )
           end if
           call stdlib${ii}$_dcopy( k, q( 1_${ik}$, 1_${ik}$ ), ldq, u( nlp1, 1_${ik}$ ), ldu )
           ktemp = 2_${ik}$ + ctot( 1_${ik}$ )
           ctemp = ctot( 2_${ik}$ ) + ctot( 3_${ik}$ )
           call stdlib${ii}$_dgemm( 'N', 'N', nr, k, ctemp, one, u2( nlp2, ktemp ), ldu2,q( ktemp, 1_${ik}$ ), &
                     ldq, zero, u( nlp2, 1_${ik}$ ), ldu )
           ! generate the right singular vectors.
           100 continue
           do i = 1, k
              temp = stdlib${ii}$_dnrm2( k, vt( 1_${ik}$, i ), 1_${ik}$ )
              q( i, 1_${ik}$ ) = vt( 1_${ik}$, i ) / temp
              do j = 2, k
                 jc = idxc( j )
                 q( i, j ) = vt( jc, i ) / temp
              end do
           end do
           ! update the right singular vector matrix.
           if( k==2_${ik}$ ) then
              call stdlib${ii}$_dgemm( 'N', 'N', k, m, k, one, q, ldq, vt2, ldvt2, zero,vt, ldvt )
                        
              return
           end if
           ktemp = 1_${ik}$ + ctot( 1_${ik}$ )
           call stdlib${ii}$_dgemm( 'N', 'N', k, nlp1, ktemp, one, q( 1_${ik}$, 1_${ik}$ ), ldq,vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, &
                     zero, vt( 1_${ik}$, 1_${ik}$ ), ldvt )
           ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
           if( ktemp<=ldvt2 )call stdlib${ii}$_dgemm( 'N', 'N', k, nlp1, ctot( 3_${ik}$ ), one, q( 1_${ik}$, ktemp ),&
                     ldq, vt2( ktemp, 1_${ik}$ ), ldvt2, one, vt( 1_${ik}$, 1_${ik}$ ),ldvt )
           ktemp = ctot( 1_${ik}$ ) + 1_${ik}$
           nrp1 = nr + sqre
           if( ktemp>1_${ik}$ ) then
              do i = 1, k
                 q( i, ktemp ) = q( i, 1_${ik}$ )
              end do
              do i = nlp2, m
                 vt2( ktemp, i ) = vt2( 1_${ik}$, i )
              end do
           end if
           ctemp = 1_${ik}$ + ctot( 2_${ik}$ ) + ctot( 3_${ik}$ )
           call stdlib${ii}$_dgemm( 'N', 'N', k, nrp1, ctemp, one, q( 1_${ik}$, ktemp ), ldq,vt2( ktemp, nlp2 )&
                     , ldvt2, zero, vt( 1_${ik}$, nlp2 ), ldvt )
           return
     end subroutine stdlib${ii}$_dlasd3

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lasd3( nl, nr, sqre, k, d, q, ldq, dsigma, u, ldu, u2,ldu2, vt, ldvt,&
     !! DLASD3: finds all the square roots of the roots of the secular
     !! equation, as defined by the values in D and Z.  It makes the
     !! appropriate calls to DLASD4 and then updates the singular
     !! vectors by matrix multiplication.
     !! This code makes very mild assumptions about floating point
     !! arithmetic. It will work on machines with a guard digit in
     !! add/subtract, or on those binary machines without guard digits
     !! which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
     !! It could conceivably fail on hexadecimal or decimal machines
     !! without guard digits, but we know of none.
     !! DLASD3 is called from DLASD1.
                vt2, ldvt2, idxc, ctot, z,info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldq, ldu, ldu2, ldvt, ldvt2, nl, nr, sqre
           ! Array Arguments 
           integer(${ik}$), intent(in) :: ctot(*), idxc(*)
           real(${rk}$), intent(out) :: d(*), q(ldq,*), u(ldu,*), vt(ldvt,*)
           real(${rk}$), intent(inout) :: dsigma(*), vt2(ldvt2,*), z(*)
           real(${rk}$), intent(in) :: u2(ldu2,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: ctemp, i, j, jc, ktemp, m, n, nlp1, nlp2, nrp1
           real(${rk}$) :: rho, temp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           nlp1 = nl + 1_${ik}$
           nlp2 = nl + 2_${ik}$
           if( ( k<1_${ik}$ ) .or. ( k>n ) ) then
              info = -4_${ik}$
           else if( ldq<k ) then
              info = -7_${ik}$
           else if( ldu<n ) then
              info = -10_${ik}$
           else if( ldu2<n ) then
              info = -12_${ik}$
           else if( ldvt<m ) then
              info = -14_${ik}$
           else if( ldvt2<m ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD3', -info )
              return
           end if
           ! quick return if possible
           if( k==1_${ik}$ ) then
              d( 1_${ik}$ ) = abs( z( 1_${ik}$ ) )
              call stdlib${ii}$_${ri}$copy( m, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, vt( 1_${ik}$, 1_${ik}$ ), ldvt )
              if( z( 1_${ik}$ )>zero ) then
                 call stdlib${ii}$_${ri}$copy( n, u2( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, u( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              else
                 do i = 1, n
                    u( i, 1_${ik}$ ) = -u2( i, 1_${ik}$ )
                 end do
              end if
              return
           end if
           ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can
           ! be computed with high relative accuracy (barring over/underflow).
           ! this is a problem on machines without a guard digit in
           ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2).
           ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i),
           ! which on any of these machines zeros out the bottommost
           ! bit of dsigma(i) if it is 1; this makes the subsequent
           ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation
           ! occurs. on binary machines with a guard digit (almost all
           ! machines) it does not change dsigma(i) at all. on hexadecimal
           ! and decimal machines with a guard digit, it slightly
           ! changes the bottommost bits of dsigma(i). it does not account
           ! for hexadecimal or decimal machines without guard digits
           ! (we know of none). we use a subroutine call to compute
           ! 2*dsigma(i) to prevent optimizing compilers from eliminating
           ! this code.
           do i = 1, k
              dsigma( i ) = stdlib${ii}$_${ri}$lamc3( dsigma( i ), dsigma( i ) ) - dsigma( i )
           end do
           ! keep a copy of z.
           call stdlib${ii}$_${ri}$copy( k, z, 1_${ik}$, q, 1_${ik}$ )
           ! normalize z.
           rho = stdlib${ii}$_${ri}$nrm2( k, z, 1_${ik}$ )
           call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, rho, one, k, 1_${ik}$, z, k, info )
           rho = rho*rho
           ! find the new singular values.
           do j = 1, k
              call stdlib${ii}$_${ri}$lasd4( k, j, dsigma, z, u( 1_${ik}$, j ), rho, d( j ),vt( 1_${ik}$, j ), info )
                        
              ! if the zero finder fails, report the convergence failure.
              if( info/=0_${ik}$ ) then
                 return
              end if
           end do
           ! compute updated z.
           do i = 1, k
              z( i ) = u( i, k )*vt( i, k )
              do j = 1, i - 1
                 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j ) ) /( dsigma( i &
                           )+dsigma( j ) ) )
              end do
              do j = i, k - 1
                 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j+1 ) ) /( dsigma( &
                           i )+dsigma( j+1 ) ) )
              end do
              z( i ) = sign( sqrt( abs( z( i ) ) ), q( i, 1_${ik}$ ) )
           end do
           ! compute left singular vectors of the modified diagonal matrix,
           ! and store related information for the right singular vectors.
           do i = 1, k
              vt( 1_${ik}$, i ) = z( 1_${ik}$ ) / u( 1_${ik}$, i ) / vt( 1_${ik}$, i )
              u( 1_${ik}$, i ) = negone
              do j = 2, k
                 vt( j, i ) = z( j ) / u( j, i ) / vt( j, i )
                 u( j, i ) = dsigma( j )*vt( j, i )
              end do
              temp = stdlib${ii}$_${ri}$nrm2( k, u( 1_${ik}$, i ), 1_${ik}$ )
              q( 1_${ik}$, i ) = u( 1_${ik}$, i ) / temp
              do j = 2, k
                 jc = idxc( j )
                 q( j, i ) = u( jc, i ) / temp
              end do
           end do
           ! update the left singular vector matrix.
           if( k==2_${ik}$ ) then
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, k, k, one, u2, ldu2, q, ldq, zero, u,ldu )
              go to 100
           end if
           if( ctot( 1_${ik}$ )>0_${ik}$ ) then
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', nl, k, ctot( 1_${ik}$ ), one, u2( 1_${ik}$, 2_${ik}$ ), ldu2,q( 2_${ik}$, 1_${ik}$ ), ldq,&
                         zero, u( 1_${ik}$, 1_${ik}$ ), ldu )
              if( ctot( 3_${ik}$ )>0_${ik}$ ) then
                 ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( &
                           ktemp, 1_${ik}$ ), ldq, one, u( 1_${ik}$, 1_${ik}$ ), ldu )
              end if
           else if( ctot( 3_${ik}$ )>0_${ik}$ ) then
              ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( ktemp, &
                        1_${ik}$ ), ldq, zero, u( 1_${ik}$, 1_${ik}$ ), ldu )
           else
              call stdlib${ii}$_${ri}$lacpy( 'F', nl, k, u2, ldu2, u, ldu )
           end if
           call stdlib${ii}$_${ri}$copy( k, q( 1_${ik}$, 1_${ik}$ ), ldq, u( nlp1, 1_${ik}$ ), ldu )
           ktemp = 2_${ik}$ + ctot( 1_${ik}$ )
           ctemp = ctot( 2_${ik}$ ) + ctot( 3_${ik}$ )
           call stdlib${ii}$_${ri}$gemm( 'N', 'N', nr, k, ctemp, one, u2( nlp2, ktemp ), ldu2,q( ktemp, 1_${ik}$ ), &
                     ldq, zero, u( nlp2, 1_${ik}$ ), ldu )
           ! generate the right singular vectors.
           100 continue
           do i = 1, k
              temp = stdlib${ii}$_${ri}$nrm2( k, vt( 1_${ik}$, i ), 1_${ik}$ )
              q( i, 1_${ik}$ ) = vt( 1_${ik}$, i ) / temp
              do j = 2, k
                 jc = idxc( j )
                 q( i, j ) = vt( jc, i ) / temp
              end do
           end do
           ! update the right singular vector matrix.
           if( k==2_${ik}$ ) then
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', k, m, k, one, q, ldq, vt2, ldvt2, zero,vt, ldvt )
                        
              return
           end if
           ktemp = 1_${ik}$ + ctot( 1_${ik}$ )
           call stdlib${ii}$_${ri}$gemm( 'N', 'N', k, nlp1, ktemp, one, q( 1_${ik}$, 1_${ik}$ ), ldq,vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, &
                     zero, vt( 1_${ik}$, 1_${ik}$ ), ldvt )
           ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
           if( ktemp<=ldvt2 )call stdlib${ii}$_${ri}$gemm( 'N', 'N', k, nlp1, ctot( 3_${ik}$ ), one, q( 1_${ik}$, ktemp ),&
                     ldq, vt2( ktemp, 1_${ik}$ ), ldvt2, one, vt( 1_${ik}$, 1_${ik}$ ),ldvt )
           ktemp = ctot( 1_${ik}$ ) + 1_${ik}$
           nrp1 = nr + sqre
           if( ktemp>1_${ik}$ ) then
              do i = 1, k
                 q( i, ktemp ) = q( i, 1_${ik}$ )
              end do
              do i = nlp2, m
                 vt2( ktemp, i ) = vt2( 1_${ik}$, i )
              end do
           end if
           ctemp = 1_${ik}$ + ctot( 2_${ik}$ ) + ctot( 3_${ik}$ )
           call stdlib${ii}$_${ri}$gemm( 'N', 'N', k, nrp1, ctemp, one, q( 1_${ik}$, ktemp ), ldq,vt2( ktemp, nlp2 )&
                     , ldvt2, zero, vt( 1_${ik}$, nlp2 ), ldvt )
           return
     end subroutine stdlib${ii}$_${ri}$lasd3

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slasd4( n, i, d, z, delta, rho, sigma, work, info )
     !! This subroutine computes the square root of the I-th updated
     !! eigenvalue of a positive symmetric rank-one modification to
     !! a positive diagonal matrix whose entries are given as the squares
     !! of the corresponding entries in the array d, and that
     !! 0 <= D(i) < D(j)  for  i < j
     !! and that RHO > 0. This is arranged by the calling routine, and is
     !! no loss in generality.  The rank-one modified system is thus
     !! diag( D ) * diag( D ) +  RHO * Z * Z_transpose.
     !! where we assume the Euclidean norm of Z is 1.
     !! The method consists of approximating the rational functions in the
     !! secular equation by simpler interpolating rational functions.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: i, n
           integer(${ik}$), intent(out) :: info
           real(sp), intent(in) :: rho
           real(sp), intent(out) :: sigma
           ! Array Arguments 
           real(sp), intent(in) :: d(*), z(*)
           real(sp), intent(out) :: delta(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: maxit = 400_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: orgati, swtch, swtch3, geomavg
           integer(${ik}$) :: ii, iim1, iip1, ip1, iter, j, niter
           real(sp) :: a, b, c, delsq, delsq2, sq2, dphi, dpsi, dtiim, dtiip, dtipsq, dtisq, &
           dtnsq, dtnsq1, dw, eps, erretm, eta, phi, prew, psi, rhoinv, sglb, sgub, tau, tau2, &
                     temp, temp1, temp2, w
           ! Local Arrays 
           real(sp) :: dd(3_${ik}$), zz(3_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! since this routine is called in an inner loop, we do no argument
           ! checking.
           ! quick return for n=1 and 2.
           info = 0_${ik}$
           if( n==1_${ik}$ ) then
              ! presumably, i=1 upon entry
              sigma = sqrt( d( 1_${ik}$ )*d( 1_${ik}$ )+rho*z( 1_${ik}$ )*z( 1_${ik}$ ) )
              delta( 1_${ik}$ ) = one
              work( 1_${ik}$ ) = one
              return
           end if
           if( n==2_${ik}$ ) then
              call stdlib${ii}$_slasd5( i, d, z, delta, rho, sigma, work )
              return
           end if
           ! compute machine epsilon
           eps = stdlib${ii}$_slamch( 'EPSILON' )
           rhoinv = one / rho
           tau2= zero
           ! the case i = n
           if( i==n ) then
              ! initialize some basic variables
              ii = n - 1_${ik}$
              niter = 1_${ik}$
              ! calculate initial guess
              temp = rho / two
              ! if ||z||_2 is not one, then temp should be set to
              ! rho * ||z||_2^2 / two
              temp1 = temp / ( d( n )+sqrt( d( n )*d( n )+temp ) )
              do j = 1, n
                 work( j ) = d( j ) + d( n ) + temp1
                 delta( j ) = ( d( j )-d( n ) ) - temp1
              end do
              psi = zero
              do j = 1, n - 2
                 psi = psi + z( j )*z( j ) / ( delta( j )*work( j ) )
              end do
              c = rhoinv + psi
              w = c + z( ii )*z( ii ) / ( delta( ii )*work( ii ) ) +z( n )*z( n ) / ( delta( n )&
                        *work( n ) )
              if( w<=zero ) then
                 temp1 = sqrt( d( n )*d( n )+rho )
                 temp = z( n-1 )*z( n-1 ) / ( ( d( n-1 )+temp1 )*( d( n )-d( n-1 )+rho / ( d( n )+&
                           temp1 ) ) ) +z( n )*z( n ) / rho
                 ! the following tau2 is to approximate
                 ! sigma_n^2 - d( n )*d( n )
                 if( c<=temp ) then
                    tau = rho
                 else
                    delsq = ( d( n )-d( n-1 ) )*( d( n )+d( n-1 ) )
                    a = -c*delsq + z( n-1 )*z( n-1 ) + z( n )*z( n )
                    b = z( n )*z( n )*delsq
                    if( a<zero ) then
                       tau2 = two*b / ( sqrt( a*a+four*b*c )-a )
                    else
                       tau2 = ( a+sqrt( a*a+four*b*c ) ) / ( two*c )
                    end if
                    tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) )
                 end if
                 ! it can be proved that
                     ! d(n)^2+rho/2 <= sigma_n^2 < d(n)^2+tau2 <= d(n)^2+rho
              else
                 delsq = ( d( n )-d( n-1 ) )*( d( n )+d( n-1 ) )
                 a = -c*delsq + z( n-1 )*z( n-1 ) + z( n )*z( n )
                 b = z( n )*z( n )*delsq
                 ! the following tau2 is to approximate
                 ! sigma_n^2 - d( n )*d( n )
                 if( a<zero ) then
                    tau2 = two*b / ( sqrt( a*a+four*b*c )-a )
                 else
                    tau2 = ( a+sqrt( a*a+four*b*c ) ) / ( two*c )
                 end if
                 tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) )
                 ! it can be proved that
                 ! d(n)^2 < d(n)^2+tau2 < sigma(n)^2 < d(n)^2+rho/2
              end if
              ! the following tau is to approximate sigma_n - d( n )
               ! tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) )
              sigma = d( n ) + tau
              do j = 1, n
                 delta( j ) = ( d( j )-d( n ) ) - tau
                 work( j ) = d( j ) + d( n ) + tau
              end do
              ! evaluate psi and the derivative dpsi
              dpsi = zero
              psi = zero
              erretm = zero
              do j = 1, ii
                 temp = z( j ) / ( delta( j )*work( j ) )
                 psi = psi + z( j )*temp
                 dpsi = dpsi + temp*temp
                 erretm = erretm + psi
              end do
              erretm = abs( erretm )
              ! evaluate phi and the derivative dphi
              temp = z( n ) / ( delta( n )*work( n ) )
              phi = z( n )*temp
              dphi = temp*temp
              erretm = eight*( -phi-psi ) + erretm - phi + rhoinv
          ! $          + abs( tau2 )*( dpsi+dphi )
              w = rhoinv + phi + psi
              ! test for convergence
              if( abs( w )<=eps*erretm ) then
                 go to 240
              end if
              ! calculate the new step
              niter = niter + 1_${ik}$
              dtnsq1 = work( n-1 )*delta( n-1 )
              dtnsq = work( n )*delta( n )
              c = w - dtnsq1*dpsi - dtnsq*dphi
              a = ( dtnsq+dtnsq1 )*w - dtnsq*dtnsq1*( dpsi+dphi )
              b = dtnsq*dtnsq1*w
              if( c<zero )c = abs( c )
              if( c==zero ) then
                 eta = rho - sigma*sigma
              else if( a>=zero ) then
                 eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
              else
                 eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) )
              end if
              ! note, eta should be positive if w is negative, and
              ! eta should be negative otherwise. however,
              ! if for some reason caused by roundoff, eta*w > 0,
              ! we simply use one newton step instead. this way
              ! will guarantee eta*w < 0.
              if( w*eta>zero )eta = -w / ( dpsi+dphi )
              temp = eta - dtnsq
              if( temp>rho )eta = rho + dtnsq
              eta = eta / ( sigma+sqrt( eta+sigma*sigma ) )
              tau = tau + eta
              sigma = sigma + eta
              do j = 1, n
                 delta( j ) = delta( j ) - eta
                 work( j ) = work( j ) + eta
              end do
              ! evaluate psi and the derivative dpsi
              dpsi = zero
              psi = zero
              erretm = zero
              do j = 1, ii
                 temp = z( j ) / ( work( j )*delta( j ) )
                 psi = psi + z( j )*temp
                 dpsi = dpsi + temp*temp
                 erretm = erretm + psi
              end do
              erretm = abs( erretm )
              ! evaluate phi and the derivative dphi
              tau2 = work( n )*delta( n )
              temp = z( n ) / tau2
              phi = z( n )*temp
              dphi = temp*temp
              erretm = eight*( -phi-psi ) + erretm - phi + rhoinv
          ! $          + abs( tau2 )*( dpsi+dphi )
              w = rhoinv + phi + psi
              ! main loop to update the values of the array   delta
              iter = niter + 1_${ik}$
              loop_90: do niter = iter, maxit
                 ! test for convergence
                 if( abs( w )<=eps*erretm ) then
                    go to 240
                 end if
                 ! calculate the new step
                 dtnsq1 = work( n-1 )*delta( n-1 )
                 dtnsq = work( n )*delta( n )
                 c = w - dtnsq1*dpsi - dtnsq*dphi
                 a = ( dtnsq+dtnsq1 )*w - dtnsq1*dtnsq*( dpsi+dphi )
                 b = dtnsq1*dtnsq*w
                 if( a>=zero ) then
                    eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                 else
                    eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) )
                 end if
                 ! note, eta should be positive if w is negative, and
                 ! eta should be negative otherwise. however,
                 ! if for some reason caused by roundoff, eta*w > 0,
                 ! we simply use one newton step instead. this way
                 ! will guarantee eta*w < 0.
                 if( w*eta>zero )eta = -w / ( dpsi+dphi )
                 temp = eta - dtnsq
                 if( temp<=zero )eta = eta / two
                 eta = eta / ( sigma+sqrt( eta+sigma*sigma ) )
                 tau = tau + eta
                 sigma = sigma + eta
                 do j = 1, n
                    delta( j ) = delta( j ) - eta
                    work( j ) = work( j ) + eta
                 end do
                 ! evaluate psi and the derivative dpsi
                 dpsi = zero
                 psi = zero
                 erretm = zero
                 do j = 1, ii
                    temp = z( j ) / ( work( j )*delta( j ) )
                    psi = psi + z( j )*temp
                    dpsi = dpsi + temp*temp
                    erretm = erretm + psi
                 end do
                 erretm = abs( erretm )
                 ! evaluate phi and the derivative dphi
                 tau2 = work( n )*delta( n )
                 temp = z( n ) / tau2
                 phi = z( n )*temp
                 dphi = temp*temp
                 erretm = eight*( -phi-psi ) + erretm - phi + rhoinv
          ! $             + abs( tau2 )*( dpsi+dphi )
                 w = rhoinv + phi + psi
              end do loop_90
              ! return with info = 1, niter = maxit and not converged
              info = 1_${ik}$
              go to 240
              ! end for the case i = n
           else
              ! the case for i < n
              niter = 1_${ik}$
              ip1 = i + 1_${ik}$
              ! calculate initial guess
              delsq = ( d( ip1 )-d( i ) )*( d( ip1 )+d( i ) )
              delsq2 = delsq / two
              sq2=sqrt( ( d( i )*d( i )+d( ip1 )*d( ip1 ) ) / two )
              temp = delsq2 / ( d( i )+sq2 )
              do j = 1, n
                 work( j ) = d( j ) + d( i ) + temp
                 delta( j ) = ( d( j )-d( i ) ) - temp
              end do
              psi = zero
              do j = 1, i - 1
                 psi = psi + z( j )*z( j ) / ( work( j )*delta( j ) )
              end do
              phi = zero
              do j = n, i + 2, -1
                 phi = phi + z( j )*z( j ) / ( work( j )*delta( j ) )
              end do
              c = rhoinv + psi + phi
              w = c + z( i )*z( i ) / ( work( i )*delta( i ) ) +z( ip1 )*z( ip1 ) / ( work( ip1 )&
                        *delta( ip1 ) )
              geomavg = .false.
              if( w>zero ) then
                 ! d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2
                 ! we choose d(i) as origin.
                 orgati = .true.
                 ii = i
                 sglb = zero
                 sgub = delsq2  / ( d( i )+sq2 )
                 a = c*delsq + z( i )*z( i ) + z( ip1 )*z( ip1 )
                 b = z( i )*z( i )*delsq
                 if( a>zero ) then
                    tau2 = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) )
                 else
                    tau2 = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                 end if
                 ! tau2 now is an estimation of sigma^2 - d( i )^2. the
                 ! following, however, is the corresponding estimation of
                 ! sigma - d( i ).
                 tau = tau2 / ( d( i )+sqrt( d( i )*d( i )+tau2 ) )
                 temp = sqrt(eps)
                 if( (d(i)<=temp*d(ip1)).and.(abs(z(i))<=temp).and.(d(i)>zero) ) then
                    tau = min( ten*d(i), sgub )
                    geomavg = .true.
                 end if
              else
                 ! (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2
                 ! we choose d(i+1) as origin.
                 orgati = .false.
                 ii = ip1
                 sglb = -delsq2  / ( d( ii )+sq2 )
                 sgub = zero
                 a = c*</