stdlib_lapack_eigv_svd_bidiag_dc.fypp Source File


Source Code

#:include "common.fypp" 
submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_eigv_svd_bidiag_dc
  implicit none


  contains
#:for ik,it,ii in LINALG_INT_KINDS_TYPES

     pure module subroutine stdlib${ii}$_slasd0( n, sqre, d, e, u, ldu, vt, ldvt, smlsiz, iwork,work, info )
     !! Using a divide and conquer approach, SLASD0: computes the singular
     !! value decomposition (SVD) of a real upper bidiagonal N-by-M
     !! matrix B with diagonal D and offdiagonal E, where M = N + SQRE.
     !! The algorithm computes orthogonal matrices U and VT such that
     !! B = U * S * VT. The singular values S are overwritten on D.
     !! A related subroutine, SLASDA, computes only the singular values,
     !! and optionally, the singular vectors in compact form.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldu, ldvt, n, smlsiz, sqre
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(sp), intent(inout) :: d(*), e(*)
           real(sp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*)
        ! =====================================================================
           ! Local Scalars 
           integer(${ik}$) :: i, i1, ic, idxq, idxqc, im1, inode, itemp, iwk, j, lf, ll, lvl, m, ncc,&
                      nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqrei
           real(sp) :: alpha, beta
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -1_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -2_${ik}$
           end if
           m = n + sqre
           if( ldu<n ) then
              info = -6_${ik}$
           else if( ldvt<m ) then
              info = -8_${ik}$
           else if( smlsiz<3_${ik}$ ) then
              info = -9_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLASD0', -info )
              return
           end if
           ! if the input matrix is too small, call stdlib${ii}$_slasdq to find the svd.
           if( n<=smlsiz ) then
              call stdlib${ii}$_slasdq( 'U', sqre, n, m, n, 0_${ik}$, d, e, vt, ldvt, u, ldu, u,ldu, work, &
                        info )
              return
           end if
           ! set up the computation tree.
           inode = 1_${ik}$
           ndiml = inode + n
           ndimr = ndiml + n
           idxq = ndimr + n
           iwk = idxq + n
           call stdlib${ii}$_slasdt( n, nlvl, nd, iwork( inode ), iwork( ndiml ),iwork( ndimr ), smlsiz &
                     )
           ! for the nodes on bottom level of the tree, solve
           ! their subproblems by stdlib${ii}$_slasdq.
           ndb1 = ( nd+1 ) / 2_${ik}$
           ncc = 0_${ik}$
           loop_30: do i = ndb1, nd
           ! ic : center row of each node
           ! nl : number of rows of left  subproblem
           ! nr : number of rows of right subproblem
           ! nlf: starting row of the left   subproblem
           ! nrf: starting row of the right  subproblem
              i1 = i - 1_${ik}$
              ic = iwork( inode+i1 )
              nl = iwork( ndiml+i1 )
              nlp1 = nl + 1_${ik}$
              nr = iwork( ndimr+i1 )
              nrp1 = nr + 1_${ik}$
              nlf = ic - nl
              nrf = ic + 1_${ik}$
              sqrei = 1_${ik}$
              call stdlib${ii}$_slasdq( 'U', sqrei, nl, nlp1, nl, ncc, d( nlf ), e( nlf ),vt( nlf, nlf )&
                        , ldvt, u( nlf, nlf ), ldu,u( nlf, nlf ), ldu, work, info )
              if( info/=0_${ik}$ ) then
                 return
              end if
              itemp = idxq + nlf - 2_${ik}$
              do j = 1, nl
                 iwork( itemp+j ) = j
              end do
              if( i==nd ) then
                 sqrei = sqre
              else
                 sqrei = 1_${ik}$
              end if
              nrp1 = nr + sqrei
              call stdlib${ii}$_slasdq( 'U', sqrei, nr, nrp1, nr, ncc, d( nrf ), e( nrf ),vt( nrf, nrf )&
                        , ldvt, u( nrf, nrf ), ldu,u( nrf, nrf ), ldu, work, info )
              if( info/=0_${ik}$ ) then
                 return
              end if
              itemp = idxq + ic
              do j = 1, nr
                 iwork( itemp+j-1 ) = j
              end do
           end do loop_30
           ! now conquer each subproblem bottom-up.
           loop_50: do lvl = nlvl, 1, -1
              ! find the first node lf and last node ll on the
              ! current level lvl.
              if( lvl==1_${ik}$ ) then
                 lf = 1_${ik}$
                 ll = 1_${ik}$
              else
                 lf = 2_${ik}$**( lvl-1 )
                 ll = 2_${ik}$*lf - 1_${ik}$
              end if
              do i = lf, ll
                 im1 = i - 1_${ik}$
                 ic = iwork( inode+im1 )
                 nl = iwork( ndiml+im1 )
                 nr = iwork( ndimr+im1 )
                 nlf = ic - nl
                 if( ( sqre==0_${ik}$ ) .and. ( i==ll ) ) then
                    sqrei = sqre
                 else
                    sqrei = 1_${ik}$
                 end if
                 idxqc = idxq + nlf - 1_${ik}$
                 alpha = d( ic )
                 beta = e( ic )
                 call stdlib${ii}$_slasd1( nl, nr, sqrei, d( nlf ), alpha, beta,u( nlf, nlf ), ldu, vt( &
                           nlf, nlf ), ldvt,iwork( idxqc ), iwork( iwk ), work, info )
           ! report the possible convergence failure.
                 if( info/=0_${ik}$ ) then
                    return
                 end if
              end do
           end do loop_50
           return
     end subroutine stdlib${ii}$_slasd0

     pure module subroutine stdlib${ii}$_dlasd0( n, sqre, d, e, u, ldu, vt, ldvt, smlsiz, iwork,work, info )
     !! Using a divide and conquer approach, DLASD0: computes the singular
     !! value decomposition (SVD) of a real upper bidiagonal N-by-M
     !! matrix B with diagonal D and offdiagonal E, where M = N + SQRE.
     !! The algorithm computes orthogonal matrices U and VT such that
     !! B = U * S * VT. The singular values S are overwritten on D.
     !! A related subroutine, DLASDA, computes only the singular values,
     !! and optionally, the singular vectors in compact form.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldu, ldvt, n, smlsiz, sqre
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(dp), intent(inout) :: d(*), e(*)
           real(dp), intent(out) :: u(ldu,*), vt(ldvt,*), work(*)
        ! =====================================================================
           ! Local Scalars 
           integer(${ik}$) :: i, i1, ic, idxq, idxqc, im1, inode, itemp, iwk, j, lf, ll, lvl, m, ncc,&
                      nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqrei
           real(dp) :: alpha, beta
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -1_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -2_${ik}$
           end if
           m = n + sqre
           if( ldu<n ) then
              info = -6_${ik}$
           else if( ldvt<m ) then
              info = -8_${ik}$
           else if( smlsiz<3_${ik}$ ) then
              info = -9_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD0', -info )
              return
           end if
           ! if the input matrix is too small, call stdlib${ii}$_dlasdq to find the svd.
           if( n<=smlsiz ) then
              call stdlib${ii}$_dlasdq( 'U', sqre, n, m, n, 0_${ik}$, d, e, vt, ldvt, u, ldu, u,ldu, work, &
                        info )
              return
           end if
           ! set up the computation tree.
           inode = 1_${ik}$
           ndiml = inode + n
           ndimr = ndiml + n
           idxq = ndimr + n
           iwk = idxq + n
           call stdlib${ii}$_dlasdt( n, nlvl, nd, iwork( inode ), iwork( ndiml ),iwork( ndimr ), smlsiz &
                     )
           ! for the nodes on bottom level of the tree, solve
           ! their subproblems by stdlib${ii}$_dlasdq.
           ndb1 = ( nd+1 ) / 2_${ik}$
           ncc = 0_${ik}$
           loop_30: do i = ndb1, nd
           ! ic : center row of each node
           ! nl : number of rows of left  subproblem
           ! nr : number of rows of right subproblem
           ! nlf: starting row of the left   subproblem
           ! nrf: starting row of the right  subproblem
              i1 = i - 1_${ik}$
              ic = iwork( inode+i1 )
              nl = iwork( ndiml+i1 )
              nlp1 = nl + 1_${ik}$
              nr = iwork( ndimr+i1 )
              nrp1 = nr + 1_${ik}$
              nlf = ic - nl
              nrf = ic + 1_${ik}$
              sqrei = 1_${ik}$
              call stdlib${ii}$_dlasdq( 'U', sqrei, nl, nlp1, nl, ncc, d( nlf ), e( nlf ),vt( nlf, nlf )&
                        , ldvt, u( nlf, nlf ), ldu,u( nlf, nlf ), ldu, work, info )
              if( info/=0_${ik}$ ) then
                 return
              end if
              itemp = idxq + nlf - 2_${ik}$
              do j = 1, nl
                 iwork( itemp+j ) = j
              end do
              if( i==nd ) then
                 sqrei = sqre
              else
                 sqrei = 1_${ik}$
              end if
              nrp1 = nr + sqrei
              call stdlib${ii}$_dlasdq( 'U', sqrei, nr, nrp1, nr, ncc, d( nrf ), e( nrf ),vt( nrf, nrf )&
                        , ldvt, u( nrf, nrf ), ldu,u( nrf, nrf ), ldu, work, info )
              if( info/=0_${ik}$ ) then
                 return
              end if
              itemp = idxq + ic
              do j = 1, nr
                 iwork( itemp+j-1 ) = j
              end do
           end do loop_30
           ! now conquer each subproblem bottom-up.
           loop_50: do lvl = nlvl, 1, -1
              ! find the first node lf and last node ll on the
              ! current level lvl.
              if( lvl==1_${ik}$ ) then
                 lf = 1_${ik}$
                 ll = 1_${ik}$
              else
                 lf = 2_${ik}$**( lvl-1 )
                 ll = 2_${ik}$*lf - 1_${ik}$
              end if
              do i = lf, ll
                 im1 = i - 1_${ik}$
                 ic = iwork( inode+im1 )
                 nl = iwork( ndiml+im1 )
                 nr = iwork( ndimr+im1 )
                 nlf = ic - nl
                 if( ( sqre==0_${ik}$ ) .and. ( i==ll ) ) then
                    sqrei = sqre
                 else
                    sqrei = 1_${ik}$
                 end if
                 idxqc = idxq + nlf - 1_${ik}$
                 alpha = d( ic )
                 beta = e( ic )
                 call stdlib${ii}$_dlasd1( nl, nr, sqrei, d( nlf ), alpha, beta,u( nlf, nlf ), ldu, vt( &
                           nlf, nlf ), ldvt,iwork( idxqc ), iwork( iwk ), work, info )
              ! report the possible convergence failure.
                 if( info/=0_${ik}$ ) then
                    return
                 end if
              end do
           end do loop_50
           return
     end subroutine stdlib${ii}$_dlasd0

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lasd0( n, sqre, d, e, u, ldu, vt, ldvt, smlsiz, iwork,work, info )
     !! Using a divide and conquer approach, DLASD0: computes the singular
     !! value decomposition (SVD) of a real upper bidiagonal N-by-M
     !! matrix B with diagonal D and offdiagonal E, where M = N + SQRE.
     !! The algorithm computes orthogonal matrices U and VT such that
     !! B = U * S * VT. The singular values S are overwritten on D.
     !! A related subroutine, DLASDA, computes only the singular values,
     !! and optionally, the singular vectors in compact form.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldu, ldvt, n, smlsiz, sqre
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(${rk}$), intent(inout) :: d(*), e(*)
           real(${rk}$), intent(out) :: u(ldu,*), vt(ldvt,*), work(*)
        ! =====================================================================
           ! Local Scalars 
           integer(${ik}$) :: i, i1, ic, idxq, idxqc, im1, inode, itemp, iwk, j, lf, ll, lvl, m, ncc,&
                      nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, sqrei
           real(${rk}$) :: alpha, beta
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -1_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -2_${ik}$
           end if
           m = n + sqre
           if( ldu<n ) then
              info = -6_${ik}$
           else if( ldvt<m ) then
              info = -8_${ik}$
           else if( smlsiz<3_${ik}$ ) then
              info = -9_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD0', -info )
              return
           end if
           ! if the input matrix is too small, call stdlib${ii}$_${ri}$lasdq to find the svd.
           if( n<=smlsiz ) then
              call stdlib${ii}$_${ri}$lasdq( 'U', sqre, n, m, n, 0_${ik}$, d, e, vt, ldvt, u, ldu, u,ldu, work, &
                        info )
              return
           end if
           ! set up the computation tree.
           inode = 1_${ik}$
           ndiml = inode + n
           ndimr = ndiml + n
           idxq = ndimr + n
           iwk = idxq + n
           call stdlib${ii}$_${ri}$lasdt( n, nlvl, nd, iwork( inode ), iwork( ndiml ),iwork( ndimr ), smlsiz &
                     )
           ! for the nodes on bottom level of the tree, solve
           ! their subproblems by stdlib${ii}$_${ri}$lasdq.
           ndb1 = ( nd+1 ) / 2_${ik}$
           ncc = 0_${ik}$
           loop_30: do i = ndb1, nd
           ! ic : center row of each node
           ! nl : number of rows of left  subproblem
           ! nr : number of rows of right subproblem
           ! nlf: starting row of the left   subproblem
           ! nrf: starting row of the right  subproblem
              i1 = i - 1_${ik}$
              ic = iwork( inode+i1 )
              nl = iwork( ndiml+i1 )
              nlp1 = nl + 1_${ik}$
              nr = iwork( ndimr+i1 )
              nrp1 = nr + 1_${ik}$
              nlf = ic - nl
              nrf = ic + 1_${ik}$
              sqrei = 1_${ik}$
              call stdlib${ii}$_${ri}$lasdq( 'U', sqrei, nl, nlp1, nl, ncc, d( nlf ), e( nlf ),vt( nlf, nlf )&
                        , ldvt, u( nlf, nlf ), ldu,u( nlf, nlf ), ldu, work, info )
              if( info/=0_${ik}$ ) then
                 return
              end if
              itemp = idxq + nlf - 2_${ik}$
              do j = 1, nl
                 iwork( itemp+j ) = j
              end do
              if( i==nd ) then
                 sqrei = sqre
              else
                 sqrei = 1_${ik}$
              end if
              nrp1 = nr + sqrei
              call stdlib${ii}$_${ri}$lasdq( 'U', sqrei, nr, nrp1, nr, ncc, d( nrf ), e( nrf ),vt( nrf, nrf )&
                        , ldvt, u( nrf, nrf ), ldu,u( nrf, nrf ), ldu, work, info )
              if( info/=0_${ik}$ ) then
                 return
              end if
              itemp = idxq + ic
              do j = 1, nr
                 iwork( itemp+j-1 ) = j
              end do
           end do loop_30
           ! now conquer each subproblem bottom-up.
           loop_50: do lvl = nlvl, 1, -1
              ! find the first node lf and last node ll on the
              ! current level lvl.
              if( lvl==1_${ik}$ ) then
                 lf = 1_${ik}$
                 ll = 1_${ik}$
              else
                 lf = 2_${ik}$**( lvl-1 )
                 ll = 2_${ik}$*lf - 1_${ik}$
              end if
              do i = lf, ll
                 im1 = i - 1_${ik}$
                 ic = iwork( inode+im1 )
                 nl = iwork( ndiml+im1 )
                 nr = iwork( ndimr+im1 )
                 nlf = ic - nl
                 if( ( sqre==0_${ik}$ ) .and. ( i==ll ) ) then
                    sqrei = sqre
                 else
                    sqrei = 1_${ik}$
                 end if
                 idxqc = idxq + nlf - 1_${ik}$
                 alpha = d( ic )
                 beta = e( ic )
                 call stdlib${ii}$_${ri}$lasd1( nl, nr, sqrei, d( nlf ), alpha, beta,u( nlf, nlf ), ldu, vt( &
                           nlf, nlf ), ldvt,iwork( idxqc ), iwork( iwk ), work, info )
              ! report the possible convergence failure.
                 if( info/=0_${ik}$ ) then
                    return
                 end if
              end do
           end do loop_50
           return
     end subroutine stdlib${ii}$_${ri}$lasd0

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slasdt( n, lvl, nd, inode, ndiml, ndimr, msub )
     !! SLASDT creates a tree of subproblems for bidiagonal divide and
     !! conquer.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: lvl, nd
           integer(${ik}$), intent(in) :: msub, n
           ! Array Arguments 
           integer(${ik}$), intent(out) :: inode(*), ndiml(*), ndimr(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, il, ir, llst, maxn, ncrnt, nlvl
           real(sp) :: temp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! find the number of levels on the tree.
           maxn = max( 1_${ik}$, n )
           temp = log( real( maxn,KIND=sp) / real( msub+1,KIND=sp) ) / log( two )
           lvl = int( temp,KIND=${ik}$) + 1_${ik}$
           i = n / 2_${ik}$
           inode( 1_${ik}$ ) = i + 1_${ik}$
           ndiml( 1_${ik}$ ) = i
           ndimr( 1_${ik}$ ) = n - i - 1_${ik}$
           il = 0_${ik}$
           ir = 1_${ik}$
           llst = 1_${ik}$
           do nlvl = 1, lvl - 1
              ! constructing the tree at (nlvl+1)-st level. the number of
              ! nodes created on this level is llst * 2.
              do i = 0, llst - 1
                 il = il + 2_${ik}$
                 ir = ir + 2_${ik}$
                 ncrnt = llst + i
                 ndiml( il ) = ndiml( ncrnt ) / 2_${ik}$
                 ndimr( il ) = ndiml( ncrnt ) - ndiml( il ) - 1_${ik}$
                 inode( il ) = inode( ncrnt ) - ndimr( il ) - 1_${ik}$
                 ndiml( ir ) = ndimr( ncrnt ) / 2_${ik}$
                 ndimr( ir ) = ndimr( ncrnt ) - ndiml( ir ) - 1_${ik}$
                 inode( ir ) = inode( ncrnt ) + ndiml( ir ) + 1_${ik}$
              end do
              llst = llst*2_${ik}$
           end do
           nd = llst*2_${ik}$ - 1_${ik}$
           return
     end subroutine stdlib${ii}$_slasdt

     pure module subroutine stdlib${ii}$_dlasdt( n, lvl, nd, inode, ndiml, ndimr, msub )
     !! DLASDT creates a tree of subproblems for bidiagonal divide and
     !! conquer.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: lvl, nd
           integer(${ik}$), intent(in) :: msub, n
           ! Array Arguments 
           integer(${ik}$), intent(out) :: inode(*), ndiml(*), ndimr(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, il, ir, llst, maxn, ncrnt, nlvl
           real(dp) :: temp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! find the number of levels on the tree.
           maxn = max( 1_${ik}$, n )
           temp = log( real( maxn,KIND=dp) / real( msub+1,KIND=dp) ) / log( two )
           lvl = int( temp,KIND=${ik}$) + 1_${ik}$
           i = n / 2_${ik}$
           inode( 1_${ik}$ ) = i + 1_${ik}$
           ndiml( 1_${ik}$ ) = i
           ndimr( 1_${ik}$ ) = n - i - 1_${ik}$
           il = 0_${ik}$
           ir = 1_${ik}$
           llst = 1_${ik}$
           do nlvl = 1, lvl - 1
              ! constructing the tree at (nlvl+1)-st level. the number of
              ! nodes created on this level is llst * 2.
              do i = 0, llst - 1
                 il = il + 2_${ik}$
                 ir = ir + 2_${ik}$
                 ncrnt = llst + i
                 ndiml( il ) = ndiml( ncrnt ) / 2_${ik}$
                 ndimr( il ) = ndiml( ncrnt ) - ndiml( il ) - 1_${ik}$
                 inode( il ) = inode( ncrnt ) - ndimr( il ) - 1_${ik}$
                 ndiml( ir ) = ndimr( ncrnt ) / 2_${ik}$
                 ndimr( ir ) = ndimr( ncrnt ) - ndiml( ir ) - 1_${ik}$
                 inode( ir ) = inode( ncrnt ) + ndiml( ir ) + 1_${ik}$
              end do
              llst = llst*2_${ik}$
           end do
           nd = llst*2_${ik}$ - 1_${ik}$
           return
     end subroutine stdlib${ii}$_dlasdt

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lasdt( n, lvl, nd, inode, ndiml, ndimr, msub )
     !! DLASDT: creates a tree of subproblems for bidiagonal divide and
     !! conquer.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: lvl, nd
           integer(${ik}$), intent(in) :: msub, n
           ! Array Arguments 
           integer(${ik}$), intent(out) :: inode(*), ndiml(*), ndimr(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, il, ir, llst, maxn, ncrnt, nlvl
           real(${rk}$) :: temp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! find the number of levels on the tree.
           maxn = max( 1_${ik}$, n )
           temp = log( real( maxn,KIND=${rk}$) / real( msub+1,KIND=${rk}$) ) / log( two )
           lvl = int( temp,KIND=${ik}$) + 1_${ik}$
           i = n / 2_${ik}$
           inode( 1_${ik}$ ) = i + 1_${ik}$
           ndiml( 1_${ik}$ ) = i
           ndimr( 1_${ik}$ ) = n - i - 1_${ik}$
           il = 0_${ik}$
           ir = 1_${ik}$
           llst = 1_${ik}$
           do nlvl = 1, lvl - 1
              ! constructing the tree at (nlvl+1)-st level. the number of
              ! nodes created on this level is llst * 2.
              do i = 0, llst - 1
                 il = il + 2_${ik}$
                 ir = ir + 2_${ik}$
                 ncrnt = llst + i
                 ndiml( il ) = ndiml( ncrnt ) / 2_${ik}$
                 ndimr( il ) = ndiml( ncrnt ) - ndiml( il ) - 1_${ik}$
                 inode( il ) = inode( ncrnt ) - ndimr( il ) - 1_${ik}$
                 ndiml( ir ) = ndimr( ncrnt ) / 2_${ik}$
                 ndimr( ir ) = ndimr( ncrnt ) - ndiml( ir ) - 1_${ik}$
                 inode( ir ) = inode( ncrnt ) + ndiml( ir ) + 1_${ik}$
              end do
              llst = llst*2_${ik}$
           end do
           nd = llst*2_${ik}$ - 1_${ik}$
           return
     end subroutine stdlib${ii}$_${ri}$lasdt

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slasd1( nl, nr, sqre, d, alpha, beta, u, ldu, vt, ldvt,idxq, iwork, &
     !! SLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B,
     !! where N = NL + NR + 1 and M = N + SQRE. SLASD1 is called from SLASD0.
     !! A related subroutine SLASD7 handles the case in which the singular
     !! values (and the singular vectors in factored form) are desired.
     !! SLASD1 computes the SVD as follows:
     !! ( D1(in)    0    0       0 )
     !! B = U(in) * (   Z1**T   a   Z2**T    b ) * VT(in)
     !! (   0       0   D2(in)   0 )
     !! = U(out) * ( D(out) 0) * VT(out)
     !! where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M
     !! with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
     !! elsewhere; and the entry b is empty if SQRE = 0.
     !! The left singular vectors of the original matrix are stored in U, and
     !! the transpose of the right singular vectors are stored in VT, and the
     !! singular values are in D.  The algorithm consists of three stages:
     !! The first stage consists of deflating the size of the problem
     !! when there are multiple singular values or when there are zeros in
     !! the Z vector.  For each such occurrence the dimension of the
     !! secular equation problem is reduced by one.  This stage is
     !! performed by the routine SLASD2.
     !! The second stage consists of calculating the updated
     !! singular values. This is done by finding the square roots of the
     !! roots of the secular equation via the routine SLASD4 (as called
     !! by SLASD3). This routine also calculates the singular vectors of
     !! the current problem.
     !! The final stage consists of computing the updated singular vectors
     !! directly using the updated singular values.  The singular vectors
     !! for the current problem are multiplied with the singular vectors
     !! from the overall problem.
               work, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldu, ldvt, nl, nr, sqre
           real(sp), intent(inout) :: alpha, beta
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: idxq(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(sp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: coltyp, i, idx, idxc, idxp, iq, isigma, iu2, ivt2, iz, k, ldq, ldu2, &
                     ldvt2, m, n, n1, n2
           real(sp) :: orgnrm
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLASD1', -info )
              return
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           ! the following values are for bookkeeping purposes only.  they are
           ! integer pointers which indicate the portion of the workspace
           ! used by a particular array in stdlib${ii}$_slasd2 and stdlib${ii}$_slasd3.
           ldu2 = n
           ldvt2 = m
           iz = 1_${ik}$
           isigma = iz + m
           iu2 = isigma + n
           ivt2 = iu2 + ldu2*n
           iq = ivt2 + ldvt2*m
           idx = 1_${ik}$
           idxc = idx + n
           coltyp = idxc + n
           idxp = coltyp + n
           ! scale.
           orgnrm = max( abs( alpha ), abs( beta ) )
           d( nl+1 ) = zero
           do i = 1, n
              if( abs( d( i ) )>orgnrm ) then
                 orgnrm = abs( d( i ) )
              end if
           end do
           call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, orgnrm, one, n, 1_${ik}$, d, n, info )
           alpha = alpha / orgnrm
           beta = beta / orgnrm
           ! deflate singular values.
           call stdlib${ii}$_slasd2( nl, nr, sqre, k, d, work( iz ), alpha, beta, u, ldu,vt, ldvt, work(&
            isigma ), work( iu2 ), ldu2,work( ivt2 ), ldvt2, iwork( idxp ), iwork( idx ),iwork( &
                      idxc ), idxq, iwork( coltyp ), info )
           ! solve secular equation and update singular vectors.
           ldq = k
           call stdlib${ii}$_slasd3( nl, nr, sqre, k, d, work( iq ), ldq, work( isigma ),u, ldu, work( &
           iu2 ), ldu2, vt, ldvt, work( ivt2 ),ldvt2, iwork( idxc ), iwork( coltyp ), work( iz ),&
                     info )
           ! report the possible convergence failure.
           if( info/=0_${ik}$ ) then
              return
           end if
           ! unscale.
           call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, orgnrm, n, 1_${ik}$, d, n, info )
           ! prepare the idxq sorting permutation.
           n1 = k
           n2 = n - k
           call stdlib${ii}$_slamrg( n1, n2, d, 1_${ik}$, -1_${ik}$, idxq )
           return
     end subroutine stdlib${ii}$_slasd1

     pure module subroutine stdlib${ii}$_dlasd1( nl, nr, sqre, d, alpha, beta, u, ldu, vt, ldvt,idxq, iwork, &
     !! DLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B,
     !! where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0.
     !! A related subroutine DLASD7 handles the case in which the singular
     !! values (and the singular vectors in factored form) are desired.
     !! DLASD1 computes the SVD as follows:
     !! ( D1(in)    0    0       0 )
     !! B = U(in) * (   Z1**T   a   Z2**T    b ) * VT(in)
     !! (   0       0   D2(in)   0 )
     !! = U(out) * ( D(out) 0) * VT(out)
     !! where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M
     !! with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
     !! elsewhere; and the entry b is empty if SQRE = 0.
     !! The left singular vectors of the original matrix are stored in U, and
     !! the transpose of the right singular vectors are stored in VT, and the
     !! singular values are in D.  The algorithm consists of three stages:
     !! The first stage consists of deflating the size of the problem
     !! when there are multiple singular values or when there are zeros in
     !! the Z vector.  For each such occurrence the dimension of the
     !! secular equation problem is reduced by one.  This stage is
     !! performed by the routine DLASD2.
     !! The second stage consists of calculating the updated
     !! singular values. This is done by finding the square roots of the
     !! roots of the secular equation via the routine DLASD4 (as called
     !! by DLASD3). This routine also calculates the singular vectors of
     !! the current problem.
     !! The final stage consists of computing the updated singular vectors
     !! directly using the updated singular values.  The singular vectors
     !! for the current problem are multiplied with the singular vectors
     !! from the overall problem.
               work, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldu, ldvt, nl, nr, sqre
           real(dp), intent(inout) :: alpha, beta
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: idxq(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(dp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: coltyp, i, idx, idxc, idxp, iq, isigma, iu2, ivt2, iz, k, ldq, ldu2, &
                     ldvt2, m, n, n1, n2
           real(dp) :: orgnrm
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD1', -info )
              return
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           ! the following values are for bookkeeping purposes only.  they are
           ! integer pointers which indicate the portion of the workspace
           ! used by a particular array in stdlib${ii}$_dlasd2 and stdlib${ii}$_dlasd3.
           ldu2 = n
           ldvt2 = m
           iz = 1_${ik}$
           isigma = iz + m
           iu2 = isigma + n
           ivt2 = iu2 + ldu2*n
           iq = ivt2 + ldvt2*m
           idx = 1_${ik}$
           idxc = idx + n
           coltyp = idxc + n
           idxp = coltyp + n
           ! scale.
           orgnrm = max( abs( alpha ), abs( beta ) )
           d( nl+1 ) = zero
           do i = 1, n
              if( abs( d( i ) )>orgnrm ) then
                 orgnrm = abs( d( i ) )
              end if
           end do
           call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, orgnrm, one, n, 1_${ik}$, d, n, info )
           alpha = alpha / orgnrm
           beta = beta / orgnrm
           ! deflate singular values.
           call stdlib${ii}$_dlasd2( nl, nr, sqre, k, d, work( iz ), alpha, beta, u, ldu,vt, ldvt, work(&
            isigma ), work( iu2 ), ldu2,work( ivt2 ), ldvt2, iwork( idxp ), iwork( idx ),iwork( &
                      idxc ), idxq, iwork( coltyp ), info )
           ! solve secular equation and update singular vectors.
           ldq = k
           call stdlib${ii}$_dlasd3( nl, nr, sqre, k, d, work( iq ), ldq, work( isigma ),u, ldu, work( &
           iu2 ), ldu2, vt, ldvt, work( ivt2 ),ldvt2, iwork( idxc ), iwork( coltyp ), work( iz ),&
                     info )
           ! report the convergence failure.
           if( info/=0_${ik}$ ) then
              return
           end if
           ! unscale.
           call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, orgnrm, n, 1_${ik}$, d, n, info )
           ! prepare the idxq sorting permutation.
           n1 = k
           n2 = n - k
           call stdlib${ii}$_dlamrg( n1, n2, d, 1_${ik}$, -1_${ik}$, idxq )
           return
     end subroutine stdlib${ii}$_dlasd1

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lasd1( nl, nr, sqre, d, alpha, beta, u, ldu, vt, ldvt,idxq, iwork, &
     !! DLASD1: computes the SVD of an upper bidiagonal N-by-M matrix B,
     !! where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0.
     !! A related subroutine DLASD7 handles the case in which the singular
     !! values (and the singular vectors in factored form) are desired.
     !! DLASD1 computes the SVD as follows:
     !! ( D1(in)    0    0       0 )
     !! B = U(in) * (   Z1**T   a   Z2**T    b ) * VT(in)
     !! (   0       0   D2(in)   0 )
     !! = U(out) * ( D(out) 0) * VT(out)
     !! where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M
     !! with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
     !! elsewhere; and the entry b is empty if SQRE = 0.
     !! The left singular vectors of the original matrix are stored in U, and
     !! the transpose of the right singular vectors are stored in VT, and the
     !! singular values are in D.  The algorithm consists of three stages:
     !! The first stage consists of deflating the size of the problem
     !! when there are multiple singular values or when there are zeros in
     !! the Z vector.  For each such occurrence the dimension of the
     !! secular equation problem is reduced by one.  This stage is
     !! performed by the routine DLASD2.
     !! The second stage consists of calculating the updated
     !! singular values. This is done by finding the square roots of the
     !! roots of the secular equation via the routine DLASD4 (as called
     !! by DLASD3). This routine also calculates the singular vectors of
     !! the current problem.
     !! The final stage consists of computing the updated singular vectors
     !! directly using the updated singular values.  The singular vectors
     !! for the current problem are multiplied with the singular vectors
     !! from the overall problem.
               work, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldu, ldvt, nl, nr, sqre
           real(${rk}$), intent(inout) :: alpha, beta
           ! Array Arguments 
           integer(${ik}$), intent(inout) :: idxq(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(${rk}$), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: coltyp, i, idx, idxc, idxp, iq, isigma, iu2, ivt2, iz, k, ldq, ldu2, &
                     ldvt2, m, n, n1, n2
           real(${rk}$) :: orgnrm
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD1', -info )
              return
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           ! the following values are for bookkeeping purposes only.  they are
           ! integer pointers which indicate the portion of the workspace
           ! used by a particular array in stdlib${ii}$_${ri}$lasd2 and stdlib${ii}$_${ri}$lasd3.
           ldu2 = n
           ldvt2 = m
           iz = 1_${ik}$
           isigma = iz + m
           iu2 = isigma + n
           ivt2 = iu2 + ldu2*n
           iq = ivt2 + ldvt2*m
           idx = 1_${ik}$
           idxc = idx + n
           coltyp = idxc + n
           idxp = coltyp + n
           ! scale.
           orgnrm = max( abs( alpha ), abs( beta ) )
           d( nl+1 ) = zero
           do i = 1, n
              if( abs( d( i ) )>orgnrm ) then
                 orgnrm = abs( d( i ) )
              end if
           end do
           call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, orgnrm, one, n, 1_${ik}$, d, n, info )
           alpha = alpha / orgnrm
           beta = beta / orgnrm
           ! deflate singular values.
           call stdlib${ii}$_${ri}$lasd2( nl, nr, sqre, k, d, work( iz ), alpha, beta, u, ldu,vt, ldvt, work(&
            isigma ), work( iu2 ), ldu2,work( ivt2 ), ldvt2, iwork( idxp ), iwork( idx ),iwork( &
                      idxc ), idxq, iwork( coltyp ), info )
           ! solve secular equation and update singular vectors.
           ldq = k
           call stdlib${ii}$_${ri}$lasd3( nl, nr, sqre, k, d, work( iq ), ldq, work( isigma ),u, ldu, work( &
           iu2 ), ldu2, vt, ldvt, work( ivt2 ),ldvt2, iwork( idxc ), iwork( coltyp ), work( iz ),&
                     info )
           ! report the convergence failure.
           if( info/=0_${ik}$ ) then
              return
           end if
           ! unscale.
           call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, orgnrm, n, 1_${ik}$, d, n, info )
           ! prepare the idxq sorting permutation.
           n1 = k
           n2 = n - k
           call stdlib${ii}$_${ri}$lamrg( n1, n2, d, 1_${ik}$, -1_${ik}$, idxq )
           return
     end subroutine stdlib${ii}$_${ri}$lasd1

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slasd2( nl, nr, sqre, k, d, z, alpha, beta, u, ldu, vt,ldvt, dsigma, &
     !! SLASD2 merges the two sets of singular values together into a single
     !! sorted set.  Then it tries to deflate the size of the problem.
     !! There are two ways in which deflation can occur:  when two or more
     !! singular values are close together or if there is a tiny entry in the
     !! Z vector.  For each such occurrence the order of the related secular
     !! equation problem is reduced by one.
     !! SLASD2 is called from SLASD1.
               u2, ldu2, vt2, ldvt2, idxp, idx,idxc, idxq, coltyp, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, k
           integer(${ik}$), intent(in) :: ldu, ldu2, ldvt, ldvt2, nl, nr, sqre
           real(sp), intent(in) :: alpha, beta
           ! Array Arguments 
           integer(${ik}$), intent(out) :: coltyp(*), idx(*), idxc(*), idxp(*)
           integer(${ik}$), intent(inout) :: idxq(*)
           real(sp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*)
           real(sp), intent(out) :: dsigma(*), u2(ldu2,*), vt2(ldvt2,*), z(*)
        ! =====================================================================
           
           ! Local Arrays 
           integer(${ik}$) :: ctot(4_${ik}$), psm(4_${ik}$)
           ! Local Scalars 
           integer(${ik}$) :: ct, i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2
           real(sp) :: c, eps, hlftol, s, tau, tol, z1
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           if( ldu<n ) then
              info = -10_${ik}$
           else if( ldvt<m ) then
              info = -12_${ik}$
           else if( ldu2<n ) then
              info = -15_${ik}$
           else if( ldvt2<m ) then
              info = -17_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLASD2', -info )
              return
           end if
           nlp1 = nl + 1_${ik}$
           nlp2 = nl + 2_${ik}$
           ! generate the first part of the vector z; and move the singular
           ! values in the first part of d one position backward.
           z1 = alpha*vt( nlp1, nlp1 )
           z( 1_${ik}$ ) = z1
           do i = nl, 1, -1
              z( i+1 ) = alpha*vt( i, nlp1 )
              d( i+1 ) = d( i )
              idxq( i+1 ) = idxq( i ) + 1_${ik}$
           end do
           ! generate the second part of the vector z.
           do i = nlp2, m
              z( i ) = beta*vt( i, nlp2 )
           end do
           ! initialize some reference arrays.
           do i = 2, nlp1
              coltyp( i ) = 1_${ik}$
           end do
           do i = nlp2, n
              coltyp( i ) = 2_${ik}$
           end do
           ! sort the singular values into increasing order
           do i = nlp2, n
              idxq( i ) = idxq( i ) + nlp1
           end do
           ! dsigma, idxc, idxc, and the first column of u2
           ! are used as storage space.
           do i = 2, n
              dsigma( i ) = d( idxq( i ) )
              u2( i, 1_${ik}$ ) = z( idxq( i ) )
              idxc( i ) = coltyp( idxq( i ) )
           end do
           call stdlib${ii}$_slamrg( nl, nr, dsigma( 2_${ik}$ ), 1_${ik}$, 1_${ik}$, idx( 2_${ik}$ ) )
           do i = 2, n
              idxi = 1_${ik}$ + idx( i )
              d( i ) = dsigma( idxi )
              z( i ) = u2( idxi, 1_${ik}$ )
              coltyp( i ) = idxc( idxi )
           end do
           ! calculate the allowable deflation tolerance
           eps = stdlib${ii}$_slamch( 'EPSILON' )
           tol = max( abs( alpha ), abs( beta ) )
           tol = eight*eps*max( abs( d( n ) ), tol )
           ! there are 2 kinds of deflation -- first a value in the z-vector
           ! is small, second two (or more) singular values are very close
           ! together (their difference is small).
           ! if the value in the z-vector is small, we simply permute the
           ! array so that the corresponding singular value is moved to the
           ! end.
           ! if two values in the d-vector are close, we perform a two-sided
           ! rotation designed to make one of the corresponding z-vector
           ! entries zero, and then permute the array so that the deflated
           ! singular value is moved to the end.
           ! if there are multiple singular values then the problem deflates.
           ! here the number of equal singular values are found.  as each equal
           ! singular value is found, an elementary reflector is computed to
           ! rotate the corresponding singular subspace so that the
           ! corresponding components of z are zero in this new basis.
           k = 1_${ik}$
           k2 = n + 1_${ik}$
           do j = 2, n
              if( abs( z( j ) )<=tol ) then
                 ! deflate due to small z component.
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = j
                 coltyp( j ) = 4_${ik}$
                 if( j==n )go to 120
              else
                 jprev = j
                 go to 90
              end if
           end do
           90 continue
           j = jprev
           100 continue
           j = j + 1_${ik}$
           if( j>n )go to 110
           if( abs( z( j ) )<=tol ) then
              ! deflate due to small z component.
              k2 = k2 - 1_${ik}$
              idxp( k2 ) = j
              coltyp( j ) = 4_${ik}$
           else
              ! check if singular values are close enough to allow deflation.
              if( abs( d( j )-d( jprev ) )<=tol ) then
                 ! deflation is possible.
                 s = z( jprev )
                 c = z( j )
                 ! find sqrt(a**2+b**2) without overflow or
                 ! destructive underflow.
                 tau = stdlib${ii}$_slapy2( c, s )
                 c = c / tau
                 s = -s / tau
                 z( j ) = tau
                 z( jprev ) = zero
                 ! apply back the givens rotation to the left and right
                 ! singular vector matrices.
                 idxjp = idxq( idx( jprev )+1_${ik}$ )
                 idxj = idxq( idx( j )+1_${ik}$ )
                 if( idxjp<=nlp1 ) then
                    idxjp = idxjp - 1_${ik}$
                 end if
                 if( idxj<=nlp1 ) then
                    idxj = idxj - 1_${ik}$
                 end if
                 call stdlib${ii}$_srot( n, u( 1_${ik}$, idxjp ), 1_${ik}$, u( 1_${ik}$, idxj ), 1_${ik}$, c, s )
                 call stdlib${ii}$_srot( m, vt( idxjp, 1_${ik}$ ), ldvt, vt( idxj, 1_${ik}$ ), ldvt, c,s )
                 if( coltyp( j )/=coltyp( jprev ) ) then
                    coltyp( j ) = 3_${ik}$
                 end if
                 coltyp( jprev ) = 4_${ik}$
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = jprev
                 jprev = j
              else
                 k = k + 1_${ik}$
                 u2( k, 1_${ik}$ ) = z( jprev )
                 dsigma( k ) = d( jprev )
                 idxp( k ) = jprev
                 jprev = j
              end if
           end if
           go to 100
           110 continue
           ! record the last singular value.
           k = k + 1_${ik}$
           u2( k, 1_${ik}$ ) = z( jprev )
           dsigma( k ) = d( jprev )
           idxp( k ) = jprev
           120 continue
           ! count up the total number of the various types of columns, then
           ! form a permutation which positions the four column types into
           ! four groups of uniform structure (although one or more of these
           ! groups may be empty).
           do j = 1, 4
              ctot( j ) = 0_${ik}$
           end do
           do j = 2, n
              ct = coltyp( j )
              ctot( ct ) = ctot( ct ) + 1_${ik}$
           end do
           ! psm(*) = position in submatrix (of types 1 through 4)
           psm( 1_${ik}$ ) = 2_${ik}$
           psm( 2_${ik}$ ) = 2_${ik}$ + ctot( 1_${ik}$ )
           psm( 3_${ik}$ ) = psm( 2_${ik}$ ) + ctot( 2_${ik}$ )
           psm( 4_${ik}$ ) = psm( 3_${ik}$ ) + ctot( 3_${ik}$ )
           ! fill out the idxc array so that the permutation which it induces
           ! will place all type-1 columns first, all type-2 columns next,
           ! then all type-3's, and finally all type-4's, starting from the
           ! second column. this applies similarly to the rows of vt.
           do j = 2, n
              jp = idxp( j )
              ct = coltyp( jp )
              idxc( psm( ct ) ) = j
              psm( ct ) = psm( ct ) + 1_${ik}$
           end do
           ! sort the singular values and corresponding singular vectors into
           ! dsigma, u2, and vt2 respectively.  the singular values/vectors
           ! which were not deflated go into the first k slots of dsigma, u2,
           ! and vt2 respectively, while those which were deflated go into the
           ! last n - k slots, except that the first column/row will be treated
           ! separately.
           do j = 2, n
              jp = idxp( j )
              dsigma( j ) = d( jp )
              idxj = idxq( idx( idxp( idxc( j ) ) )+1_${ik}$ )
              if( idxj<=nlp1 ) then
                 idxj = idxj - 1_${ik}$
              end if
              call stdlib${ii}$_scopy( n, u( 1_${ik}$, idxj ), 1_${ik}$, u2( 1_${ik}$, j ), 1_${ik}$ )
              call stdlib${ii}$_scopy( m, vt( idxj, 1_${ik}$ ), ldvt, vt2( j, 1_${ik}$ ), ldvt2 )
           end do
           ! determine dsigma(1), dsigma(2) and z(1)
           dsigma( 1_${ik}$ ) = zero
           hlftol = tol / two
           if( abs( dsigma( 2_${ik}$ ) )<=hlftol )dsigma( 2_${ik}$ ) = hlftol
           if( m>n ) then
              z( 1_${ik}$ ) = stdlib${ii}$_slapy2( z1, z( m ) )
              if( z( 1_${ik}$ )<=tol ) then
                 c = one
                 s = zero
                 z( 1_${ik}$ ) = tol
              else
                 c = z1 / z( 1_${ik}$ )
                 s = z( m ) / z( 1_${ik}$ )
              end if
           else
              if( abs( z1 )<=tol ) then
                 z( 1_${ik}$ ) = tol
              else
                 z( 1_${ik}$ ) = z1
              end if
           end if
           ! move the rest of the updating row to z.
           call stdlib${ii}$_scopy( k-1, u2( 2_${ik}$, 1_${ik}$ ), 1_${ik}$, z( 2_${ik}$ ), 1_${ik}$ )
           ! determine the first column of u2, the first row of vt2 and the
           ! last row of vt.
           call stdlib${ii}$_slaset( 'A', n, 1_${ik}$, zero, zero, u2, ldu2 )
           u2( nlp1, 1_${ik}$ ) = one
           if( m>n ) then
              do i = 1, nlp1
                 vt( m, i ) = -s*vt( nlp1, i )
                 vt2( 1_${ik}$, i ) = c*vt( nlp1, i )
              end do
              do i = nlp2, m
                 vt2( 1_${ik}$, i ) = s*vt( m, i )
                 vt( m, i ) = c*vt( m, i )
              end do
           else
              call stdlib${ii}$_scopy( m, vt( nlp1, 1_${ik}$ ), ldvt, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2 )
           end if
           if( m>n ) then
              call stdlib${ii}$_scopy( m, vt( m, 1_${ik}$ ), ldvt, vt2( m, 1_${ik}$ ), ldvt2 )
           end if
           ! the deflated singular values and their corresponding vectors go
           ! into the back of d, u, and v respectively.
           if( n>k ) then
              call stdlib${ii}$_scopy( n-k, dsigma( k+1 ), 1_${ik}$, d( k+1 ), 1_${ik}$ )
              call stdlib${ii}$_slacpy( 'A', n, n-k, u2( 1_${ik}$, k+1 ), ldu2, u( 1_${ik}$, k+1 ),ldu )
              call stdlib${ii}$_slacpy( 'A', n-k, m, vt2( k+1, 1_${ik}$ ), ldvt2, vt( k+1, 1_${ik}$ ),ldvt )
           end if
           ! copy ctot into coltyp for referencing in stdlib${ii}$_slasd3.
           do j = 1, 4
              coltyp( j ) = ctot( j )
           end do
           return
     end subroutine stdlib${ii}$_slasd2

     pure module subroutine stdlib${ii}$_dlasd2( nl, nr, sqre, k, d, z, alpha, beta, u, ldu, vt,ldvt, dsigma, &
     !! DLASD2 merges the two sets of singular values together into a single
     !! sorted set.  Then it tries to deflate the size of the problem.
     !! There are two ways in which deflation can occur:  when two or more
     !! singular values are close together or if there is a tiny entry in the
     !! Z vector.  For each such occurrence the order of the related secular
     !! equation problem is reduced by one.
     !! DLASD2 is called from DLASD1.
               u2, ldu2, vt2, ldvt2, idxp, idx,idxc, idxq, coltyp, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, k
           integer(${ik}$), intent(in) :: ldu, ldu2, ldvt, ldvt2, nl, nr, sqre
           real(dp), intent(in) :: alpha, beta
           ! Array Arguments 
           integer(${ik}$), intent(out) :: coltyp(*), idx(*), idxc(*), idxp(*)
           integer(${ik}$), intent(inout) :: idxq(*)
           real(dp), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*)
           real(dp), intent(out) :: dsigma(*), u2(ldu2,*), vt2(ldvt2,*), z(*)
        ! =====================================================================
           
           ! Local Arrays 
           integer(${ik}$) :: ctot(4_${ik}$), psm(4_${ik}$)
           ! Local Scalars 
           integer(${ik}$) :: ct, i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2
           real(dp) :: c, eps, hlftol, s, tau, tol, z1
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           if( ldu<n ) then
              info = -10_${ik}$
           else if( ldvt<m ) then
              info = -12_${ik}$
           else if( ldu2<n ) then
              info = -15_${ik}$
           else if( ldvt2<m ) then
              info = -17_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD2', -info )
              return
           end if
           nlp1 = nl + 1_${ik}$
           nlp2 = nl + 2_${ik}$
           ! generate the first part of the vector z; and move the singular
           ! values in the first part of d one position backward.
           z1 = alpha*vt( nlp1, nlp1 )
           z( 1_${ik}$ ) = z1
           do i = nl, 1, -1
              z( i+1 ) = alpha*vt( i, nlp1 )
              d( i+1 ) = d( i )
              idxq( i+1 ) = idxq( i ) + 1_${ik}$
           end do
           ! generate the second part of the vector z.
           do i = nlp2, m
              z( i ) = beta*vt( i, nlp2 )
           end do
           ! initialize some reference arrays.
           do i = 2, nlp1
              coltyp( i ) = 1_${ik}$
           end do
           do i = nlp2, n
              coltyp( i ) = 2_${ik}$
           end do
           ! sort the singular values into increasing order
           do i = nlp2, n
              idxq( i ) = idxq( i ) + nlp1
           end do
           ! dsigma, idxc, idxc, and the first column of u2
           ! are used as storage space.
           do i = 2, n
              dsigma( i ) = d( idxq( i ) )
              u2( i, 1_${ik}$ ) = z( idxq( i ) )
              idxc( i ) = coltyp( idxq( i ) )
           end do
           call stdlib${ii}$_dlamrg( nl, nr, dsigma( 2_${ik}$ ), 1_${ik}$, 1_${ik}$, idx( 2_${ik}$ ) )
           do i = 2, n
              idxi = 1_${ik}$ + idx( i )
              d( i ) = dsigma( idxi )
              z( i ) = u2( idxi, 1_${ik}$ )
              coltyp( i ) = idxc( idxi )
           end do
           ! calculate the allowable deflation tolerance
           eps = stdlib${ii}$_dlamch( 'EPSILON' )
           tol = max( abs( alpha ), abs( beta ) )
           tol = eight*eps*max( abs( d( n ) ), tol )
           ! there are 2 kinds of deflation -- first a value in the z-vector
           ! is small, second two (or more) singular values are very close
           ! together (their difference is small).
           ! if the value in the z-vector is small, we simply permute the
           ! array so that the corresponding singular value is moved to the
           ! end.
           ! if two values in the d-vector are close, we perform a two-sided
           ! rotation designed to make one of the corresponding z-vector
           ! entries zero, and then permute the array so that the deflated
           ! singular value is moved to the end.
           ! if there are multiple singular values then the problem deflates.
           ! here the number of equal singular values are found.  as each equal
           ! singular value is found, an elementary reflector is computed to
           ! rotate the corresponding singular subspace so that the
           ! corresponding components of z are zero in this new basis.
           k = 1_${ik}$
           k2 = n + 1_${ik}$
           do j = 2, n
              if( abs( z( j ) )<=tol ) then
                 ! deflate due to small z component.
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = j
                 coltyp( j ) = 4_${ik}$
                 if( j==n )go to 120
              else
                 jprev = j
                 go to 90
              end if
           end do
           90 continue
           j = jprev
           100 continue
           j = j + 1_${ik}$
           if( j>n )go to 110
           if( abs( z( j ) )<=tol ) then
              ! deflate due to small z component.
              k2 = k2 - 1_${ik}$
              idxp( k2 ) = j
              coltyp( j ) = 4_${ik}$
           else
              ! check if singular values are close enough to allow deflation.
              if( abs( d( j )-d( jprev ) )<=tol ) then
                 ! deflation is possible.
                 s = z( jprev )
                 c = z( j )
                 ! find sqrt(a**2+b**2) without overflow or
                 ! destructive underflow.
                 tau = stdlib${ii}$_dlapy2( c, s )
                 c = c / tau
                 s = -s / tau
                 z( j ) = tau
                 z( jprev ) = zero
                 ! apply back the givens rotation to the left and right
                 ! singular vector matrices.
                 idxjp = idxq( idx( jprev )+1_${ik}$ )
                 idxj = idxq( idx( j )+1_${ik}$ )
                 if( idxjp<=nlp1 ) then
                    idxjp = idxjp - 1_${ik}$
                 end if
                 if( idxj<=nlp1 ) then
                    idxj = idxj - 1_${ik}$
                 end if
                 call stdlib${ii}$_drot( n, u( 1_${ik}$, idxjp ), 1_${ik}$, u( 1_${ik}$, idxj ), 1_${ik}$, c, s )
                 call stdlib${ii}$_drot( m, vt( idxjp, 1_${ik}$ ), ldvt, vt( idxj, 1_${ik}$ ), ldvt, c,s )
                 if( coltyp( j )/=coltyp( jprev ) ) then
                    coltyp( j ) = 3_${ik}$
                 end if
                 coltyp( jprev ) = 4_${ik}$
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = jprev
                 jprev = j
              else
                 k = k + 1_${ik}$
                 u2( k, 1_${ik}$ ) = z( jprev )
                 dsigma( k ) = d( jprev )
                 idxp( k ) = jprev
                 jprev = j
              end if
           end if
           go to 100
           110 continue
           ! record the last singular value.
           k = k + 1_${ik}$
           u2( k, 1_${ik}$ ) = z( jprev )
           dsigma( k ) = d( jprev )
           idxp( k ) = jprev
           120 continue
           ! count up the total number of the various types of columns, then
           ! form a permutation which positions the four column types into
           ! four groups of uniform structure (although one or more of these
           ! groups may be empty).
           do j = 1, 4
              ctot( j ) = 0_${ik}$
           end do
           do j = 2, n
              ct = coltyp( j )
              ctot( ct ) = ctot( ct ) + 1_${ik}$
           end do
           ! psm(*) = position in submatrix (of types 1 through 4)
           psm( 1_${ik}$ ) = 2_${ik}$
           psm( 2_${ik}$ ) = 2_${ik}$ + ctot( 1_${ik}$ )
           psm( 3_${ik}$ ) = psm( 2_${ik}$ ) + ctot( 2_${ik}$ )
           psm( 4_${ik}$ ) = psm( 3_${ik}$ ) + ctot( 3_${ik}$ )
           ! fill out the idxc array so that the permutation which it induces
           ! will place all type-1 columns first, all type-2 columns next,
           ! then all type-3's, and finally all type-4's, starting from the
           ! second column. this applies similarly to the rows of vt.
           do j = 2, n
              jp = idxp( j )
              ct = coltyp( jp )
              idxc( psm( ct ) ) = j
              psm( ct ) = psm( ct ) + 1_${ik}$
           end do
           ! sort the singular values and corresponding singular vectors into
           ! dsigma, u2, and vt2 respectively.  the singular values/vectors
           ! which were not deflated go into the first k slots of dsigma, u2,
           ! and vt2 respectively, while those which were deflated go into the
           ! last n - k slots, except that the first column/row will be treated
           ! separately.
           do j = 2, n
              jp = idxp( j )
              dsigma( j ) = d( jp )
              idxj = idxq( idx( idxp( idxc( j ) ) )+1_${ik}$ )
              if( idxj<=nlp1 ) then
                 idxj = idxj - 1_${ik}$
              end if
              call stdlib${ii}$_dcopy( n, u( 1_${ik}$, idxj ), 1_${ik}$, u2( 1_${ik}$, j ), 1_${ik}$ )
              call stdlib${ii}$_dcopy( m, vt( idxj, 1_${ik}$ ), ldvt, vt2( j, 1_${ik}$ ), ldvt2 )
           end do
           ! determine dsigma(1), dsigma(2) and z(1)
           dsigma( 1_${ik}$ ) = zero
           hlftol = tol / two
           if( abs( dsigma( 2_${ik}$ ) )<=hlftol )dsigma( 2_${ik}$ ) = hlftol
           if( m>n ) then
              z( 1_${ik}$ ) = stdlib${ii}$_dlapy2( z1, z( m ) )
              if( z( 1_${ik}$ )<=tol ) then
                 c = one
                 s = zero
                 z( 1_${ik}$ ) = tol
              else
                 c = z1 / z( 1_${ik}$ )
                 s = z( m ) / z( 1_${ik}$ )
              end if
           else
              if( abs( z1 )<=tol ) then
                 z( 1_${ik}$ ) = tol
              else
                 z( 1_${ik}$ ) = z1
              end if
           end if
           ! move the rest of the updating row to z.
           call stdlib${ii}$_dcopy( k-1, u2( 2_${ik}$, 1_${ik}$ ), 1_${ik}$, z( 2_${ik}$ ), 1_${ik}$ )
           ! determine the first column of u2, the first row of vt2 and the
           ! last row of vt.
           call stdlib${ii}$_dlaset( 'A', n, 1_${ik}$, zero, zero, u2, ldu2 )
           u2( nlp1, 1_${ik}$ ) = one
           if( m>n ) then
              do i = 1, nlp1
                 vt( m, i ) = -s*vt( nlp1, i )
                 vt2( 1_${ik}$, i ) = c*vt( nlp1, i )
              end do
              do i = nlp2, m
                 vt2( 1_${ik}$, i ) = s*vt( m, i )
                 vt( m, i ) = c*vt( m, i )
              end do
           else
              call stdlib${ii}$_dcopy( m, vt( nlp1, 1_${ik}$ ), ldvt, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2 )
           end if
           if( m>n ) then
              call stdlib${ii}$_dcopy( m, vt( m, 1_${ik}$ ), ldvt, vt2( m, 1_${ik}$ ), ldvt2 )
           end if
           ! the deflated singular values and their corresponding vectors go
           ! into the back of d, u, and v respectively.
           if( n>k ) then
              call stdlib${ii}$_dcopy( n-k, dsigma( k+1 ), 1_${ik}$, d( k+1 ), 1_${ik}$ )
              call stdlib${ii}$_dlacpy( 'A', n, n-k, u2( 1_${ik}$, k+1 ), ldu2, u( 1_${ik}$, k+1 ),ldu )
              call stdlib${ii}$_dlacpy( 'A', n-k, m, vt2( k+1, 1_${ik}$ ), ldvt2, vt( k+1, 1_${ik}$ ),ldvt )
           end if
           ! copy ctot into coltyp for referencing in stdlib${ii}$_dlasd3.
           do j = 1, 4
              coltyp( j ) = ctot( j )
           end do
           return
     end subroutine stdlib${ii}$_dlasd2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lasd2( nl, nr, sqre, k, d, z, alpha, beta, u, ldu, vt,ldvt, dsigma, &
     !! DLASD2: merges the two sets of singular values together into a single
     !! sorted set.  Then it tries to deflate the size of the problem.
     !! There are two ways in which deflation can occur:  when two or more
     !! singular values are close together or if there is a tiny entry in the
     !! Z vector.  For each such occurrence the order of the related secular
     !! equation problem is reduced by one.
     !! DLASD2 is called from DLASD1.
               u2, ldu2, vt2, ldvt2, idxp, idx,idxc, idxq, coltyp, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info, k
           integer(${ik}$), intent(in) :: ldu, ldu2, ldvt, ldvt2, nl, nr, sqre
           real(${rk}$), intent(in) :: alpha, beta
           ! Array Arguments 
           integer(${ik}$), intent(out) :: coltyp(*), idx(*), idxc(*), idxp(*)
           integer(${ik}$), intent(inout) :: idxq(*)
           real(${rk}$), intent(inout) :: d(*), u(ldu,*), vt(ldvt,*)
           real(${rk}$), intent(out) :: dsigma(*), u2(ldu2,*), vt2(ldvt2,*), z(*)
        ! =====================================================================
           
           ! Local Arrays 
           integer(${ik}$) :: ctot(4_${ik}$), psm(4_${ik}$)
           ! Local Scalars 
           integer(${ik}$) :: ct, i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2
           real(${rk}$) :: c, eps, hlftol, s, tau, tol, z1
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           if( ldu<n ) then
              info = -10_${ik}$
           else if( ldvt<m ) then
              info = -12_${ik}$
           else if( ldu2<n ) then
              info = -15_${ik}$
           else if( ldvt2<m ) then
              info = -17_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD2', -info )
              return
           end if
           nlp1 = nl + 1_${ik}$
           nlp2 = nl + 2_${ik}$
           ! generate the first part of the vector z; and move the singular
           ! values in the first part of d one position backward.
           z1 = alpha*vt( nlp1, nlp1 )
           z( 1_${ik}$ ) = z1
           do i = nl, 1, -1
              z( i+1 ) = alpha*vt( i, nlp1 )
              d( i+1 ) = d( i )
              idxq( i+1 ) = idxq( i ) + 1_${ik}$
           end do
           ! generate the second part of the vector z.
           do i = nlp2, m
              z( i ) = beta*vt( i, nlp2 )
           end do
           ! initialize some reference arrays.
           do i = 2, nlp1
              coltyp( i ) = 1_${ik}$
           end do
           do i = nlp2, n
              coltyp( i ) = 2_${ik}$
           end do
           ! sort the singular values into increasing order
           do i = nlp2, n
              idxq( i ) = idxq( i ) + nlp1
           end do
           ! dsigma, idxc, idxc, and the first column of u2
           ! are used as storage space.
           do i = 2, n
              dsigma( i ) = d( idxq( i ) )
              u2( i, 1_${ik}$ ) = z( idxq( i ) )
              idxc( i ) = coltyp( idxq( i ) )
           end do
           call stdlib${ii}$_${ri}$lamrg( nl, nr, dsigma( 2_${ik}$ ), 1_${ik}$, 1_${ik}$, idx( 2_${ik}$ ) )
           do i = 2, n
              idxi = 1_${ik}$ + idx( i )
              d( i ) = dsigma( idxi )
              z( i ) = u2( idxi, 1_${ik}$ )
              coltyp( i ) = idxc( idxi )
           end do
           ! calculate the allowable deflation tolerance
           eps = stdlib${ii}$_${ri}$lamch( 'EPSILON' )
           tol = max( abs( alpha ), abs( beta ) )
           tol = eight*eps*max( abs( d( n ) ), tol )
           ! there are 2 kinds of deflation -- first a value in the z-vector
           ! is small, second two (or more) singular values are very close
           ! together (their difference is small).
           ! if the value in the z-vector is small, we simply permute the
           ! array so that the corresponding singular value is moved to the
           ! end.
           ! if two values in the d-vector are close, we perform a two-sided
           ! rotation designed to make one of the corresponding z-vector
           ! entries zero, and then permute the array so that the deflated
           ! singular value is moved to the end.
           ! if there are multiple singular values then the problem deflates.
           ! here the number of equal singular values are found.  as each equal
           ! singular value is found, an elementary reflector is computed to
           ! rotate the corresponding singular subspace so that the
           ! corresponding components of z are zero in this new basis.
           k = 1_${ik}$
           k2 = n + 1_${ik}$
           do j = 2, n
              if( abs( z( j ) )<=tol ) then
                 ! deflate due to small z component.
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = j
                 coltyp( j ) = 4_${ik}$
                 if( j==n )go to 120
              else
                 jprev = j
                 go to 90
              end if
           end do
           90 continue
           j = jprev
           100 continue
           j = j + 1_${ik}$
           if( j>n )go to 110
           if( abs( z( j ) )<=tol ) then
              ! deflate due to small z component.
              k2 = k2 - 1_${ik}$
              idxp( k2 ) = j
              coltyp( j ) = 4_${ik}$
           else
              ! check if singular values are close enough to allow deflation.
              if( abs( d( j )-d( jprev ) )<=tol ) then
                 ! deflation is possible.
                 s = z( jprev )
                 c = z( j )
                 ! find sqrt(a**2+b**2) without overflow or
                 ! destructive underflow.
                 tau = stdlib${ii}$_${ri}$lapy2( c, s )
                 c = c / tau
                 s = -s / tau
                 z( j ) = tau
                 z( jprev ) = zero
                 ! apply back the givens rotation to the left and right
                 ! singular vector matrices.
                 idxjp = idxq( idx( jprev )+1_${ik}$ )
                 idxj = idxq( idx( j )+1_${ik}$ )
                 if( idxjp<=nlp1 ) then
                    idxjp = idxjp - 1_${ik}$
                 end if
                 if( idxj<=nlp1 ) then
                    idxj = idxj - 1_${ik}$
                 end if
                 call stdlib${ii}$_${ri}$rot( n, u( 1_${ik}$, idxjp ), 1_${ik}$, u( 1_${ik}$, idxj ), 1_${ik}$, c, s )
                 call stdlib${ii}$_${ri}$rot( m, vt( idxjp, 1_${ik}$ ), ldvt, vt( idxj, 1_${ik}$ ), ldvt, c,s )
                 if( coltyp( j )/=coltyp( jprev ) ) then
                    coltyp( j ) = 3_${ik}$
                 end if
                 coltyp( jprev ) = 4_${ik}$
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = jprev
                 jprev = j
              else
                 k = k + 1_${ik}$
                 u2( k, 1_${ik}$ ) = z( jprev )
                 dsigma( k ) = d( jprev )
                 idxp( k ) = jprev
                 jprev = j
              end if
           end if
           go to 100
           110 continue
           ! record the last singular value.
           k = k + 1_${ik}$
           u2( k, 1_${ik}$ ) = z( jprev )
           dsigma( k ) = d( jprev )
           idxp( k ) = jprev
           120 continue
           ! count up the total number of the various types of columns, then
           ! form a permutation which positions the four column types into
           ! four groups of uniform structure (although one or more of these
           ! groups may be empty).
           do j = 1, 4
              ctot( j ) = 0_${ik}$
           end do
           do j = 2, n
              ct = coltyp( j )
              ctot( ct ) = ctot( ct ) + 1_${ik}$
           end do
           ! psm(*) = position in submatrix (of types 1 through 4)
           psm( 1_${ik}$ ) = 2_${ik}$
           psm( 2_${ik}$ ) = 2_${ik}$ + ctot( 1_${ik}$ )
           psm( 3_${ik}$ ) = psm( 2_${ik}$ ) + ctot( 2_${ik}$ )
           psm( 4_${ik}$ ) = psm( 3_${ik}$ ) + ctot( 3_${ik}$ )
           ! fill out the idxc array so that the permutation which it induces
           ! will place all type-1 columns first, all type-2 columns next,
           ! then all type-3's, and finally all type-4's, starting from the
           ! second column. this applies similarly to the rows of vt.
           do j = 2, n
              jp = idxp( j )
              ct = coltyp( jp )
              idxc( psm( ct ) ) = j
              psm( ct ) = psm( ct ) + 1_${ik}$
           end do
           ! sort the singular values and corresponding singular vectors into
           ! dsigma, u2, and vt2 respectively.  the singular values/vectors
           ! which were not deflated go into the first k slots of dsigma, u2,
           ! and vt2 respectively, while those which were deflated go into the
           ! last n - k slots, except that the first column/row will be treated
           ! separately.
           do j = 2, n
              jp = idxp( j )
              dsigma( j ) = d( jp )
              idxj = idxq( idx( idxp( idxc( j ) ) )+1_${ik}$ )
              if( idxj<=nlp1 ) then
                 idxj = idxj - 1_${ik}$
              end if
              call stdlib${ii}$_${ri}$copy( n, u( 1_${ik}$, idxj ), 1_${ik}$, u2( 1_${ik}$, j ), 1_${ik}$ )
              call stdlib${ii}$_${ri}$copy( m, vt( idxj, 1_${ik}$ ), ldvt, vt2( j, 1_${ik}$ ), ldvt2 )
           end do
           ! determine dsigma(1), dsigma(2) and z(1)
           dsigma( 1_${ik}$ ) = zero
           hlftol = tol / two
           if( abs( dsigma( 2_${ik}$ ) )<=hlftol )dsigma( 2_${ik}$ ) = hlftol
           if( m>n ) then
              z( 1_${ik}$ ) = stdlib${ii}$_${ri}$lapy2( z1, z( m ) )
              if( z( 1_${ik}$ )<=tol ) then
                 c = one
                 s = zero
                 z( 1_${ik}$ ) = tol
              else
                 c = z1 / z( 1_${ik}$ )
                 s = z( m ) / z( 1_${ik}$ )
              end if
           else
              if( abs( z1 )<=tol ) then
                 z( 1_${ik}$ ) = tol
              else
                 z( 1_${ik}$ ) = z1
              end if
           end if
           ! move the rest of the updating row to z.
           call stdlib${ii}$_${ri}$copy( k-1, u2( 2_${ik}$, 1_${ik}$ ), 1_${ik}$, z( 2_${ik}$ ), 1_${ik}$ )
           ! determine the first column of u2, the first row of vt2 and the
           ! last row of vt.
           call stdlib${ii}$_${ri}$laset( 'A', n, 1_${ik}$, zero, zero, u2, ldu2 )
           u2( nlp1, 1_${ik}$ ) = one
           if( m>n ) then
              do i = 1, nlp1
                 vt( m, i ) = -s*vt( nlp1, i )
                 vt2( 1_${ik}$, i ) = c*vt( nlp1, i )
              end do
              do i = nlp2, m
                 vt2( 1_${ik}$, i ) = s*vt( m, i )
                 vt( m, i ) = c*vt( m, i )
              end do
           else
              call stdlib${ii}$_${ri}$copy( m, vt( nlp1, 1_${ik}$ ), ldvt, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2 )
           end if
           if( m>n ) then
              call stdlib${ii}$_${ri}$copy( m, vt( m, 1_${ik}$ ), ldvt, vt2( m, 1_${ik}$ ), ldvt2 )
           end if
           ! the deflated singular values and their corresponding vectors go
           ! into the back of d, u, and v respectively.
           if( n>k ) then
              call stdlib${ii}$_${ri}$copy( n-k, dsigma( k+1 ), 1_${ik}$, d( k+1 ), 1_${ik}$ )
              call stdlib${ii}$_${ri}$lacpy( 'A', n, n-k, u2( 1_${ik}$, k+1 ), ldu2, u( 1_${ik}$, k+1 ),ldu )
              call stdlib${ii}$_${ri}$lacpy( 'A', n-k, m, vt2( k+1, 1_${ik}$ ), ldvt2, vt( k+1, 1_${ik}$ ),ldvt )
           end if
           ! copy ctot into coltyp for referencing in stdlib${ii}$_${ri}$lasd3.
           do j = 1, 4
              coltyp( j ) = ctot( j )
           end do
           return
     end subroutine stdlib${ii}$_${ri}$lasd2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slasd3( nl, nr, sqre, k, d, q, ldq, dsigma, u, ldu, u2,ldu2, vt, ldvt,&
     !! SLASD3 finds all the square roots of the roots of the secular
     !! equation, as defined by the values in D and Z.  It makes the
     !! appropriate calls to SLASD4 and then updates the singular
     !! vectors by matrix multiplication.
     !! This code makes very mild assumptions about floating point
     !! arithmetic. It will work on machines with a guard digit in
     !! add/subtract, or on those binary machines without guard digits
     !! which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
     !! It could conceivably fail on hexadecimal or decimal machines
     !! without guard digits, but we know of none.
     !! SLASD3 is called from SLASD1.
                vt2, ldvt2, idxc, ctot, z,info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldq, ldu, ldu2, ldvt, ldvt2, nl, nr, sqre
           ! Array Arguments 
           integer(${ik}$), intent(in) :: ctot(*), idxc(*)
           real(sp), intent(out) :: d(*), q(ldq,*), u(ldu,*), vt(ldvt,*)
           real(sp), intent(inout) :: dsigma(*), vt2(ldvt2,*), z(*)
           real(sp), intent(in) :: u2(ldu2,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: ctemp, i, j, jc, ktemp, m, n, nlp1, nlp2, nrp1
           real(sp) :: rho, temp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           nlp1 = nl + 1_${ik}$
           nlp2 = nl + 2_${ik}$
           if( ( k<1_${ik}$ ) .or. ( k>n ) ) then
              info = -4_${ik}$
           else if( ldq<k ) then
              info = -7_${ik}$
           else if( ldu<n ) then
              info = -10_${ik}$
           else if( ldu2<n ) then
              info = -12_${ik}$
           else if( ldvt<m ) then
              info = -14_${ik}$
           else if( ldvt2<m ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLASD3', -info )
              return
           end if
           ! quick return if possible
           if( k==1_${ik}$ ) then
              d( 1_${ik}$ ) = abs( z( 1_${ik}$ ) )
              call stdlib${ii}$_scopy( m, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, vt( 1_${ik}$, 1_${ik}$ ), ldvt )
              if( z( 1_${ik}$ )>zero ) then
                 call stdlib${ii}$_scopy( n, u2( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, u( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              else
                 do i = 1, n
                    u( i, 1_${ik}$ ) = -u2( i, 1_${ik}$ )
                 end do
              end if
              return
           end if
           ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can
           ! be computed with high relative accuracy (barring over/underflow).
           ! this is a problem on machines without a guard digit in
           ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2).
           ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i),
           ! which on any of these machines zeros out the bottommost
           ! bit of dsigma(i) if it is 1; this makes the subsequent
           ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation
           ! occurs. on binary machines with a guard digit (almost all
           ! machines) it does not change dsigma(i) at all. on hexadecimal
           ! and decimal machines with a guard digit, it slightly
           ! changes the bottommost bits of dsigma(i). it does not account
           ! for hexadecimal or decimal machines without guard digits
           ! (we know of none). we use a subroutine call to compute
           ! 2*dsigma(i) to prevent optimizing compilers from eliminating
           ! this code.
           do i = 1, k
              dsigma( i ) = stdlib${ii}$_slamc3( dsigma( i ), dsigma( i ) ) - dsigma( i )
           end do
           ! keep a copy of z.
           call stdlib${ii}$_scopy( k, z, 1_${ik}$, q, 1_${ik}$ )
           ! normalize z.
           rho = stdlib${ii}$_snrm2( k, z, 1_${ik}$ )
           call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, rho, one, k, 1_${ik}$, z, k, info )
           rho = rho*rho
           ! find the new singular values.
           do j = 1, k
              call stdlib${ii}$_slasd4( k, j, dsigma, z, u( 1_${ik}$, j ), rho, d( j ),vt( 1_${ik}$, j ), info )
                        
              ! if the zero finder fails, report the convergence failure.
              if( info/=0_${ik}$ ) then
                 return
              end if
           end do
           ! compute updated z.
           do i = 1, k
              z( i ) = u( i, k )*vt( i, k )
              do j = 1, i - 1
                 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j ) ) /( dsigma( i &
                           )+dsigma( j ) ) )
              end do
              do j = i, k - 1
                 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j+1 ) ) /( dsigma( &
                           i )+dsigma( j+1 ) ) )
              end do
              z( i ) = sign( sqrt( abs( z( i ) ) ), q( i, 1_${ik}$ ) )
           end do
           ! compute left singular vectors of the modified diagonal matrix,
           ! and store related information for the right singular vectors.
           do i = 1, k
              vt( 1_${ik}$, i ) = z( 1_${ik}$ ) / u( 1_${ik}$, i ) / vt( 1_${ik}$, i )
              u( 1_${ik}$, i ) = negone
              do j = 2, k
                 vt( j, i ) = z( j ) / u( j, i ) / vt( j, i )
                 u( j, i ) = dsigma( j )*vt( j, i )
              end do
              temp = stdlib${ii}$_snrm2( k, u( 1_${ik}$, i ), 1_${ik}$ )
              q( 1_${ik}$, i ) = u( 1_${ik}$, i ) / temp
              do j = 2, k
                 jc = idxc( j )
                 q( j, i ) = u( jc, i ) / temp
              end do
           end do
           ! update the left singular vector matrix.
           if( k==2_${ik}$ ) then
              call stdlib${ii}$_sgemm( 'N', 'N', n, k, k, one, u2, ldu2, q, ldq, zero, u,ldu )
              go to 100
           end if
           if( ctot( 1_${ik}$ )>0_${ik}$ ) then
              call stdlib${ii}$_sgemm( 'N', 'N', nl, k, ctot( 1_${ik}$ ), one, u2( 1_${ik}$, 2_${ik}$ ), ldu2,q( 2_${ik}$, 1_${ik}$ ), ldq,&
                         zero, u( 1_${ik}$, 1_${ik}$ ), ldu )
              if( ctot( 3_${ik}$ )>0_${ik}$ ) then
                 ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
                 call stdlib${ii}$_sgemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( &
                           ktemp, 1_${ik}$ ), ldq, one, u( 1_${ik}$, 1_${ik}$ ), ldu )
              end if
           else if( ctot( 3_${ik}$ )>0_${ik}$ ) then
              ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
              call stdlib${ii}$_sgemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( ktemp, &
                        1_${ik}$ ), ldq, zero, u( 1_${ik}$, 1_${ik}$ ), ldu )
           else
              call stdlib${ii}$_slacpy( 'F', nl, k, u2, ldu2, u, ldu )
           end if
           call stdlib${ii}$_scopy( k, q( 1_${ik}$, 1_${ik}$ ), ldq, u( nlp1, 1_${ik}$ ), ldu )
           ktemp = 2_${ik}$ + ctot( 1_${ik}$ )
           ctemp = ctot( 2_${ik}$ ) + ctot( 3_${ik}$ )
           call stdlib${ii}$_sgemm( 'N', 'N', nr, k, ctemp, one, u2( nlp2, ktemp ), ldu2,q( ktemp, 1_${ik}$ ), &
                     ldq, zero, u( nlp2, 1_${ik}$ ), ldu )
           ! generate the right singular vectors.
           100 continue
           do i = 1, k
              temp = stdlib${ii}$_snrm2( k, vt( 1_${ik}$, i ), 1_${ik}$ )
              q( i, 1_${ik}$ ) = vt( 1_${ik}$, i ) / temp
              do j = 2, k
                 jc = idxc( j )
                 q( i, j ) = vt( jc, i ) / temp
              end do
           end do
           ! update the right singular vector matrix.
           if( k==2_${ik}$ ) then
              call stdlib${ii}$_sgemm( 'N', 'N', k, m, k, one, q, ldq, vt2, ldvt2, zero,vt, ldvt )
                        
              return
           end if
           ktemp = 1_${ik}$ + ctot( 1_${ik}$ )
           call stdlib${ii}$_sgemm( 'N', 'N', k, nlp1, ktemp, one, q( 1_${ik}$, 1_${ik}$ ), ldq,vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, &
                     zero, vt( 1_${ik}$, 1_${ik}$ ), ldvt )
           ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
           if( ktemp<=ldvt2 )call stdlib${ii}$_sgemm( 'N', 'N', k, nlp1, ctot( 3_${ik}$ ), one, q( 1_${ik}$, ktemp ),&
                     ldq, vt2( ktemp, 1_${ik}$ ), ldvt2, one, vt( 1_${ik}$, 1_${ik}$ ),ldvt )
           ktemp = ctot( 1_${ik}$ ) + 1_${ik}$
           nrp1 = nr + sqre
           if( ktemp>1_${ik}$ ) then
              do i = 1, k
                 q( i, ktemp ) = q( i, 1_${ik}$ )
              end do
              do i = nlp2, m
                 vt2( ktemp, i ) = vt2( 1_${ik}$, i )
              end do
           end if
           ctemp = 1_${ik}$ + ctot( 2_${ik}$ ) + ctot( 3_${ik}$ )
           call stdlib${ii}$_sgemm( 'N', 'N', k, nrp1, ctemp, one, q( 1_${ik}$, ktemp ), ldq,vt2( ktemp, nlp2 )&
                     , ldvt2, zero, vt( 1_${ik}$, nlp2 ), ldvt )
           return
     end subroutine stdlib${ii}$_slasd3

     pure module subroutine stdlib${ii}$_dlasd3( nl, nr, sqre, k, d, q, ldq, dsigma, u, ldu, u2,ldu2, vt, ldvt,&
     !! DLASD3 finds all the square roots of the roots of the secular
     !! equation, as defined by the values in D and Z.  It makes the
     !! appropriate calls to DLASD4 and then updates the singular
     !! vectors by matrix multiplication.
     !! This code makes very mild assumptions about floating point
     !! arithmetic. It will work on machines with a guard digit in
     !! add/subtract, or on those binary machines without guard digits
     !! which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
     !! It could conceivably fail on hexadecimal or decimal machines
     !! without guard digits, but we know of none.
     !! DLASD3 is called from DLASD1.
                vt2, ldvt2, idxc, ctot, z,info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldq, ldu, ldu2, ldvt, ldvt2, nl, nr, sqre
           ! Array Arguments 
           integer(${ik}$), intent(in) :: ctot(*), idxc(*)
           real(dp), intent(out) :: d(*), q(ldq,*), u(ldu,*), vt(ldvt,*)
           real(dp), intent(inout) :: dsigma(*), vt2(ldvt2,*), z(*)
           real(dp), intent(in) :: u2(ldu2,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: ctemp, i, j, jc, ktemp, m, n, nlp1, nlp2, nrp1
           real(dp) :: rho, temp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           nlp1 = nl + 1_${ik}$
           nlp2 = nl + 2_${ik}$
           if( ( k<1_${ik}$ ) .or. ( k>n ) ) then
              info = -4_${ik}$
           else if( ldq<k ) then
              info = -7_${ik}$
           else if( ldu<n ) then
              info = -10_${ik}$
           else if( ldu2<n ) then
              info = -12_${ik}$
           else if( ldvt<m ) then
              info = -14_${ik}$
           else if( ldvt2<m ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD3', -info )
              return
           end if
           ! quick return if possible
           if( k==1_${ik}$ ) then
              d( 1_${ik}$ ) = abs( z( 1_${ik}$ ) )
              call stdlib${ii}$_dcopy( m, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, vt( 1_${ik}$, 1_${ik}$ ), ldvt )
              if( z( 1_${ik}$ )>zero ) then
                 call stdlib${ii}$_dcopy( n, u2( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, u( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              else
                 do i = 1, n
                    u( i, 1_${ik}$ ) = -u2( i, 1_${ik}$ )
                 end do
              end if
              return
           end if
           ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can
           ! be computed with high relative accuracy (barring over/underflow).
           ! this is a problem on machines without a guard digit in
           ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2).
           ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i),
           ! which on any of these machines zeros out the bottommost
           ! bit of dsigma(i) if it is 1; this makes the subsequent
           ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation
           ! occurs. on binary machines with a guard digit (almost all
           ! machines) it does not change dsigma(i) at all. on hexadecimal
           ! and decimal machines with a guard digit, it slightly
           ! changes the bottommost bits of dsigma(i). it does not account
           ! for hexadecimal or decimal machines without guard digits
           ! (we know of none). we use a subroutine call to compute
           ! 2*dsigma(i) to prevent optimizing compilers from eliminating
           ! this code.
           do i = 1, k
              dsigma( i ) = stdlib${ii}$_dlamc3( dsigma( i ), dsigma( i ) ) - dsigma( i )
           end do
           ! keep a copy of z.
           call stdlib${ii}$_dcopy( k, z, 1_${ik}$, q, 1_${ik}$ )
           ! normalize z.
           rho = stdlib${ii}$_dnrm2( k, z, 1_${ik}$ )
           call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, rho, one, k, 1_${ik}$, z, k, info )
           rho = rho*rho
           ! find the new singular values.
           do j = 1, k
              call stdlib${ii}$_dlasd4( k, j, dsigma, z, u( 1_${ik}$, j ), rho, d( j ),vt( 1_${ik}$, j ), info )
                        
              ! if the zero finder fails, report the convergence failure.
              if( info/=0_${ik}$ ) then
                 return
              end if
           end do
           ! compute updated z.
           do i = 1, k
              z( i ) = u( i, k )*vt( i, k )
              do j = 1, i - 1
                 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j ) ) /( dsigma( i &
                           )+dsigma( j ) ) )
              end do
              do j = i, k - 1
                 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j+1 ) ) /( dsigma( &
                           i )+dsigma( j+1 ) ) )
              end do
              z( i ) = sign( sqrt( abs( z( i ) ) ), q( i, 1_${ik}$ ) )
           end do
           ! compute left singular vectors of the modified diagonal matrix,
           ! and store related information for the right singular vectors.
           do i = 1, k
              vt( 1_${ik}$, i ) = z( 1_${ik}$ ) / u( 1_${ik}$, i ) / vt( 1_${ik}$, i )
              u( 1_${ik}$, i ) = negone
              do j = 2, k
                 vt( j, i ) = z( j ) / u( j, i ) / vt( j, i )
                 u( j, i ) = dsigma( j )*vt( j, i )
              end do
              temp = stdlib${ii}$_dnrm2( k, u( 1_${ik}$, i ), 1_${ik}$ )
              q( 1_${ik}$, i ) = u( 1_${ik}$, i ) / temp
              do j = 2, k
                 jc = idxc( j )
                 q( j, i ) = u( jc, i ) / temp
              end do
           end do
           ! update the left singular vector matrix.
           if( k==2_${ik}$ ) then
              call stdlib${ii}$_dgemm( 'N', 'N', n, k, k, one, u2, ldu2, q, ldq, zero, u,ldu )
              go to 100
           end if
           if( ctot( 1_${ik}$ )>0_${ik}$ ) then
              call stdlib${ii}$_dgemm( 'N', 'N', nl, k, ctot( 1_${ik}$ ), one, u2( 1_${ik}$, 2_${ik}$ ), ldu2,q( 2_${ik}$, 1_${ik}$ ), ldq,&
                         zero, u( 1_${ik}$, 1_${ik}$ ), ldu )
              if( ctot( 3_${ik}$ )>0_${ik}$ ) then
                 ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
                 call stdlib${ii}$_dgemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( &
                           ktemp, 1_${ik}$ ), ldq, one, u( 1_${ik}$, 1_${ik}$ ), ldu )
              end if
           else if( ctot( 3_${ik}$ )>0_${ik}$ ) then
              ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
              call stdlib${ii}$_dgemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( ktemp, &
                        1_${ik}$ ), ldq, zero, u( 1_${ik}$, 1_${ik}$ ), ldu )
           else
              call stdlib${ii}$_dlacpy( 'F', nl, k, u2, ldu2, u, ldu )
           end if
           call stdlib${ii}$_dcopy( k, q( 1_${ik}$, 1_${ik}$ ), ldq, u( nlp1, 1_${ik}$ ), ldu )
           ktemp = 2_${ik}$ + ctot( 1_${ik}$ )
           ctemp = ctot( 2_${ik}$ ) + ctot( 3_${ik}$ )
           call stdlib${ii}$_dgemm( 'N', 'N', nr, k, ctemp, one, u2( nlp2, ktemp ), ldu2,q( ktemp, 1_${ik}$ ), &
                     ldq, zero, u( nlp2, 1_${ik}$ ), ldu )
           ! generate the right singular vectors.
           100 continue
           do i = 1, k
              temp = stdlib${ii}$_dnrm2( k, vt( 1_${ik}$, i ), 1_${ik}$ )
              q( i, 1_${ik}$ ) = vt( 1_${ik}$, i ) / temp
              do j = 2, k
                 jc = idxc( j )
                 q( i, j ) = vt( jc, i ) / temp
              end do
           end do
           ! update the right singular vector matrix.
           if( k==2_${ik}$ ) then
              call stdlib${ii}$_dgemm( 'N', 'N', k, m, k, one, q, ldq, vt2, ldvt2, zero,vt, ldvt )
                        
              return
           end if
           ktemp = 1_${ik}$ + ctot( 1_${ik}$ )
           call stdlib${ii}$_dgemm( 'N', 'N', k, nlp1, ktemp, one, q( 1_${ik}$, 1_${ik}$ ), ldq,vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, &
                     zero, vt( 1_${ik}$, 1_${ik}$ ), ldvt )
           ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
           if( ktemp<=ldvt2 )call stdlib${ii}$_dgemm( 'N', 'N', k, nlp1, ctot( 3_${ik}$ ), one, q( 1_${ik}$, ktemp ),&
                     ldq, vt2( ktemp, 1_${ik}$ ), ldvt2, one, vt( 1_${ik}$, 1_${ik}$ ),ldvt )
           ktemp = ctot( 1_${ik}$ ) + 1_${ik}$
           nrp1 = nr + sqre
           if( ktemp>1_${ik}$ ) then
              do i = 1, k
                 q( i, ktemp ) = q( i, 1_${ik}$ )
              end do
              do i = nlp2, m
                 vt2( ktemp, i ) = vt2( 1_${ik}$, i )
              end do
           end if
           ctemp = 1_${ik}$ + ctot( 2_${ik}$ ) + ctot( 3_${ik}$ )
           call stdlib${ii}$_dgemm( 'N', 'N', k, nrp1, ctemp, one, q( 1_${ik}$, ktemp ), ldq,vt2( ktemp, nlp2 )&
                     , ldvt2, zero, vt( 1_${ik}$, nlp2 ), ldvt )
           return
     end subroutine stdlib${ii}$_dlasd3

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lasd3( nl, nr, sqre, k, d, q, ldq, dsigma, u, ldu, u2,ldu2, vt, ldvt,&
     !! DLASD3: finds all the square roots of the roots of the secular
     !! equation, as defined by the values in D and Z.  It makes the
     !! appropriate calls to DLASD4 and then updates the singular
     !! vectors by matrix multiplication.
     !! This code makes very mild assumptions about floating point
     !! arithmetic. It will work on machines with a guard digit in
     !! add/subtract, or on those binary machines without guard digits
     !! which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
     !! It could conceivably fail on hexadecimal or decimal machines
     !! without guard digits, but we know of none.
     !! DLASD3 is called from DLASD1.
                vt2, ldvt2, idxc, ctot, z,info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: k, ldq, ldu, ldu2, ldvt, ldvt2, nl, nr, sqre
           ! Array Arguments 
           integer(${ik}$), intent(in) :: ctot(*), idxc(*)
           real(${rk}$), intent(out) :: d(*), q(ldq,*), u(ldu,*), vt(ldvt,*)
           real(${rk}$), intent(inout) :: dsigma(*), vt2(ldvt2,*), z(*)
           real(${rk}$), intent(in) :: u2(ldu2,*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: ctemp, i, j, jc, ktemp, m, n, nlp1, nlp2, nrp1
           real(${rk}$) :: rho, temp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( nl<1_${ik}$ ) then
              info = -1_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -2_${ik}$
           else if( ( sqre/=1_${ik}$ ) .and. ( sqre/=0_${ik}$ ) ) then
              info = -3_${ik}$
           end if
           n = nl + nr + 1_${ik}$
           m = n + sqre
           nlp1 = nl + 1_${ik}$
           nlp2 = nl + 2_${ik}$
           if( ( k<1_${ik}$ ) .or. ( k>n ) ) then
              info = -4_${ik}$
           else if( ldq<k ) then
              info = -7_${ik}$
           else if( ldu<n ) then
              info = -10_${ik}$
           else if( ldu2<n ) then
              info = -12_${ik}$
           else if( ldvt<m ) then
              info = -14_${ik}$
           else if( ldvt2<m ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD3', -info )
              return
           end if
           ! quick return if possible
           if( k==1_${ik}$ ) then
              d( 1_${ik}$ ) = abs( z( 1_${ik}$ ) )
              call stdlib${ii}$_${ri}$copy( m, vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, vt( 1_${ik}$, 1_${ik}$ ), ldvt )
              if( z( 1_${ik}$ )>zero ) then
                 call stdlib${ii}$_${ri}$copy( n, u2( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, u( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              else
                 do i = 1, n
                    u( i, 1_${ik}$ ) = -u2( i, 1_${ik}$ )
                 end do
              end if
              return
           end if
           ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can
           ! be computed with high relative accuracy (barring over/underflow).
           ! this is a problem on machines without a guard digit in
           ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2).
           ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i),
           ! which on any of these machines zeros out the bottommost
           ! bit of dsigma(i) if it is 1; this makes the subsequent
           ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation
           ! occurs. on binary machines with a guard digit (almost all
           ! machines) it does not change dsigma(i) at all. on hexadecimal
           ! and decimal machines with a guard digit, it slightly
           ! changes the bottommost bits of dsigma(i). it does not account
           ! for hexadecimal or decimal machines without guard digits
           ! (we know of none). we use a subroutine call to compute
           ! 2*dsigma(i) to prevent optimizing compilers from eliminating
           ! this code.
           do i = 1, k
              dsigma( i ) = stdlib${ii}$_${ri}$lamc3( dsigma( i ), dsigma( i ) ) - dsigma( i )
           end do
           ! keep a copy of z.
           call stdlib${ii}$_${ri}$copy( k, z, 1_${ik}$, q, 1_${ik}$ )
           ! normalize z.
           rho = stdlib${ii}$_${ri}$nrm2( k, z, 1_${ik}$ )
           call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, rho, one, k, 1_${ik}$, z, k, info )
           rho = rho*rho
           ! find the new singular values.
           do j = 1, k
              call stdlib${ii}$_${ri}$lasd4( k, j, dsigma, z, u( 1_${ik}$, j ), rho, d( j ),vt( 1_${ik}$, j ), info )
                        
              ! if the zero finder fails, report the convergence failure.
              if( info/=0_${ik}$ ) then
                 return
              end if
           end do
           ! compute updated z.
           do i = 1, k
              z( i ) = u( i, k )*vt( i, k )
              do j = 1, i - 1
                 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j ) ) /( dsigma( i &
                           )+dsigma( j ) ) )
              end do
              do j = i, k - 1
                 z( i ) = z( i )*( u( i, j )*vt( i, j ) /( dsigma( i )-dsigma( j+1 ) ) /( dsigma( &
                           i )+dsigma( j+1 ) ) )
              end do
              z( i ) = sign( sqrt( abs( z( i ) ) ), q( i, 1_${ik}$ ) )
           end do
           ! compute left singular vectors of the modified diagonal matrix,
           ! and store related information for the right singular vectors.
           do i = 1, k
              vt( 1_${ik}$, i ) = z( 1_${ik}$ ) / u( 1_${ik}$, i ) / vt( 1_${ik}$, i )
              u( 1_${ik}$, i ) = negone
              do j = 2, k
                 vt( j, i ) = z( j ) / u( j, i ) / vt( j, i )
                 u( j, i ) = dsigma( j )*vt( j, i )
              end do
              temp = stdlib${ii}$_${ri}$nrm2( k, u( 1_${ik}$, i ), 1_${ik}$ )
              q( 1_${ik}$, i ) = u( 1_${ik}$, i ) / temp
              do j = 2, k
                 jc = idxc( j )
                 q( j, i ) = u( jc, i ) / temp
              end do
           end do
           ! update the left singular vector matrix.
           if( k==2_${ik}$ ) then
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, k, k, one, u2, ldu2, q, ldq, zero, u,ldu )
              go to 100
           end if
           if( ctot( 1_${ik}$ )>0_${ik}$ ) then
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', nl, k, ctot( 1_${ik}$ ), one, u2( 1_${ik}$, 2_${ik}$ ), ldu2,q( 2_${ik}$, 1_${ik}$ ), ldq,&
                         zero, u( 1_${ik}$, 1_${ik}$ ), ldu )
              if( ctot( 3_${ik}$ )>0_${ik}$ ) then
                 ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( &
                           ktemp, 1_${ik}$ ), ldq, one, u( 1_${ik}$, 1_${ik}$ ), ldu )
              end if
           else if( ctot( 3_${ik}$ )>0_${ik}$ ) then
              ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', nl, k, ctot( 3_${ik}$ ), one, u2( 1_${ik}$, ktemp ),ldu2, q( ktemp, &
                        1_${ik}$ ), ldq, zero, u( 1_${ik}$, 1_${ik}$ ), ldu )
           else
              call stdlib${ii}$_${ri}$lacpy( 'F', nl, k, u2, ldu2, u, ldu )
           end if
           call stdlib${ii}$_${ri}$copy( k, q( 1_${ik}$, 1_${ik}$ ), ldq, u( nlp1, 1_${ik}$ ), ldu )
           ktemp = 2_${ik}$ + ctot( 1_${ik}$ )
           ctemp = ctot( 2_${ik}$ ) + ctot( 3_${ik}$ )
           call stdlib${ii}$_${ri}$gemm( 'N', 'N', nr, k, ctemp, one, u2( nlp2, ktemp ), ldu2,q( ktemp, 1_${ik}$ ), &
                     ldq, zero, u( nlp2, 1_${ik}$ ), ldu )
           ! generate the right singular vectors.
           100 continue
           do i = 1, k
              temp = stdlib${ii}$_${ri}$nrm2( k, vt( 1_${ik}$, i ), 1_${ik}$ )
              q( i, 1_${ik}$ ) = vt( 1_${ik}$, i ) / temp
              do j = 2, k
                 jc = idxc( j )
                 q( i, j ) = vt( jc, i ) / temp
              end do
           end do
           ! update the right singular vector matrix.
           if( k==2_${ik}$ ) then
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', k, m, k, one, q, ldq, vt2, ldvt2, zero,vt, ldvt )
                        
              return
           end if
           ktemp = 1_${ik}$ + ctot( 1_${ik}$ )
           call stdlib${ii}$_${ri}$gemm( 'N', 'N', k, nlp1, ktemp, one, q( 1_${ik}$, 1_${ik}$ ), ldq,vt2( 1_${ik}$, 1_${ik}$ ), ldvt2, &
                     zero, vt( 1_${ik}$, 1_${ik}$ ), ldvt )
           ktemp = 2_${ik}$ + ctot( 1_${ik}$ ) + ctot( 2_${ik}$ )
           if( ktemp<=ldvt2 )call stdlib${ii}$_${ri}$gemm( 'N', 'N', k, nlp1, ctot( 3_${ik}$ ), one, q( 1_${ik}$, ktemp ),&
                     ldq, vt2( ktemp, 1_${ik}$ ), ldvt2, one, vt( 1_${ik}$, 1_${ik}$ ),ldvt )
           ktemp = ctot( 1_${ik}$ ) + 1_${ik}$
           nrp1 = nr + sqre
           if( ktemp>1_${ik}$ ) then
              do i = 1, k
                 q( i, ktemp ) = q( i, 1_${ik}$ )
              end do
              do i = nlp2, m
                 vt2( ktemp, i ) = vt2( 1_${ik}$, i )
              end do
           end if
           ctemp = 1_${ik}$ + ctot( 2_${ik}$ ) + ctot( 3_${ik}$ )
           call stdlib${ii}$_${ri}$gemm( 'N', 'N', k, nrp1, ctemp, one, q( 1_${ik}$, ktemp ), ldq,vt2( ktemp, nlp2 )&
                     , ldvt2, zero, vt( 1_${ik}$, nlp2 ), ldvt )
           return
     end subroutine stdlib${ii}$_${ri}$lasd3

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slasd4( n, i, d, z, delta, rho, sigma, work, info )
     !! This subroutine computes the square root of the I-th updated
     !! eigenvalue of a positive symmetric rank-one modification to
     !! a positive diagonal matrix whose entries are given as the squares
     !! of the corresponding entries in the array d, and that
     !! 0 <= D(i) < D(j)  for  i < j
     !! and that RHO > 0. This is arranged by the calling routine, and is
     !! no loss in generality.  The rank-one modified system is thus
     !! diag( D ) * diag( D ) +  RHO * Z * Z_transpose.
     !! where we assume the Euclidean norm of Z is 1.
     !! The method consists of approximating the rational functions in the
     !! secular equation by simpler interpolating rational functions.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: i, n
           integer(${ik}$), intent(out) :: info
           real(sp), intent(in) :: rho
           real(sp), intent(out) :: sigma
           ! Array Arguments 
           real(sp), intent(in) :: d(*), z(*)
           real(sp), intent(out) :: delta(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: maxit = 400_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: orgati, swtch, swtch3, geomavg
           integer(${ik}$) :: ii, iim1, iip1, ip1, iter, j, niter
           real(sp) :: a, b, c, delsq, delsq2, sq2, dphi, dpsi, dtiim, dtiip, dtipsq, dtisq, &
           dtnsq, dtnsq1, dw, eps, erretm, eta, phi, prew, psi, rhoinv, sglb, sgub, tau, tau2, &
                     temp, temp1, temp2, w
           ! Local Arrays 
           real(sp) :: dd(3_${ik}$), zz(3_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! since this routine is called in an inner loop, we do no argument
           ! checking.
           ! quick return for n=1 and 2.
           info = 0_${ik}$
           if( n==1_${ik}$ ) then
              ! presumably, i=1 upon entry
              sigma = sqrt( d( 1_${ik}$ )*d( 1_${ik}$ )+rho*z( 1_${ik}$ )*z( 1_${ik}$ ) )
              delta( 1_${ik}$ ) = one
              work( 1_${ik}$ ) = one
              return
           end if
           if( n==2_${ik}$ ) then
              call stdlib${ii}$_slasd5( i, d, z, delta, rho, sigma, work )
              return
           end if
           ! compute machine epsilon
           eps = stdlib${ii}$_slamch( 'EPSILON' )
           rhoinv = one / rho
           tau2= zero
           ! the case i = n
           if( i==n ) then
              ! initialize some basic variables
              ii = n - 1_${ik}$
              niter = 1_${ik}$
              ! calculate initial guess
              temp = rho / two
              ! if ||z||_2 is not one, then temp should be set to
              ! rho * ||z||_2^2 / two
              temp1 = temp / ( d( n )+sqrt( d( n )*d( n )+temp ) )
              do j = 1, n
                 work( j ) = d( j ) + d( n ) + temp1
                 delta( j ) = ( d( j )-d( n ) ) - temp1
              end do
              psi = zero
              do j = 1, n - 2
                 psi = psi + z( j )*z( j ) / ( delta( j )*work( j ) )
              end do
              c = rhoinv + psi
              w = c + z( ii )*z( ii ) / ( delta( ii )*work( ii ) ) +z( n )*z( n ) / ( delta( n )&
                        *work( n ) )
              if( w<=zero ) then
                 temp1 = sqrt( d( n )*d( n )+rho )
                 temp = z( n-1 )*z( n-1 ) / ( ( d( n-1 )+temp1 )*( d( n )-d( n-1 )+rho / ( d( n )+&
                           temp1 ) ) ) +z( n )*z( n ) / rho
                 ! the following tau2 is to approximate
                 ! sigma_n^2 - d( n )*d( n )
                 if( c<=temp ) then
                    tau = rho
                 else
                    delsq = ( d( n )-d( n-1 ) )*( d( n )+d( n-1 ) )
                    a = -c*delsq + z( n-1 )*z( n-1 ) + z( n )*z( n )
                    b = z( n )*z( n )*delsq
                    if( a<zero ) then
                       tau2 = two*b / ( sqrt( a*a+four*b*c )-a )
                    else
                       tau2 = ( a+sqrt( a*a+four*b*c ) ) / ( two*c )
                    end if
                    tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) )
                 end if
                 ! it can be proved that
                     ! d(n)^2+rho/2 <= sigma_n^2 < d(n)^2+tau2 <= d(n)^2+rho
              else
                 delsq = ( d( n )-d( n-1 ) )*( d( n )+d( n-1 ) )
                 a = -c*delsq + z( n-1 )*z( n-1 ) + z( n )*z( n )
                 b = z( n )*z( n )*delsq
                 ! the following tau2 is to approximate
                 ! sigma_n^2 - d( n )*d( n )
                 if( a<zero ) then
                    tau2 = two*b / ( sqrt( a*a+four*b*c )-a )
                 else
                    tau2 = ( a+sqrt( a*a+four*b*c ) ) / ( two*c )
                 end if
                 tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) )
                 ! it can be proved that
                 ! d(n)^2 < d(n)^2+tau2 < sigma(n)^2 < d(n)^2+rho/2
              end if
              ! the following tau is to approximate sigma_n - d( n )
               ! tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) )
              sigma = d( n ) + tau
              do j = 1, n
                 delta( j ) = ( d( j )-d( n ) ) - tau
                 work( j ) = d( j ) + d( n ) + tau
              end do
              ! evaluate psi and the derivative dpsi
              dpsi = zero
              psi = zero
              erretm = zero
              do j = 1, ii
                 temp = z( j ) / ( delta( j )*work( j ) )
                 psi = psi + z( j )*temp
                 dpsi = dpsi + temp*temp
                 erretm = erretm + psi
              end do
              erretm = abs( erretm )
              ! evaluate phi and the derivative dphi
              temp = z( n ) / ( delta( n )*work( n ) )
              phi = z( n )*temp
              dphi = temp*temp
              erretm = eight*( -phi-psi ) + erretm - phi + rhoinv
          ! $          + abs( tau2 )*( dpsi+dphi )
              w = rhoinv + phi + psi
              ! test for convergence
              if( abs( w )<=eps*erretm ) then
                 go to 240
              end if
              ! calculate the new step
              niter = niter + 1_${ik}$
              dtnsq1 = work( n-1 )*delta( n-1 )
              dtnsq = work( n )*delta( n )
              c = w - dtnsq1*dpsi - dtnsq*dphi
              a = ( dtnsq+dtnsq1 )*w - dtnsq*dtnsq1*( dpsi+dphi )
              b = dtnsq*dtnsq1*w
              if( c<zero )c = abs( c )
              if( c==zero ) then
                 eta = rho - sigma*sigma
              else if( a>=zero ) then
                 eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
              else
                 eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) )
              end if
              ! note, eta should be positive if w is negative, and
              ! eta should be negative otherwise. however,
              ! if for some reason caused by roundoff, eta*w > 0,
              ! we simply use one newton step instead. this way
              ! will guarantee eta*w < 0.
              if( w*eta>zero )eta = -w / ( dpsi+dphi )
              temp = eta - dtnsq
              if( temp>rho )eta = rho + dtnsq
              eta = eta / ( sigma+sqrt( eta+sigma*sigma ) )
              tau = tau + eta
              sigma = sigma + eta
              do j = 1, n
                 delta( j ) = delta( j ) - eta
                 work( j ) = work( j ) + eta
              end do
              ! evaluate psi and the derivative dpsi
              dpsi = zero
              psi = zero
              erretm = zero
              do j = 1, ii
                 temp = z( j ) / ( work( j )*delta( j ) )
                 psi = psi + z( j )*temp
                 dpsi = dpsi + temp*temp
                 erretm = erretm + psi
              end do
              erretm = abs( erretm )
              ! evaluate phi and the derivative dphi
              tau2 = work( n )*delta( n )
              temp = z( n ) / tau2
              phi = z( n )*temp
              dphi = temp*temp
              erretm = eight*( -phi-psi ) + erretm - phi + rhoinv
          ! $          + abs( tau2 )*( dpsi+dphi )
              w = rhoinv + phi + psi
              ! main loop to update the values of the array   delta
              iter = niter + 1_${ik}$
              loop_90: do niter = iter, maxit
                 ! test for convergence
                 if( abs( w )<=eps*erretm ) then
                    go to 240
                 end if
                 ! calculate the new step
                 dtnsq1 = work( n-1 )*delta( n-1 )
                 dtnsq = work( n )*delta( n )
                 c = w - dtnsq1*dpsi - dtnsq*dphi
                 a = ( dtnsq+dtnsq1 )*w - dtnsq1*dtnsq*( dpsi+dphi )
                 b = dtnsq1*dtnsq*w
                 if( a>=zero ) then
                    eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                 else
                    eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) )
                 end if
                 ! note, eta should be positive if w is negative, and
                 ! eta should be negative otherwise. however,
                 ! if for some reason caused by roundoff, eta*w > 0,
                 ! we simply use one newton step instead. this way
                 ! will guarantee eta*w < 0.
                 if( w*eta>zero )eta = -w / ( dpsi+dphi )
                 temp = eta - dtnsq
                 if( temp<=zero )eta = eta / two
                 eta = eta / ( sigma+sqrt( eta+sigma*sigma ) )
                 tau = tau + eta
                 sigma = sigma + eta
                 do j = 1, n
                    delta( j ) = delta( j ) - eta
                    work( j ) = work( j ) + eta
                 end do
                 ! evaluate psi and the derivative dpsi
                 dpsi = zero
                 psi = zero
                 erretm = zero
                 do j = 1, ii
                    temp = z( j ) / ( work( j )*delta( j ) )
                    psi = psi + z( j )*temp
                    dpsi = dpsi + temp*temp
                    erretm = erretm + psi
                 end do
                 erretm = abs( erretm )
                 ! evaluate phi and the derivative dphi
                 tau2 = work( n )*delta( n )
                 temp = z( n ) / tau2
                 phi = z( n )*temp
                 dphi = temp*temp
                 erretm = eight*( -phi-psi ) + erretm - phi + rhoinv
          ! $             + abs( tau2 )*( dpsi+dphi )
                 w = rhoinv + phi + psi
              end do loop_90
              ! return with info = 1, niter = maxit and not converged
              info = 1_${ik}$
              go to 240
              ! end for the case i = n
           else
              ! the case for i < n
              niter = 1_${ik}$
              ip1 = i + 1_${ik}$
              ! calculate initial guess
              delsq = ( d( ip1 )-d( i ) )*( d( ip1 )+d( i ) )
              delsq2 = delsq / two
              sq2=sqrt( ( d( i )*d( i )+d( ip1 )*d( ip1 ) ) / two )
              temp = delsq2 / ( d( i )+sq2 )
              do j = 1, n
                 work( j ) = d( j ) + d( i ) + temp
                 delta( j ) = ( d( j )-d( i ) ) - temp
              end do
              psi = zero
              do j = 1, i - 1
                 psi = psi + z( j )*z( j ) / ( work( j )*delta( j ) )
              end do
              phi = zero
              do j = n, i + 2, -1
                 phi = phi + z( j )*z( j ) / ( work( j )*delta( j ) )
              end do
              c = rhoinv + psi + phi
              w = c + z( i )*z( i ) / ( work( i )*delta( i ) ) +z( ip1 )*z( ip1 ) / ( work( ip1 )&
                        *delta( ip1 ) )
              geomavg = .false.
              if( w>zero ) then
                 ! d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2
                 ! we choose d(i) as origin.
                 orgati = .true.
                 ii = i
                 sglb = zero
                 sgub = delsq2  / ( d( i )+sq2 )
                 a = c*delsq + z( i )*z( i ) + z( ip1 )*z( ip1 )
                 b = z( i )*z( i )*delsq
                 if( a>zero ) then
                    tau2 = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) )
                 else
                    tau2 = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                 end if
                 ! tau2 now is an estimation of sigma^2 - d( i )^2. the
                 ! following, however, is the corresponding estimation of
                 ! sigma - d( i ).
                 tau = tau2 / ( d( i )+sqrt( d( i )*d( i )+tau2 ) )
                 temp = sqrt(eps)
                 if( (d(i)<=temp*d(ip1)).and.(abs(z(i))<=temp).and.(d(i)>zero) ) then
                    tau = min( ten*d(i), sgub )
                    geomavg = .true.
                 end if
              else
                 ! (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2
                 ! we choose d(i+1) as origin.
                 orgati = .false.
                 ii = ip1
                 sglb = -delsq2  / ( d( ii )+sq2 )
                 sgub = zero
                 a = c*delsq - z( i )*z( i ) - z( ip1 )*z( ip1 )
                 b = z( ip1 )*z( ip1 )*delsq
                 if( a<zero ) then
                    tau2 = two*b / ( a-sqrt( abs( a*a+four*b*c ) ) )
                 else
                    tau2 = -( a+sqrt( abs( a*a+four*b*c ) ) ) / ( two*c )
                 end if
                 ! tau2 now is an estimation of sigma^2 - d( ip1 )^2. the
                 ! following, however, is the corresponding estimation of
                 ! sigma - d( ip1 ).
                 tau = tau2 / ( d( ip1 )+sqrt( abs( d( ip1 )*d( ip1 )+tau2 ) ) )
              end if
              sigma = d( ii ) + tau
              do j = 1, n
                 work( j ) = d( j ) + d( ii ) + tau
                 delta( j ) = ( d( j )-d( ii ) ) - tau
              end do
              iim1 = ii - 1_${ik}$
              iip1 = ii + 1_${ik}$
              ! evaluate psi and the derivative dpsi
              dpsi = zero
              psi = zero
              erretm = zero
              do j = 1, iim1
                 temp = z( j ) / ( work( j )*delta( j ) )
                 psi = psi + z( j )*temp
                 dpsi = dpsi + temp*temp
                 erretm = erretm + psi
              end do
              erretm = abs( erretm )
              ! evaluate phi and the derivative dphi
              dphi = zero
              phi = zero
              do j = n, iip1, -1
                 temp = z( j ) / ( work( j )*delta( j ) )
                 phi = phi + z( j )*temp
                 dphi = dphi + temp*temp
                 erretm = erretm + phi
              end do
              w = rhoinv + phi + psi
              ! w is the value of the secular function with
              ! its ii-th element removed.
              swtch3 = .false.
              if( orgati ) then
                 if( w<zero )swtch3 = .true.
              else
                 if( w>zero )swtch3 = .true.
              end if
              if( ii==1_${ik}$ .or. ii==n )swtch3 = .false.
              temp = z( ii ) / ( work( ii )*delta( ii ) )
              dw = dpsi + dphi + temp*temp
              temp = z( ii )*temp
              w = w + temp
              erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp )
          ! $          + abs( tau2 )*dw
              ! test for convergence
              if( abs( w )<=eps*erretm ) then
                 go to 240
              end if
              if( w<=zero ) then
                 sglb = max( sglb, tau )
              else
                 sgub = min( sgub, tau )
              end if
              ! calculate the new step
              niter = niter + 1_${ik}$
              if( .not.swtch3 ) then
                 dtipsq = work( ip1 )*delta( ip1 )
                 dtisq = work( i )*delta( i )
                 if( orgati ) then
                    c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$
                 else
                    c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$
                 end if
                 a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw
                 b = dtipsq*dtisq*w
                 if( c==zero ) then
                    if( a==zero ) then
                       if( orgati ) then
                          a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi )
                       else
                          a = z( ip1 )*z( ip1 ) + dtisq*dtisq*( dpsi+dphi )
                       end if
                    end if
                    eta = b / a
                 else if( a<=zero ) then
                    eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                 else
                    eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) )
                 end if
              else
                 ! interpolation using three most relevant poles
                 dtiim = work( iim1 )*delta( iim1 )
                 dtiip = work( iip1 )*delta( iip1 )
                 temp = rhoinv + psi + phi
                 if( orgati ) then
                    temp1 = z( iim1 ) / dtiim
                    temp1 = temp1*temp1
                    c = ( temp - dtiip*( dpsi+dphi ) ) -( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( &
                              iip1 ) )*temp1
                    zz( 1_${ik}$ ) = z( iim1 )*z( iim1 )
                    if( dpsi<temp1 ) then
                       zz( 3_${ik}$ ) = dtiip*dtiip*dphi
                    else
                       zz( 3_${ik}$ ) = dtiip*dtiip*( ( dpsi-temp1 )+dphi )
                    end if
                 else
                    temp1 = z( iip1 ) / dtiip
                    temp1 = temp1*temp1
                    c = ( temp - dtiim*( dpsi+dphi ) ) -( d( iip1 )-d( iim1 ) )*( d( iim1 )+d( &
                              iip1 ) )*temp1
                    if( dphi<temp1 ) then
                       zz( 1_${ik}$ ) = dtiim*dtiim*dpsi
                    else
                       zz( 1_${ik}$ ) = dtiim*dtiim*( dpsi+( dphi-temp1 ) )
                    end if
                    zz( 3_${ik}$ ) = z( iip1 )*z( iip1 )
                 end if
                 zz( 2_${ik}$ ) = z( ii )*z( ii )
                 dd( 1_${ik}$ ) = dtiim
                 dd( 2_${ik}$ ) = delta( ii )*work( ii )
                 dd( 3_${ik}$ ) = dtiip
                 call stdlib${ii}$_slaed6( niter, orgati, c, dd, zz, w, eta, info )
                 if( info/=0_${ik}$ ) then
                    ! if info is not 0, i.e., stdlib${ii}$_slaed6 failed, switch back
                    ! to 2 pole interpolation.
                    swtch3 = .false.
                    info = 0_${ik}$
                    dtipsq = work( ip1 )*delta( ip1 )
                    dtisq = work( i )*delta( i )
                    if( orgati ) then
                       c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$
                    else
                       c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$
                    end if
                    a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw
                    b = dtipsq*dtisq*w
                    if( c==zero ) then
                       if( a==zero ) then
                          if( orgati ) then
                             a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi )
                          else
                             a = z( ip1 )*z( ip1 ) + dtisq*dtisq*( dpsi+dphi)
                          end if
                       end if
                       eta = b / a
                    else if( a<=zero ) then
                       eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                    else
                       eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) )
                    end if
                 end if
              end if
              ! note, eta should be positive if w is negative, and
              ! eta should be negative otherwise. however,
              ! if for some reason caused by roundoff, eta*w > 0,
              ! we simply use one newton step instead. this way
              ! will guarantee eta*w < 0.
              if( w*eta>=zero )eta = -w / dw
              eta = eta / ( sigma+sqrt( sigma*sigma+eta ) )
              temp = tau + eta
              if( temp>sgub .or. temp<sglb ) then
                 if( w<zero ) then
                    eta = ( sgub-tau ) / two
                 else
                    eta = ( sglb-tau ) / two
                 end if
                 if( geomavg ) then
                    if( w < zero ) then
                       if( tau > zero ) then
                          eta = sqrt(sgub*tau)-tau
                       end if
                    else
                       if( sglb > zero ) then
                          eta = sqrt(sglb*tau)-tau
                       end if
                    end if
                 end if
              end if
              prew = w
              tau = tau + eta
              sigma = sigma + eta
              do j = 1, n
                 work( j ) = work( j ) + eta
                 delta( j ) = delta( j ) - eta
              end do
              ! evaluate psi and the derivative dpsi
              dpsi = zero
              psi = zero
              erretm = zero
              do j = 1, iim1
                 temp = z( j ) / ( work( j )*delta( j ) )
                 psi = psi + z( j )*temp
                 dpsi = dpsi + temp*temp
                 erretm = erretm + psi
              end do
              erretm = abs( erretm )
              ! evaluate phi and the derivative dphi
              dphi = zero
              phi = zero
              do j = n, iip1, -1
                 temp = z( j ) / ( work( j )*delta( j ) )
                 phi = phi + z( j )*temp
                 dphi = dphi + temp*temp
                 erretm = erretm + phi
              end do
              tau2 = work( ii )*delta( ii )
              temp = z( ii ) / tau2
              dw = dpsi + dphi + temp*temp
              temp = z( ii )*temp
              w = rhoinv + phi + psi + temp
              erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp )
          ! $          + abs( tau2 )*dw
              swtch = .false.
              if( orgati ) then
                 if( -w>abs( prew ) / ten )swtch = .true.
              else
                 if( w>abs( prew ) / ten )swtch = .true.
              end if
              ! main loop to update the values of the array   delta and work
              iter = niter + 1_${ik}$
              loop_230: do niter = iter, maxit
                 ! test for convergence
                 if( abs( w )<=eps*erretm ) then
           ! $          .or. (sgub-sglb)<=eight*abs(sgub+sglb) ) then
                    go to 240
                 end if
                 if( w<=zero ) then
                    sglb = max( sglb, tau )
                 else
                    sgub = min( sgub, tau )
                 end if
                 ! calculate the new step
                 if( .not.swtch3 ) then
                    dtipsq = work( ip1 )*delta( ip1 )
                    dtisq = work( i )*delta( i )
                    if( .not.swtch ) then
                       if( orgati ) then
                          c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$
                       else
                          c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$
                       end if
                    else
                       temp = z( ii ) / ( work( ii )*delta( ii ) )
                       if( orgati ) then
                          dpsi = dpsi + temp*temp
                       else
                          dphi = dphi + temp*temp
                       end if
                       c = w - dtisq*dpsi - dtipsq*dphi
                    end if
                    a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw
                    b = dtipsq*dtisq*w
                    if( c==zero ) then
                       if( a==zero ) then
                          if( .not.swtch ) then
                             if( orgati ) then
                                a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi )
                             else
                                a = z( ip1 )*z( ip1 ) +dtisq*dtisq*( dpsi+dphi )
                             end if
                          else
                             a = dtisq*dtisq*dpsi + dtipsq*dtipsq*dphi
                          end if
                       end if
                       eta = b / a
                    else if( a<=zero ) then
                       eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                    else
                       eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) )
                    end if
                 else
                    ! interpolation using three most relevant poles
                    dtiim = work( iim1 )*delta( iim1 )
                    dtiip = work( iip1 )*delta( iip1 )
                    temp = rhoinv + psi + phi
                    if( swtch ) then
                       c = temp - dtiim*dpsi - dtiip*dphi
                       zz( 1_${ik}$ ) = dtiim*dtiim*dpsi
                       zz( 3_${ik}$ ) = dtiip*dtiip*dphi
                    else
                       if( orgati ) then
                          temp1 = z( iim1 ) / dtiim
                          temp1 = temp1*temp1
                          temp2 = ( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( iip1 ) )*temp1
                          c = temp - dtiip*( dpsi+dphi ) - temp2
                          zz( 1_${ik}$ ) = z( iim1 )*z( iim1 )
                          if( dpsi<temp1 ) then
                             zz( 3_${ik}$ ) = dtiip*dtiip*dphi
                          else
                             zz( 3_${ik}$ ) = dtiip*dtiip*( ( dpsi-temp1 )+dphi )
                          end if
                       else
                          temp1 = z( iip1 ) / dtiip
                          temp1 = temp1*temp1
                          temp2 = ( d( iip1 )-d( iim1 ) )*( d( iim1 )+d( iip1 ) )*temp1
                          c = temp - dtiim*( dpsi+dphi ) - temp2
                          if( dphi<temp1 ) then
                             zz( 1_${ik}$ ) = dtiim*dtiim*dpsi
                          else
                             zz( 1_${ik}$ ) = dtiim*dtiim*( dpsi+( dphi-temp1 ) )
                          end if
                          zz( 3_${ik}$ ) = z( iip1 )*z( iip1 )
                       end if
                    end if
                    dd( 1_${ik}$ ) = dtiim
                    dd( 2_${ik}$ ) = delta( ii )*work( ii )
                    dd( 3_${ik}$ ) = dtiip
                    call stdlib${ii}$_slaed6( niter, orgati, c, dd, zz, w, eta, info )
                    if( info/=0_${ik}$ ) then
                       ! if info is not 0, i.e., stdlib${ii}$_slaed6 failed, switch
                       ! back to two pole interpolation
                       swtch3 = .false.
                       info = 0_${ik}$
                       dtipsq = work( ip1 )*delta( ip1 )
                       dtisq = work( i )*delta( i )
                       if( .not.swtch ) then
                          if( orgati ) then
                             c = w - dtipsq*dw + delsq*( z( i )/dtisq )**2_${ik}$
                          else
                             c = w - dtisq*dw - delsq*( z( ip1 )/dtipsq )**2_${ik}$
                          end if
                       else
                          temp = z( ii ) / ( work( ii )*delta( ii ) )
                          if( orgati ) then
                             dpsi = dpsi + temp*temp
                          else
                             dphi = dphi + temp*temp
                          end if
                          c = w - dtisq*dpsi - dtipsq*dphi
                       end if
                       a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw
                       b = dtipsq*dtisq*w
                       if( c==zero ) then
                          if( a==zero ) then
                             if( .not.swtch ) then
                                if( orgati ) then
                                   a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi )
                                else
                                   a = z( ip1 )*z( ip1 ) +dtisq*dtisq*( dpsi+dphi )
                                end if
                             else
                                a = dtisq*dtisq*dpsi + dtipsq*dtipsq*dphi
                             end if
                          end if
                          eta = b / a
                       else if( a<=zero ) then
                          eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                       else
                          eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) )
                       end if
                    end if
                 end if
                 ! note, eta should be positive if w is negative, and
                 ! eta should be negative otherwise. however,
                 ! if for some reason caused by roundoff, eta*w > 0,
                 ! we simply use one newton step instead. this way
                 ! will guarantee eta*w < 0.
                 if( w*eta>=zero )eta = -w / dw
                 eta = eta / ( sigma+sqrt( sigma*sigma+eta ) )
                 temp=tau+eta
                 if( temp>sgub .or. temp<sglb ) then
                    if( w<zero ) then
                       eta = ( sgub-tau ) / two
                    else
                       eta = ( sglb-tau ) / two
                    end if
                    if( geomavg ) then
                       if( w < zero ) then
                          if( tau > zero ) then
                             eta = sqrt(sgub*tau)-tau
                          end if
                       else
                          if( sglb > zero ) then
                             eta = sqrt(sglb*tau)-tau
                          end if
                       end if
                    end if
                 end if
                 prew = w
                 tau = tau + eta
                 sigma = sigma + eta
                 do j = 1, n
                    work( j ) = work( j ) + eta
                    delta( j ) = delta( j ) - eta
                 end do
                 ! evaluate psi and the derivative dpsi
                 dpsi = zero
                 psi = zero
                 erretm = zero
                 do j = 1, iim1
                    temp = z( j ) / ( work( j )*delta( j ) )
                    psi = psi + z( j )*temp
                    dpsi = dpsi + temp*temp
                    erretm = erretm + psi
                 end do
                 erretm = abs( erretm )
                 ! evaluate phi and the derivative dphi
                 dphi = zero
                 phi = zero
                 do j = n, iip1, -1
                    temp = z( j ) / ( work( j )*delta( j ) )
                    phi = phi + z( j )*temp
                    dphi = dphi + temp*temp
                    erretm = erretm + phi
                 end do
                 tau2 = work( ii )*delta( ii )
                 temp = z( ii ) / tau2
                 dw = dpsi + dphi + temp*temp
                 temp = z( ii )*temp
                 w = rhoinv + phi + psi + temp
                 erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp )
          ! $             + abs( tau2 )*dw
                 if( w*prew>zero .and. abs( w )>abs( prew ) / ten )swtch = .not.swtch
              end do loop_230
              ! return with info = 1, niter = maxit and not converged
              info = 1_${ik}$
           end if
           240 continue
           return
     end subroutine stdlib${ii}$_slasd4

     pure module subroutine stdlib${ii}$_dlasd4( n, i, d, z, delta, rho, sigma, work, info )
     !! This subroutine computes the square root of the I-th updated
     !! eigenvalue of a positive symmetric rank-one modification to
     !! a positive diagonal matrix whose entries are given as the squares
     !! of the corresponding entries in the array d, and that
     !! 0 <= D(i) < D(j)  for  i < j
     !! and that RHO > 0. This is arranged by the calling routine, and is
     !! no loss in generality.  The rank-one modified system is thus
     !! diag( D ) * diag( D ) +  RHO * Z * Z_transpose.
     !! where we assume the Euclidean norm of Z is 1.
     !! The method consists of approximating the rational functions in the
     !! secular equation by simpler interpolating rational functions.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: i, n
           integer(${ik}$), intent(out) :: info
           real(dp), intent(in) :: rho
           real(dp), intent(out) :: sigma
           ! Array Arguments 
           real(dp), intent(in) :: d(*), z(*)
           real(dp), intent(out) :: delta(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: maxit = 400_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: orgati, swtch, swtch3, geomavg
           integer(${ik}$) :: ii, iim1, iip1, ip1, iter, j, niter
           real(dp) :: a, b, c, delsq, delsq2, sq2, dphi, dpsi, dtiim, dtiip, dtipsq, dtisq, &
           dtnsq, dtnsq1, dw, eps, erretm, eta, phi, prew, psi, rhoinv, sglb, sgub, tau, tau2, &
                     temp, temp1, temp2, w
           ! Local Arrays 
           real(dp) :: dd(3_${ik}$), zz(3_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! since this routine is called in an inner loop, we do no argument
           ! checking.
           ! quick return for n=1 and 2.
           info = 0_${ik}$
           if( n==1_${ik}$ ) then
              ! presumably, i=1 upon entry
              sigma = sqrt( d( 1_${ik}$ )*d( 1_${ik}$ )+rho*z( 1_${ik}$ )*z( 1_${ik}$ ) )
              delta( 1_${ik}$ ) = one
              work( 1_${ik}$ ) = one
              return
           end if
           if( n==2_${ik}$ ) then
              call stdlib${ii}$_dlasd5( i, d, z, delta, rho, sigma, work )
              return
           end if
           ! compute machine epsilon
           eps = stdlib${ii}$_dlamch( 'EPSILON' )
           rhoinv = one / rho
           tau2= zero
           ! the case i = n
           if( i==n ) then
              ! initialize some basic variables
              ii = n - 1_${ik}$
              niter = 1_${ik}$
              ! calculate initial guess
              temp = rho / two
              ! if ||z||_2 is not one, then temp should be set to
              ! rho * ||z||_2^2 / two
              temp1 = temp / ( d( n )+sqrt( d( n )*d( n )+temp ) )
              do j = 1, n
                 work( j ) = d( j ) + d( n ) + temp1
                 delta( j ) = ( d( j )-d( n ) ) - temp1
              end do
              psi = zero
              do j = 1, n - 2
                 psi = psi + z( j )*z( j ) / ( delta( j )*work( j ) )
              end do
              c = rhoinv + psi
              w = c + z( ii )*z( ii ) / ( delta( ii )*work( ii ) ) +z( n )*z( n ) / ( delta( n )&
                        *work( n ) )
              if( w<=zero ) then
                 temp1 = sqrt( d( n )*d( n )+rho )
                 temp = z( n-1 )*z( n-1 ) / ( ( d( n-1 )+temp1 )*( d( n )-d( n-1 )+rho / ( d( n )+&
                           temp1 ) ) ) +z( n )*z( n ) / rho
                 ! the following tau2 is to approximate
                 ! sigma_n^2 - d( n )*d( n )
                 if( c<=temp ) then
                    tau = rho
                 else
                    delsq = ( d( n )-d( n-1 ) )*( d( n )+d( n-1 ) )
                    a = -c*delsq + z( n-1 )*z( n-1 ) + z( n )*z( n )
                    b = z( n )*z( n )*delsq
                    if( a<zero ) then
                       tau2 = two*b / ( sqrt( a*a+four*b*c )-a )
                    else
                       tau2 = ( a+sqrt( a*a+four*b*c ) ) / ( two*c )
                    end if
                    tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) )
                 end if
                 ! it can be proved that
                     ! d(n)^2+rho/2 <= sigma_n^2 < d(n)^2+tau2 <= d(n)^2+rho
              else
                 delsq = ( d( n )-d( n-1 ) )*( d( n )+d( n-1 ) )
                 a = -c*delsq + z( n-1 )*z( n-1 ) + z( n )*z( n )
                 b = z( n )*z( n )*delsq
                 ! the following tau2 is to approximate
                 ! sigma_n^2 - d( n )*d( n )
                 if( a<zero ) then
                    tau2 = two*b / ( sqrt( a*a+four*b*c )-a )
                 else
                    tau2 = ( a+sqrt( a*a+four*b*c ) ) / ( two*c )
                 end if
                 tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) )
                 ! it can be proved that
                 ! d(n)^2 < d(n)^2+tau2 < sigma(n)^2 < d(n)^2+rho/2
              end if
              ! the following tau is to approximate sigma_n - d( n )
               ! tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) )
              sigma = d( n ) + tau
              do j = 1, n
                 delta( j ) = ( d( j )-d( n ) ) - tau
                 work( j ) = d( j ) + d( n ) + tau
              end do
              ! evaluate psi and the derivative dpsi
              dpsi = zero
              psi = zero
              erretm = zero
              do j = 1, ii
                 temp = z( j ) / ( delta( j )*work( j ) )
                 psi = psi + z( j )*temp
                 dpsi = dpsi + temp*temp
                 erretm = erretm + psi
              end do
              erretm = abs( erretm )
              ! evaluate phi and the derivative dphi
              temp = z( n ) / ( delta( n )*work( n ) )
              phi = z( n )*temp
              dphi = temp*temp
              erretm = eight*( -phi-psi ) + erretm - phi + rhoinv
          ! $          + abs( tau2 )*( dpsi+dphi )
              w = rhoinv + phi + psi
              ! test for convergence
              if( abs( w )<=eps*erretm ) then
                 go to 240
              end if
              ! calculate the new step
              niter = niter + 1_${ik}$
              dtnsq1 = work( n-1 )*delta( n-1 )
              dtnsq = work( n )*delta( n )
              c = w - dtnsq1*dpsi - dtnsq*dphi
              a = ( dtnsq+dtnsq1 )*w - dtnsq*dtnsq1*( dpsi+dphi )
              b = dtnsq*dtnsq1*w
              if( c<zero )c = abs( c )
              if( c==zero ) then
                 eta = rho - sigma*sigma
              else if( a>=zero ) then
                 eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
              else
                 eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) )
              end if
              ! note, eta should be positive if w is negative, and
              ! eta should be negative otherwise. however,
              ! if for some reason caused by roundoff, eta*w > 0,
              ! we simply use one newton step instead. this way
              ! will guarantee eta*w < 0.
              if( w*eta>zero )eta = -w / ( dpsi+dphi )
              temp = eta - dtnsq
              if( temp>rho )eta = rho + dtnsq
              eta = eta / ( sigma+sqrt( eta+sigma*sigma ) )
              tau = tau + eta
              sigma = sigma + eta
              do j = 1, n
                 delta( j ) = delta( j ) - eta
                 work( j ) = work( j ) + eta
              end do
              ! evaluate psi and the derivative dpsi
              dpsi = zero
              psi = zero
              erretm = zero
              do j = 1, ii
                 temp = z( j ) / ( work( j )*delta( j ) )
                 psi = psi + z( j )*temp
                 dpsi = dpsi + temp*temp
                 erretm = erretm + psi
              end do
              erretm = abs( erretm )
              ! evaluate phi and the derivative dphi
              tau2 = work( n )*delta( n )
              temp = z( n ) / tau2
              phi = z( n )*temp
              dphi = temp*temp
              erretm = eight*( -phi-psi ) + erretm - phi + rhoinv
          ! $          + abs( tau2 )*( dpsi+dphi )
              w = rhoinv + phi + psi
              ! main loop to update the values of the array   delta
              iter = niter + 1_${ik}$
              loop_90: do niter = iter, maxit
                 ! test for convergence
                 if( abs( w )<=eps*erretm ) then
                    go to 240
                 end if
                 ! calculate the new step
                 dtnsq1 = work( n-1 )*delta( n-1 )
                 dtnsq = work( n )*delta( n )
                 c = w - dtnsq1*dpsi - dtnsq*dphi
                 a = ( dtnsq+dtnsq1 )*w - dtnsq1*dtnsq*( dpsi+dphi )
                 b = dtnsq1*dtnsq*w
                 if( a>=zero ) then
                    eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                 else
                    eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) )
                 end if
                 ! note, eta should be positive if w is negative, and
                 ! eta should be negative otherwise. however,
                 ! if for some reason caused by roundoff, eta*w > 0,
                 ! we simply use one newton step instead. this way
                 ! will guarantee eta*w < 0.
                 if( w*eta>zero )eta = -w / ( dpsi+dphi )
                 temp = eta - dtnsq
                 if( temp<=zero )eta = eta / two
                 eta = eta / ( sigma+sqrt( eta+sigma*sigma ) )
                 tau = tau + eta
                 sigma = sigma + eta
                 do j = 1, n
                    delta( j ) = delta( j ) - eta
                    work( j ) = work( j ) + eta
                 end do
                 ! evaluate psi and the derivative dpsi
                 dpsi = zero
                 psi = zero
                 erretm = zero
                 do j = 1, ii
                    temp = z( j ) / ( work( j )*delta( j ) )
                    psi = psi + z( j )*temp
                    dpsi = dpsi + temp*temp
                    erretm = erretm + psi
                 end do
                 erretm = abs( erretm )
                 ! evaluate phi and the derivative dphi
                 tau2 = work( n )*delta( n )
                 temp = z( n ) / tau2
                 phi = z( n )*temp
                 dphi = temp*temp
                 erretm = eight*( -phi-psi ) + erretm - phi + rhoinv
          ! $             + abs( tau2 )*( dpsi+dphi )
                 w = rhoinv + phi + psi
              end do loop_90
              ! return with info = 1, niter = maxit and not converged
              info = 1_${ik}$
              go to 240
              ! end for the case i = n
           else
              ! the case for i < n
              niter = 1_${ik}$
              ip1 = i + 1_${ik}$
              ! calculate initial guess
              delsq = ( d( ip1 )-d( i ) )*( d( ip1 )+d( i ) )
              delsq2 = delsq / two
              sq2=sqrt( ( d( i )*d( i )+d( ip1 )*d( ip1 ) ) / two )
              temp = delsq2 / ( d( i )+sq2 )
              do j = 1, n
                 work( j ) = d( j ) + d( i ) + temp
                 delta( j ) = ( d( j )-d( i ) ) - temp
              end do
              psi = zero
              do j = 1, i - 1
                 psi = psi + z( j )*z( j ) / ( work( j )*delta( j ) )
              end do
              phi = zero
              do j = n, i + 2, -1
                 phi = phi + z( j )*z( j ) / ( work( j )*delta( j ) )
              end do
              c = rhoinv + psi + phi
              w = c + z( i )*z( i ) / ( work( i )*delta( i ) ) +z( ip1 )*z( ip1 ) / ( work( ip1 )&
                        *delta( ip1 ) )
              geomavg = .false.
              if( w>zero ) then
                 ! d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2
                 ! we choose d(i) as origin.
                 orgati = .true.
                 ii = i
                 sglb = zero
                 sgub = delsq2  / ( d( i )+sq2 )
                 a = c*delsq + z( i )*z( i ) + z( ip1 )*z( ip1 )
                 b = z( i )*z( i )*delsq
                 if( a>zero ) then
                    tau2 = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) )
                 else
                    tau2 = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                 end if
                 ! tau2 now is an estimation of sigma^2 - d( i )^2. the
                 ! following, however, is the corresponding estimation of
                 ! sigma - d( i ).
                 tau = tau2 / ( d( i )+sqrt( d( i )*d( i )+tau2 ) )
                 temp = sqrt(eps)
                 if( (d(i)<=temp*d(ip1)).and.(abs(z(i))<=temp).and.(d(i)>zero) ) then
                    tau = min( ten*d(i), sgub )
                    geomavg = .true.
                 end if
              else
                 ! (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2
                 ! we choose d(i+1) as origin.
                 orgati = .false.
                 ii = ip1
                 sglb = -delsq2  / ( d( ii )+sq2 )
                 sgub = zero
                 a = c*delsq - z( i )*z( i ) - z( ip1 )*z( ip1 )
                 b = z( ip1 )*z( ip1 )*delsq
                 if( a<zero ) then
                    tau2 = two*b / ( a-sqrt( abs( a*a+four*b*c ) ) )
                 else
                    tau2 = -( a+sqrt( abs( a*a+four*b*c ) ) ) / ( two*c )
                 end if
                 ! tau2 now is an estimation of sigma^2 - d( ip1 )^2. the
                 ! following, however, is the corresponding estimation of
                 ! sigma - d( ip1 ).
                 tau = tau2 / ( d( ip1 )+sqrt( abs( d( ip1 )*d( ip1 )+tau2 ) ) )
              end if
              sigma = d( ii ) + tau
              do j = 1, n
                 work( j ) = d( j ) + d( ii ) + tau
                 delta( j ) = ( d( j )-d( ii ) ) - tau
              end do
              iim1 = ii - 1_${ik}$
              iip1 = ii + 1_${ik}$
              ! evaluate psi and the derivative dpsi
              dpsi = zero
              psi = zero
              erretm = zero
              do j = 1, iim1
                 temp = z( j ) / ( work( j )*delta( j ) )
                 psi = psi + z( j )*temp
                 dpsi = dpsi + temp*temp
                 erretm = erretm + psi
              end do
              erretm = abs( erretm )
              ! evaluate phi and the derivative dphi
              dphi = zero
              phi = zero
              do j = n, iip1, -1
                 temp = z( j ) / ( work( j )*delta( j ) )
                 phi = phi + z( j )*temp
                 dphi = dphi + temp*temp
                 erretm = erretm + phi
              end do
              w = rhoinv + phi + psi
              ! w is the value of the secular function with
              ! its ii-th element removed.
              swtch3 = .false.
              if( orgati ) then
                 if( w<zero )swtch3 = .true.
              else
                 if( w>zero )swtch3 = .true.
              end if
              if( ii==1_${ik}$ .or. ii==n )swtch3 = .false.
              temp = z( ii ) / ( work( ii )*delta( ii ) )
              dw = dpsi + dphi + temp*temp
              temp = z( ii )*temp
              w = w + temp
              erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp )
          ! $          + abs( tau2 )*dw
              ! test for convergence
              if( abs( w )<=eps*erretm ) then
                 go to 240
              end if
              if( w<=zero ) then
                 sglb = max( sglb, tau )
              else
                 sgub = min( sgub, tau )
              end if
              ! calculate the new step
              niter = niter + 1_${ik}$
              if( .not.swtch3 ) then
                 dtipsq = work( ip1 )*delta( ip1 )
                 dtisq = work( i )*delta( i )
                 if( orgati ) then
                    c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$
                 else
                    c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$
                 end if
                 a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw
                 b = dtipsq*dtisq*w
                 if( c==zero ) then
                    if( a==zero ) then
                       if( orgati ) then
                          a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi )
                       else
                          a = z( ip1 )*z( ip1 ) + dtisq*dtisq*( dpsi+dphi )
                       end if
                    end if
                    eta = b / a
                 else if( a<=zero ) then
                    eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                 else
                    eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) )
                 end if
              else
                 ! interpolation using three most relevant poles
                 dtiim = work( iim1 )*delta( iim1 )
                 dtiip = work( iip1 )*delta( iip1 )
                 temp = rhoinv + psi + phi
                 if( orgati ) then
                    temp1 = z( iim1 ) / dtiim
                    temp1 = temp1*temp1
                    c = ( temp - dtiip*( dpsi+dphi ) ) -( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( &
                              iip1 ) )*temp1
                    zz( 1_${ik}$ ) = z( iim1 )*z( iim1 )
                    if( dpsi<temp1 ) then
                       zz( 3_${ik}$ ) = dtiip*dtiip*dphi
                    else
                       zz( 3_${ik}$ ) = dtiip*dtiip*( ( dpsi-temp1 )+dphi )
                    end if
                 else
                    temp1 = z( iip1 ) / dtiip
                    temp1 = temp1*temp1
                    c = ( temp - dtiim*( dpsi+dphi ) ) -( d( iip1 )-d( iim1 ) )*( d( iim1 )+d( &
                              iip1 ) )*temp1
                    if( dphi<temp1 ) then
                       zz( 1_${ik}$ ) = dtiim*dtiim*dpsi
                    else
                       zz( 1_${ik}$ ) = dtiim*dtiim*( dpsi+( dphi-temp1 ) )
                    end if
                    zz( 3_${ik}$ ) = z( iip1 )*z( iip1 )
                 end if
                 zz( 2_${ik}$ ) = z( ii )*z( ii )
                 dd( 1_${ik}$ ) = dtiim
                 dd( 2_${ik}$ ) = delta( ii )*work( ii )
                 dd( 3_${ik}$ ) = dtiip
                 call stdlib${ii}$_dlaed6( niter, orgati, c, dd, zz, w, eta, info )
                 if( info/=0_${ik}$ ) then
                    ! if info is not 0, i.e., stdlib${ii}$_dlaed6 failed, switch back
                    ! to 2 pole interpolation.
                    swtch3 = .false.
                    info = 0_${ik}$
                    dtipsq = work( ip1 )*delta( ip1 )
                    dtisq = work( i )*delta( i )
                    if( orgati ) then
                       c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$
                    else
                       c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$
                    end if
                    a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw
                    b = dtipsq*dtisq*w
                    if( c==zero ) then
                       if( a==zero ) then
                          if( orgati ) then
                             a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi )
                          else
                             a = z( ip1 )*z( ip1 ) + dtisq*dtisq*( dpsi+dphi)
                          end if
                       end if
                       eta = b / a
                    else if( a<=zero ) then
                       eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                    else
                       eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) )
                    end if
                 end if
              end if
              ! note, eta should be positive if w is negative, and
              ! eta should be negative otherwise. however,
              ! if for some reason caused by roundoff, eta*w > 0,
              ! we simply use one newton step instead. this way
              ! will guarantee eta*w < 0.
              if( w*eta>=zero )eta = -w / dw
              eta = eta / ( sigma+sqrt( sigma*sigma+eta ) )
              temp = tau + eta
              if( temp>sgub .or. temp<sglb ) then
                 if( w<zero ) then
                    eta = ( sgub-tau ) / two
                 else
                    eta = ( sglb-tau ) / two
                 end if
                 if( geomavg ) then
                    if( w < zero ) then
                       if( tau > zero ) then
                          eta = sqrt(sgub*tau)-tau
                       end if
                    else
                       if( sglb > zero ) then
                          eta = sqrt(sglb*tau)-tau
                       end if
                    end if
                 end if
              end if
              prew = w
              tau = tau + eta
              sigma = sigma + eta
              do j = 1, n
                 work( j ) = work( j ) + eta
                 delta( j ) = delta( j ) - eta
              end do
              ! evaluate psi and the derivative dpsi
              dpsi = zero
              psi = zero
              erretm = zero
              do j = 1, iim1
                 temp = z( j ) / ( work( j )*delta( j ) )
                 psi = psi + z( j )*temp
                 dpsi = dpsi + temp*temp
                 erretm = erretm + psi
              end do
              erretm = abs( erretm )
              ! evaluate phi and the derivative dphi
              dphi = zero
              phi = zero
              do j = n, iip1, -1
                 temp = z( j ) / ( work( j )*delta( j ) )
                 phi = phi + z( j )*temp
                 dphi = dphi + temp*temp
                 erretm = erretm + phi
              end do
              tau2 = work( ii )*delta( ii )
              temp = z( ii ) / tau2
              dw = dpsi + dphi + temp*temp
              temp = z( ii )*temp
              w = rhoinv + phi + psi + temp
              erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp )
          ! $          + abs( tau2 )*dw
              swtch = .false.
              if( orgati ) then
                 if( -w>abs( prew ) / ten )swtch = .true.
              else
                 if( w>abs( prew ) / ten )swtch = .true.
              end if
              ! main loop to update the values of the array   delta and work
              iter = niter + 1_${ik}$
              loop_230: do niter = iter, maxit
                 ! test for convergence
                 if( abs( w )<=eps*erretm ) then
           ! $          .or. (sgub-sglb)<=eight*abs(sgub+sglb) ) then
                    go to 240
                 end if
                 if( w<=zero ) then
                    sglb = max( sglb, tau )
                 else
                    sgub = min( sgub, tau )
                 end if
                 ! calculate the new step
                 if( .not.swtch3 ) then
                    dtipsq = work( ip1 )*delta( ip1 )
                    dtisq = work( i )*delta( i )
                    if( .not.swtch ) then
                       if( orgati ) then
                          c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$
                       else
                          c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$
                       end if
                    else
                       temp = z( ii ) / ( work( ii )*delta( ii ) )
                       if( orgati ) then
                          dpsi = dpsi + temp*temp
                       else
                          dphi = dphi + temp*temp
                       end if
                       c = w - dtisq*dpsi - dtipsq*dphi
                    end if
                    a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw
                    b = dtipsq*dtisq*w
                    if( c==zero ) then
                       if( a==zero ) then
                          if( .not.swtch ) then
                             if( orgati ) then
                                a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi )
                             else
                                a = z( ip1 )*z( ip1 ) +dtisq*dtisq*( dpsi+dphi )
                             end if
                          else
                             a = dtisq*dtisq*dpsi + dtipsq*dtipsq*dphi
                          end if
                       end if
                       eta = b / a
                    else if( a<=zero ) then
                       eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                    else
                       eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) )
                    end if
                 else
                    ! interpolation using three most relevant poles
                    dtiim = work( iim1 )*delta( iim1 )
                    dtiip = work( iip1 )*delta( iip1 )
                    temp = rhoinv + psi + phi
                    if( swtch ) then
                       c = temp - dtiim*dpsi - dtiip*dphi
                       zz( 1_${ik}$ ) = dtiim*dtiim*dpsi
                       zz( 3_${ik}$ ) = dtiip*dtiip*dphi
                    else
                       if( orgati ) then
                          temp1 = z( iim1 ) / dtiim
                          temp1 = temp1*temp1
                          temp2 = ( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( iip1 ) )*temp1
                          c = temp - dtiip*( dpsi+dphi ) - temp2
                          zz( 1_${ik}$ ) = z( iim1 )*z( iim1 )
                          if( dpsi<temp1 ) then
                             zz( 3_${ik}$ ) = dtiip*dtiip*dphi
                          else
                             zz( 3_${ik}$ ) = dtiip*dtiip*( ( dpsi-temp1 )+dphi )
                          end if
                       else
                          temp1 = z( iip1 ) / dtiip
                          temp1 = temp1*temp1
                          temp2 = ( d( iip1 )-d( iim1 ) )*( d( iim1 )+d( iip1 ) )*temp1
                          c = temp - dtiim*( dpsi+dphi ) - temp2
                          if( dphi<temp1 ) then
                             zz( 1_${ik}$ ) = dtiim*dtiim*dpsi
                          else
                             zz( 1_${ik}$ ) = dtiim*dtiim*( dpsi+( dphi-temp1 ) )
                          end if
                          zz( 3_${ik}$ ) = z( iip1 )*z( iip1 )
                       end if
                    end if
                    dd( 1_${ik}$ ) = dtiim
                    dd( 2_${ik}$ ) = delta( ii )*work( ii )
                    dd( 3_${ik}$ ) = dtiip
                    call stdlib${ii}$_dlaed6( niter, orgati, c, dd, zz, w, eta, info )
                    if( info/=0_${ik}$ ) then
                       ! if info is not 0, i.e., stdlib${ii}$_dlaed6 failed, switch
                       ! back to two pole interpolation
                       swtch3 = .false.
                       info = 0_${ik}$
                       dtipsq = work( ip1 )*delta( ip1 )
                       dtisq = work( i )*delta( i )
                       if( .not.swtch ) then
                          if( orgati ) then
                             c = w - dtipsq*dw + delsq*( z( i )/dtisq )**2_${ik}$
                          else
                             c = w - dtisq*dw - delsq*( z( ip1 )/dtipsq )**2_${ik}$
                          end if
                       else
                          temp = z( ii ) / ( work( ii )*delta( ii ) )
                          if( orgati ) then
                             dpsi = dpsi + temp*temp
                          else
                             dphi = dphi + temp*temp
                          end if
                          c = w - dtisq*dpsi - dtipsq*dphi
                       end if
                       a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw
                       b = dtipsq*dtisq*w
                       if( c==zero ) then
                          if( a==zero ) then
                             if( .not.swtch ) then
                                if( orgati ) then
                                   a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi )
                                else
                                   a = z( ip1 )*z( ip1 ) +dtisq*dtisq*( dpsi+dphi )
                                end if
                             else
                                a = dtisq*dtisq*dpsi + dtipsq*dtipsq*dphi
                             end if
                          end if
                          eta = b / a
                       else if( a<=zero ) then
                          eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                       else
                          eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) )
                       end if
                    end if
                 end if
                 ! note, eta should be positive if w is negative, and
                 ! eta should be negative otherwise. however,
                 ! if for some reason caused by roundoff, eta*w > 0,
                 ! we simply use one newton step instead. this way
                 ! will guarantee eta*w < 0.
                 if( w*eta>=zero )eta = -w / dw
                 eta = eta / ( sigma+sqrt( sigma*sigma+eta ) )
                 temp=tau+eta
                 if( temp>sgub .or. temp<sglb ) then
                    if( w<zero ) then
                       eta = ( sgub-tau ) / two
                    else
                       eta = ( sglb-tau ) / two
                    end if
                    if( geomavg ) then
                       if( w < zero ) then
                          if( tau > zero ) then
                             eta = sqrt(sgub*tau)-tau
                          end if
                       else
                          if( sglb > zero ) then
                             eta = sqrt(sglb*tau)-tau
                          end if
                       end if
                    end if
                 end if
                 prew = w
                 tau = tau + eta
                 sigma = sigma + eta
                 do j = 1, n
                    work( j ) = work( j ) + eta
                    delta( j ) = delta( j ) - eta
                 end do
                 ! evaluate psi and the derivative dpsi
                 dpsi = zero
                 psi = zero
                 erretm = zero
                 do j = 1, iim1
                    temp = z( j ) / ( work( j )*delta( j ) )
                    psi = psi + z( j )*temp
                    dpsi = dpsi + temp*temp
                    erretm = erretm + psi
                 end do
                 erretm = abs( erretm )
                 ! evaluate phi and the derivative dphi
                 dphi = zero
                 phi = zero
                 do j = n, iip1, -1
                    temp = z( j ) / ( work( j )*delta( j ) )
                    phi = phi + z( j )*temp
                    dphi = dphi + temp*temp
                    erretm = erretm + phi
                 end do
                 tau2 = work( ii )*delta( ii )
                 temp = z( ii ) / tau2
                 dw = dpsi + dphi + temp*temp
                 temp = z( ii )*temp
                 w = rhoinv + phi + psi + temp
                 erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp )
          ! $             + abs( tau2 )*dw
                 if( w*prew>zero .and. abs( w )>abs( prew ) / ten )swtch = .not.swtch
              end do loop_230
              ! return with info = 1, niter = maxit and not converged
              info = 1_${ik}$
           end if
           240 continue
           return
     end subroutine stdlib${ii}$_dlasd4

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lasd4( n, i, d, z, delta, rho, sigma, work, info )
     !! This subroutine computes the square root of the I-th updated
     !! eigenvalue of a positive symmetric rank-one modification to
     !! a positive diagonal matrix whose entries are given as the squares
     !! of the corresponding entries in the array d, and that
     !! 0 <= D(i) < D(j)  for  i < j
     !! and that RHO > 0. This is arranged by the calling routine, and is
     !! no loss in generality.  The rank-one modified system is thus
     !! diag( D ) * diag( D ) +  RHO * Z * Z_transpose.
     !! where we assume the Euclidean norm of Z is 1.
     !! The method consists of approximating the rational functions in the
     !! secular equation by simpler interpolating rational functions.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: i, n
           integer(${ik}$), intent(out) :: info
           real(${rk}$), intent(in) :: rho
           real(${rk}$), intent(out) :: sigma
           ! Array Arguments 
           real(${rk}$), intent(in) :: d(*), z(*)
           real(${rk}$), intent(out) :: delta(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: maxit = 400_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: orgati, swtch, swtch3, geomavg
           integer(${ik}$) :: ii, iim1, iip1, ip1, iter, j, niter
           real(${rk}$) :: a, b, c, delsq, delsq2, sq2, dphi, dpsi, dtiim, dtiip, dtipsq, dtisq, &
           dtnsq, dtnsq1, dw, eps, erretm, eta, phi, prew, psi, rhoinv, sglb, sgub, tau, tau2, &
                     temp, temp1, temp2, w
           ! Local Arrays 
           real(${rk}$) :: dd(3_${ik}$), zz(3_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! since this routine is called in an inner loop, we do no argument
           ! checking.
           ! quick return for n=1 and 2.
           info = 0_${ik}$
           if( n==1_${ik}$ ) then
              ! presumably, i=1 upon entry
              sigma = sqrt( d( 1_${ik}$ )*d( 1_${ik}$ )+rho*z( 1_${ik}$ )*z( 1_${ik}$ ) )
              delta( 1_${ik}$ ) = one
              work( 1_${ik}$ ) = one
              return
           end if
           if( n==2_${ik}$ ) then
              call stdlib${ii}$_${ri}$lasd5( i, d, z, delta, rho, sigma, work )
              return
           end if
           ! compute machine epsilon
           eps = stdlib${ii}$_${ri}$lamch( 'EPSILON' )
           rhoinv = one / rho
           tau2= zero
           ! the case i = n
           if( i==n ) then
              ! initialize some basic variables
              ii = n - 1_${ik}$
              niter = 1_${ik}$
              ! calculate initial guess
              temp = rho / two
              ! if ||z||_2 is not one, then temp should be set to
              ! rho * ||z||_2^2 / two
              temp1 = temp / ( d( n )+sqrt( d( n )*d( n )+temp ) )
              do j = 1, n
                 work( j ) = d( j ) + d( n ) + temp1
                 delta( j ) = ( d( j )-d( n ) ) - temp1
              end do
              psi = zero
              do j = 1, n - 2
                 psi = psi + z( j )*z( j ) / ( delta( j )*work( j ) )
              end do
              c = rhoinv + psi
              w = c + z( ii )*z( ii ) / ( delta( ii )*work( ii ) ) +z( n )*z( n ) / ( delta( n )&
                        *work( n ) )
              if( w<=zero ) then
                 temp1 = sqrt( d( n )*d( n )+rho )
                 temp = z( n-1 )*z( n-1 ) / ( ( d( n-1 )+temp1 )*( d( n )-d( n-1 )+rho / ( d( n )+&
                           temp1 ) ) ) +z( n )*z( n ) / rho
                 ! the following tau2 is to approximate
                 ! sigma_n^2 - d( n )*d( n )
                 if( c<=temp ) then
                    tau = rho
                 else
                    delsq = ( d( n )-d( n-1 ) )*( d( n )+d( n-1 ) )
                    a = -c*delsq + z( n-1 )*z( n-1 ) + z( n )*z( n )
                    b = z( n )*z( n )*delsq
                    if( a<zero ) then
                       tau2 = two*b / ( sqrt( a*a+four*b*c )-a )
                    else
                       tau2 = ( a+sqrt( a*a+four*b*c ) ) / ( two*c )
                    end if
                    tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) )
                 end if
                 ! it can be proved that
                     ! d(n)^2+rho/2 <= sigma_n^2 < d(n)^2+tau2 <= d(n)^2+rho
              else
                 delsq = ( d( n )-d( n-1 ) )*( d( n )+d( n-1 ) )
                 a = -c*delsq + z( n-1 )*z( n-1 ) + z( n )*z( n )
                 b = z( n )*z( n )*delsq
                 ! the following tau2 is to approximate
                 ! sigma_n^2 - d( n )*d( n )
                 if( a<zero ) then
                    tau2 = two*b / ( sqrt( a*a+four*b*c )-a )
                 else
                    tau2 = ( a+sqrt( a*a+four*b*c ) ) / ( two*c )
                 end if
                 tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) )
                 ! it can be proved that
                 ! d(n)^2 < d(n)^2+tau2 < sigma(n)^2 < d(n)^2+rho/2
              end if
              ! the following tau is to approximate sigma_n - d( n )
               ! tau = tau2 / ( d( n )+sqrt( d( n )*d( n )+tau2 ) )
              sigma = d( n ) + tau
              do j = 1, n
                 delta( j ) = ( d( j )-d( n ) ) - tau
                 work( j ) = d( j ) + d( n ) + tau
              end do
              ! evaluate psi and the derivative dpsi
              dpsi = zero
              psi = zero
              erretm = zero
              do j = 1, ii
                 temp = z( j ) / ( delta( j )*work( j ) )
                 psi = psi + z( j )*temp
                 dpsi = dpsi + temp*temp
                 erretm = erretm + psi
              end do
              erretm = abs( erretm )
              ! evaluate phi and the derivative dphi
              temp = z( n ) / ( delta( n )*work( n ) )
              phi = z( n )*temp
              dphi = temp*temp
              erretm = eight*( -phi-psi ) + erretm - phi + rhoinv
          ! $          + abs( tau2 )*( dpsi+dphi )
              w = rhoinv + phi + psi
              ! test for convergence
              if( abs( w )<=eps*erretm ) then
                 go to 240
              end if
              ! calculate the new step
              niter = niter + 1_${ik}$
              dtnsq1 = work( n-1 )*delta( n-1 )
              dtnsq = work( n )*delta( n )
              c = w - dtnsq1*dpsi - dtnsq*dphi
              a = ( dtnsq+dtnsq1 )*w - dtnsq*dtnsq1*( dpsi+dphi )
              b = dtnsq*dtnsq1*w
              if( c<zero )c = abs( c )
              if( c==zero ) then
                 eta = rho - sigma*sigma
              else if( a>=zero ) then
                 eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
              else
                 eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) )
              end if
              ! note, eta should be positive if w is negative, and
              ! eta should be negative otherwise. however,
              ! if for some reason caused by roundoff, eta*w > 0,
              ! we simply use one newton step instead. this way
              ! will guarantee eta*w < 0.
              if( w*eta>zero )eta = -w / ( dpsi+dphi )
              temp = eta - dtnsq
              if( temp>rho )eta = rho + dtnsq
              eta = eta / ( sigma+sqrt( eta+sigma*sigma ) )
              tau = tau + eta
              sigma = sigma + eta
              do j = 1, n
                 delta( j ) = delta( j ) - eta
                 work( j ) = work( j ) + eta
              end do
              ! evaluate psi and the derivative dpsi
              dpsi = zero
              psi = zero
              erretm = zero
              do j = 1, ii
                 temp = z( j ) / ( work( j )*delta( j ) )
                 psi = psi + z( j )*temp
                 dpsi = dpsi + temp*temp
                 erretm = erretm + psi
              end do
              erretm = abs( erretm )
              ! evaluate phi and the derivative dphi
              tau2 = work( n )*delta( n )
              temp = z( n ) / tau2
              phi = z( n )*temp
              dphi = temp*temp
              erretm = eight*( -phi-psi ) + erretm - phi + rhoinv
          ! $          + abs( tau2 )*( dpsi+dphi )
              w = rhoinv + phi + psi
              ! main loop to update the values of the array   delta
              iter = niter + 1_${ik}$
              loop_90: do niter = iter, maxit
                 ! test for convergence
                 if( abs( w )<=eps*erretm ) then
                    go to 240
                 end if
                 ! calculate the new step
                 dtnsq1 = work( n-1 )*delta( n-1 )
                 dtnsq = work( n )*delta( n )
                 c = w - dtnsq1*dpsi - dtnsq*dphi
                 a = ( dtnsq+dtnsq1 )*w - dtnsq1*dtnsq*( dpsi+dphi )
                 b = dtnsq1*dtnsq*w
                 if( a>=zero ) then
                    eta = ( a+sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                 else
                    eta = two*b / ( a-sqrt( abs( a*a-four*b*c ) ) )
                 end if
                 ! note, eta should be positive if w is negative, and
                 ! eta should be negative otherwise. however,
                 ! if for some reason caused by roundoff, eta*w > 0,
                 ! we simply use one newton step instead. this way
                 ! will guarantee eta*w < 0.
                 if( w*eta>zero )eta = -w / ( dpsi+dphi )
                 temp = eta - dtnsq
                 if( temp<=zero )eta = eta / two
                 eta = eta / ( sigma+sqrt( eta+sigma*sigma ) )
                 tau = tau + eta
                 sigma = sigma + eta
                 do j = 1, n
                    delta( j ) = delta( j ) - eta
                    work( j ) = work( j ) + eta
                 end do
                 ! evaluate psi and the derivative dpsi
                 dpsi = zero
                 psi = zero
                 erretm = zero
                 do j = 1, ii
                    temp = z( j ) / ( work( j )*delta( j ) )
                    psi = psi + z( j )*temp
                    dpsi = dpsi + temp*temp
                    erretm = erretm + psi
                 end do
                 erretm = abs( erretm )
                 ! evaluate phi and the derivative dphi
                 tau2 = work( n )*delta( n )
                 temp = z( n ) / tau2
                 phi = z( n )*temp
                 dphi = temp*temp
                 erretm = eight*( -phi-psi ) + erretm - phi + rhoinv
          ! $             + abs( tau2 )*( dpsi+dphi )
                 w = rhoinv + phi + psi
              end do loop_90
              ! return with info = 1, niter = maxit and not converged
              info = 1_${ik}$
              go to 240
              ! end for the case i = n
           else
              ! the case for i < n
              niter = 1_${ik}$
              ip1 = i + 1_${ik}$
              ! calculate initial guess
              delsq = ( d( ip1 )-d( i ) )*( d( ip1 )+d( i ) )
              delsq2 = delsq / two
              sq2=sqrt( ( d( i )*d( i )+d( ip1 )*d( ip1 ) ) / two )
              temp = delsq2 / ( d( i )+sq2 )
              do j = 1, n
                 work( j ) = d( j ) + d( i ) + temp
                 delta( j ) = ( d( j )-d( i ) ) - temp
              end do
              psi = zero
              do j = 1, i - 1
                 psi = psi + z( j )*z( j ) / ( work( j )*delta( j ) )
              end do
              phi = zero
              do j = n, i + 2, -1
                 phi = phi + z( j )*z( j ) / ( work( j )*delta( j ) )
              end do
              c = rhoinv + psi + phi
              w = c + z( i )*z( i ) / ( work( i )*delta( i ) ) +z( ip1 )*z( ip1 ) / ( work( ip1 )&
                        *delta( ip1 ) )
              geomavg = .false.
              if( w>zero ) then
                 ! d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2
                 ! we choose d(i) as origin.
                 orgati = .true.
                 ii = i
                 sglb = zero
                 sgub = delsq2  / ( d( i )+sq2 )
                 a = c*delsq + z( i )*z( i ) + z( ip1 )*z( ip1 )
                 b = z( i )*z( i )*delsq
                 if( a>zero ) then
                    tau2 = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) )
                 else
                    tau2 = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                 end if
                 ! tau2 now is an estimation of sigma^2 - d( i )^2. the
                 ! following, however, is the corresponding estimation of
                 ! sigma - d( i ).
                 tau = tau2 / ( d( i )+sqrt( d( i )*d( i )+tau2 ) )
                 temp = sqrt(eps)
                 if( (d(i)<=temp*d(ip1)).and.(abs(z(i))<=temp).and.(d(i)>zero) ) then
                    tau = min( ten*d(i), sgub )
                    geomavg = .true.
                 end if
              else
                 ! (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2
                 ! we choose d(i+1) as origin.
                 orgati = .false.
                 ii = ip1
                 sglb = -delsq2  / ( d( ii )+sq2 )
                 sgub = zero
                 a = c*delsq - z( i )*z( i ) - z( ip1 )*z( ip1 )
                 b = z( ip1 )*z( ip1 )*delsq
                 if( a<zero ) then
                    tau2 = two*b / ( a-sqrt( abs( a*a+four*b*c ) ) )
                 else
                    tau2 = -( a+sqrt( abs( a*a+four*b*c ) ) ) / ( two*c )
                 end if
                 ! tau2 now is an estimation of sigma^2 - d( ip1 )^2. the
                 ! following, however, is the corresponding estimation of
                 ! sigma - d( ip1 ).
                 tau = tau2 / ( d( ip1 )+sqrt( abs( d( ip1 )*d( ip1 )+tau2 ) ) )
              end if
              sigma = d( ii ) + tau
              do j = 1, n
                 work( j ) = d( j ) + d( ii ) + tau
                 delta( j ) = ( d( j )-d( ii ) ) - tau
              end do
              iim1 = ii - 1_${ik}$
              iip1 = ii + 1_${ik}$
              ! evaluate psi and the derivative dpsi
              dpsi = zero
              psi = zero
              erretm = zero
              do j = 1, iim1
                 temp = z( j ) / ( work( j )*delta( j ) )
                 psi = psi + z( j )*temp
                 dpsi = dpsi + temp*temp
                 erretm = erretm + psi
              end do
              erretm = abs( erretm )
              ! evaluate phi and the derivative dphi
              dphi = zero
              phi = zero
              do j = n, iip1, -1
                 temp = z( j ) / ( work( j )*delta( j ) )
                 phi = phi + z( j )*temp
                 dphi = dphi + temp*temp
                 erretm = erretm + phi
              end do
              w = rhoinv + phi + psi
              ! w is the value of the secular function with
              ! its ii-th element removed.
              swtch3 = .false.
              if( orgati ) then
                 if( w<zero )swtch3 = .true.
              else
                 if( w>zero )swtch3 = .true.
              end if
              if( ii==1_${ik}$ .or. ii==n )swtch3 = .false.
              temp = z( ii ) / ( work( ii )*delta( ii ) )
              dw = dpsi + dphi + temp*temp
              temp = z( ii )*temp
              w = w + temp
              erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp )
          ! $          + abs( tau2 )*dw
              ! test for convergence
              if( abs( w )<=eps*erretm ) then
                 go to 240
              end if
              if( w<=zero ) then
                 sglb = max( sglb, tau )
              else
                 sgub = min( sgub, tau )
              end if
              ! calculate the new step
              niter = niter + 1_${ik}$
              if( .not.swtch3 ) then
                 dtipsq = work( ip1 )*delta( ip1 )
                 dtisq = work( i )*delta( i )
                 if( orgati ) then
                    c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$
                 else
                    c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$
                 end if
                 a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw
                 b = dtipsq*dtisq*w
                 if( c==zero ) then
                    if( a==zero ) then
                       if( orgati ) then
                          a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi )
                       else
                          a = z( ip1 )*z( ip1 ) + dtisq*dtisq*( dpsi+dphi )
                       end if
                    end if
                    eta = b / a
                 else if( a<=zero ) then
                    eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                 else
                    eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) )
                 end if
              else
                 ! interpolation using three most relevant poles
                 dtiim = work( iim1 )*delta( iim1 )
                 dtiip = work( iip1 )*delta( iip1 )
                 temp = rhoinv + psi + phi
                 if( orgati ) then
                    temp1 = z( iim1 ) / dtiim
                    temp1 = temp1*temp1
                    c = ( temp - dtiip*( dpsi+dphi ) ) -( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( &
                              iip1 ) )*temp1
                    zz( 1_${ik}$ ) = z( iim1 )*z( iim1 )
                    if( dpsi<temp1 ) then
                       zz( 3_${ik}$ ) = dtiip*dtiip*dphi
                    else
                       zz( 3_${ik}$ ) = dtiip*dtiip*( ( dpsi-temp1 )+dphi )
                    end if
                 else
                    temp1 = z( iip1 ) / dtiip
                    temp1 = temp1*temp1
                    c = ( temp - dtiim*( dpsi+dphi ) ) -( d( iip1 )-d( iim1 ) )*( d( iim1 )+d( &
                              iip1 ) )*temp1
                    if( dphi<temp1 ) then
                       zz( 1_${ik}$ ) = dtiim*dtiim*dpsi
                    else
                       zz( 1_${ik}$ ) = dtiim*dtiim*( dpsi+( dphi-temp1 ) )
                    end if
                    zz( 3_${ik}$ ) = z( iip1 )*z( iip1 )
                 end if
                 zz( 2_${ik}$ ) = z( ii )*z( ii )
                 dd( 1_${ik}$ ) = dtiim
                 dd( 2_${ik}$ ) = delta( ii )*work( ii )
                 dd( 3_${ik}$ ) = dtiip
                 call stdlib${ii}$_${ri}$laed6( niter, orgati, c, dd, zz, w, eta, info )
                 if( info/=0_${ik}$ ) then
                    ! if info is not 0, i.e., stdlib${ii}$_${ri}$laed6 failed, switch back
                    ! to 2 pole interpolation.
                    swtch3 = .false.
                    info = 0_${ik}$
                    dtipsq = work( ip1 )*delta( ip1 )
                    dtisq = work( i )*delta( i )
                    if( orgati ) then
                       c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$
                    else
                       c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$
                    end if
                    a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw
                    b = dtipsq*dtisq*w
                    if( c==zero ) then
                       if( a==zero ) then
                          if( orgati ) then
                             a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi )
                          else
                             a = z( ip1 )*z( ip1 ) + dtisq*dtisq*( dpsi+dphi)
                          end if
                       end if
                       eta = b / a
                    else if( a<=zero ) then
                       eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                    else
                       eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) )
                    end if
                 end if
              end if
              ! note, eta should be positive if w is negative, and
              ! eta should be negative otherwise. however,
              ! if for some reason caused by roundoff, eta*w > 0,
              ! we simply use one newton step instead. this way
              ! will guarantee eta*w < 0.
              if( w*eta>=zero )eta = -w / dw
              eta = eta / ( sigma+sqrt( sigma*sigma+eta ) )
              temp = tau + eta
              if( temp>sgub .or. temp<sglb ) then
                 if( w<zero ) then
                    eta = ( sgub-tau ) / two
                 else
                    eta = ( sglb-tau ) / two
                 end if
                 if( geomavg ) then
                    if( w < zero ) then
                       if( tau > zero ) then
                          eta = sqrt(sgub*tau)-tau
                       end if
                    else
                       if( sglb > zero ) then
                          eta = sqrt(sglb*tau)-tau
                       end if
                    end if
                 end if
              end if
              prew = w
              tau = tau + eta
              sigma = sigma + eta
              do j = 1, n
                 work( j ) = work( j ) + eta
                 delta( j ) = delta( j ) - eta
              end do
              ! evaluate psi and the derivative dpsi
              dpsi = zero
              psi = zero
              erretm = zero
              do j = 1, iim1
                 temp = z( j ) / ( work( j )*delta( j ) )
                 psi = psi + z( j )*temp
                 dpsi = dpsi + temp*temp
                 erretm = erretm + psi
              end do
              erretm = abs( erretm )
              ! evaluate phi and the derivative dphi
              dphi = zero
              phi = zero
              do j = n, iip1, -1
                 temp = z( j ) / ( work( j )*delta( j ) )
                 phi = phi + z( j )*temp
                 dphi = dphi + temp*temp
                 erretm = erretm + phi
              end do
              tau2 = work( ii )*delta( ii )
              temp = z( ii ) / tau2
              dw = dpsi + dphi + temp*temp
              temp = z( ii )*temp
              w = rhoinv + phi + psi + temp
              erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp )
          ! $          + abs( tau2 )*dw
              swtch = .false.
              if( orgati ) then
                 if( -w>abs( prew ) / ten )swtch = .true.
              else
                 if( w>abs( prew ) / ten )swtch = .true.
              end if
              ! main loop to update the values of the array   delta and work
              iter = niter + 1_${ik}$
              loop_230: do niter = iter, maxit
                 ! test for convergence
                 if( abs( w )<=eps*erretm ) then
           ! $          .or. (sgub-sglb)<=eight*abs(sgub+sglb) ) then
                    go to 240
                 end if
                 if( w<=zero ) then
                    sglb = max( sglb, tau )
                 else
                    sgub = min( sgub, tau )
                 end if
                 ! calculate the new step
                 if( .not.swtch3 ) then
                    dtipsq = work( ip1 )*delta( ip1 )
                    dtisq = work( i )*delta( i )
                    if( .not.swtch ) then
                       if( orgati ) then
                          c = w - dtipsq*dw + delsq*( z( i ) / dtisq )**2_${ik}$
                       else
                          c = w - dtisq*dw - delsq*( z( ip1 ) / dtipsq )**2_${ik}$
                       end if
                    else
                       temp = z( ii ) / ( work( ii )*delta( ii ) )
                       if( orgati ) then
                          dpsi = dpsi + temp*temp
                       else
                          dphi = dphi + temp*temp
                       end if
                       c = w - dtisq*dpsi - dtipsq*dphi
                    end if
                    a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw
                    b = dtipsq*dtisq*w
                    if( c==zero ) then
                       if( a==zero ) then
                          if( .not.swtch ) then
                             if( orgati ) then
                                a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi )
                             else
                                a = z( ip1 )*z( ip1 ) +dtisq*dtisq*( dpsi+dphi )
                             end if
                          else
                             a = dtisq*dtisq*dpsi + dtipsq*dtipsq*dphi
                          end if
                       end if
                       eta = b / a
                    else if( a<=zero ) then
                       eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                    else
                       eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) )
                    end if
                 else
                    ! interpolation using three most relevant poles
                    dtiim = work( iim1 )*delta( iim1 )
                    dtiip = work( iip1 )*delta( iip1 )
                    temp = rhoinv + psi + phi
                    if( swtch ) then
                       c = temp - dtiim*dpsi - dtiip*dphi
                       zz( 1_${ik}$ ) = dtiim*dtiim*dpsi
                       zz( 3_${ik}$ ) = dtiip*dtiip*dphi
                    else
                       if( orgati ) then
                          temp1 = z( iim1 ) / dtiim
                          temp1 = temp1*temp1
                          temp2 = ( d( iim1 )-d( iip1 ) )*( d( iim1 )+d( iip1 ) )*temp1
                          c = temp - dtiip*( dpsi+dphi ) - temp2
                          zz( 1_${ik}$ ) = z( iim1 )*z( iim1 )
                          if( dpsi<temp1 ) then
                             zz( 3_${ik}$ ) = dtiip*dtiip*dphi
                          else
                             zz( 3_${ik}$ ) = dtiip*dtiip*( ( dpsi-temp1 )+dphi )
                          end if
                       else
                          temp1 = z( iip1 ) / dtiip
                          temp1 = temp1*temp1
                          temp2 = ( d( iip1 )-d( iim1 ) )*( d( iim1 )+d( iip1 ) )*temp1
                          c = temp - dtiim*( dpsi+dphi ) - temp2
                          if( dphi<temp1 ) then
                             zz( 1_${ik}$ ) = dtiim*dtiim*dpsi
                          else
                             zz( 1_${ik}$ ) = dtiim*dtiim*( dpsi+( dphi-temp1 ) )
                          end if
                          zz( 3_${ik}$ ) = z( iip1 )*z( iip1 )
                       end if
                    end if
                    dd( 1_${ik}$ ) = dtiim
                    dd( 2_${ik}$ ) = delta( ii )*work( ii )
                    dd( 3_${ik}$ ) = dtiip
                    call stdlib${ii}$_${ri}$laed6( niter, orgati, c, dd, zz, w, eta, info )
                    if( info/=0_${ik}$ ) then
                       ! if info is not 0, i.e., stdlib${ii}$_${ri}$laed6 failed, switch
                       ! back to two pole interpolation
                       swtch3 = .false.
                       info = 0_${ik}$
                       dtipsq = work( ip1 )*delta( ip1 )
                       dtisq = work( i )*delta( i )
                       if( .not.swtch ) then
                          if( orgati ) then
                             c = w - dtipsq*dw + delsq*( z( i )/dtisq )**2_${ik}$
                          else
                             c = w - dtisq*dw - delsq*( z( ip1 )/dtipsq )**2_${ik}$
                          end if
                       else
                          temp = z( ii ) / ( work( ii )*delta( ii ) )
                          if( orgati ) then
                             dpsi = dpsi + temp*temp
                          else
                             dphi = dphi + temp*temp
                          end if
                          c = w - dtisq*dpsi - dtipsq*dphi
                       end if
                       a = ( dtipsq+dtisq )*w - dtipsq*dtisq*dw
                       b = dtipsq*dtisq*w
                       if( c==zero ) then
                          if( a==zero ) then
                             if( .not.swtch ) then
                                if( orgati ) then
                                   a = z( i )*z( i ) + dtipsq*dtipsq*( dpsi+dphi )
                                else
                                   a = z( ip1 )*z( ip1 ) +dtisq*dtisq*( dpsi+dphi )
                                end if
                             else
                                a = dtisq*dtisq*dpsi + dtipsq*dtipsq*dphi
                             end if
                          end if
                          eta = b / a
                       else if( a<=zero ) then
                          eta = ( a-sqrt( abs( a*a-four*b*c ) ) ) / ( two*c )
                       else
                          eta = two*b / ( a+sqrt( abs( a*a-four*b*c ) ) )
                       end if
                    end if
                 end if
                 ! note, eta should be positive if w is negative, and
                 ! eta should be negative otherwise. however,
                 ! if for some reason caused by roundoff, eta*w > 0,
                 ! we simply use one newton step instead. this way
                 ! will guarantee eta*w < 0.
                 if( w*eta>=zero )eta = -w / dw
                 eta = eta / ( sigma+sqrt( sigma*sigma+eta ) )
                 temp=tau+eta
                 if( temp>sgub .or. temp<sglb ) then
                    if( w<zero ) then
                       eta = ( sgub-tau ) / two
                    else
                       eta = ( sglb-tau ) / two
                    end if
                    if( geomavg ) then
                       if( w < zero ) then
                          if( tau > zero ) then
                             eta = sqrt(sgub*tau)-tau
                          end if
                       else
                          if( sglb > zero ) then
                             eta = sqrt(sglb*tau)-tau
                          end if
                       end if
                    end if
                 end if
                 prew = w
                 tau = tau + eta
                 sigma = sigma + eta
                 do j = 1, n
                    work( j ) = work( j ) + eta
                    delta( j ) = delta( j ) - eta
                 end do
                 ! evaluate psi and the derivative dpsi
                 dpsi = zero
                 psi = zero
                 erretm = zero
                 do j = 1, iim1
                    temp = z( j ) / ( work( j )*delta( j ) )
                    psi = psi + z( j )*temp
                    dpsi = dpsi + temp*temp
                    erretm = erretm + psi
                 end do
                 erretm = abs( erretm )
                 ! evaluate phi and the derivative dphi
                 dphi = zero
                 phi = zero
                 do j = n, iip1, -1
                    temp = z( j ) / ( work( j )*delta( j ) )
                    phi = phi + z( j )*temp
                    dphi = dphi + temp*temp
                    erretm = erretm + phi
                 end do
                 tau2 = work( ii )*delta( ii )
                 temp = z( ii ) / tau2
                 dw = dpsi + dphi + temp*temp
                 temp = z( ii )*temp
                 w = rhoinv + phi + psi + temp
                 erretm = eight*( phi-psi ) + erretm + two*rhoinv+ three*abs( temp )
          ! $             + abs( tau2 )*dw
                 if( w*prew>zero .and. abs( w )>abs( prew ) / ten )swtch = .not.swtch
              end do loop_230
              ! return with info = 1, niter = maxit and not converged
              info = 1_${ik}$
           end if
           240 continue
           return
     end subroutine stdlib${ii}$_${ri}$lasd4

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slasd5( i, d, z, delta, rho, dsigma, work )
     !! This subroutine computes the square root of the I-th eigenvalue
     !! of a positive symmetric rank-one modification of a 2-by-2 diagonal
     !! matrix
     !! diag( D ) * diag( D ) +  RHO * Z * transpose(Z) .
     !! The diagonal entries in the array D are assumed to satisfy
     !! 0 <= D(i) < D(j)  for  i < j .
     !! We also assume RHO > 0 and that the Euclidean norm of the vector
     !! Z is one.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: i
           real(sp), intent(out) :: dsigma
           real(sp), intent(in) :: rho
           ! Array Arguments 
           real(sp), intent(in) :: d(2_${ik}$), z(2_${ik}$)
           real(sp), intent(out) :: delta(2_${ik}$), work(2_${ik}$)
        ! =====================================================================
           
           ! Local Scalars 
           real(sp) :: b, c, del, delsq, tau, w
           ! Intrinsic Functions 
           ! Executable Statements 
           del = d( 2_${ik}$ ) - d( 1_${ik}$ )
           delsq = del*( d( 2_${ik}$ )+d( 1_${ik}$ ) )
           if( i==1_${ik}$ ) then
              w = one + four*rho*( z( 2_${ik}$ )*z( 2_${ik}$ ) / ( d( 1_${ik}$ )+three*d( 2_${ik}$ ) )-z( 1_${ik}$ )*z( 1_${ik}$ ) / ( &
                        three*d( 1_${ik}$ )+d( 2_${ik}$ ) ) ) / del
              if( w>zero ) then
                 b = delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) )
                 c = rho*z( 1_${ik}$ )*z( 1_${ik}$ )*delsq
                 ! b > zero, always
                 ! the following tau is dsigma * dsigma - d( 1 ) * d( 1 )
                 tau = two*c / ( b+sqrt( abs( b*b-four*c ) ) )
                 ! the following tau is dsigma - d( 1 )
                 tau = tau / ( d( 1_${ik}$ )+sqrt( d( 1_${ik}$ )*d( 1_${ik}$ )+tau ) )
                 dsigma = d( 1_${ik}$ ) + tau
                 delta( 1_${ik}$ ) = -tau
                 delta( 2_${ik}$ ) = del - tau
                 work( 1_${ik}$ ) = two*d( 1_${ik}$ ) + tau
                 work( 2_${ik}$ ) = ( d( 1_${ik}$ )+tau ) + d( 2_${ik}$ )
                 ! delta( 1 ) = -z( 1 ) / tau
                 ! delta( 2 ) = z( 2 ) / ( del-tau )
              else
                 b = -delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) )
                 c = rho*z( 2_${ik}$ )*z( 2_${ik}$ )*delsq
                 ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 )
                 if( b>zero ) then
                    tau = -two*c / ( b+sqrt( b*b+four*c ) )
                 else
                    tau = ( b-sqrt( b*b+four*c ) ) / two
                 end if
                 ! the following tau is dsigma - d( 2 )
                 tau = tau / ( d( 2_${ik}$ )+sqrt( abs( d( 2_${ik}$ )*d( 2_${ik}$ )+tau ) ) )
                 dsigma = d( 2_${ik}$ ) + tau
                 delta( 1_${ik}$ ) = -( del+tau )
                 delta( 2_${ik}$ ) = -tau
                 work( 1_${ik}$ ) = d( 1_${ik}$ ) + tau + d( 2_${ik}$ )
                 work( 2_${ik}$ ) = two*d( 2_${ik}$ ) + tau
                 ! delta( 1 ) = -z( 1 ) / ( del+tau )
                 ! delta( 2 ) = -z( 2 ) / tau
              end if
              ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) )
              ! delta( 1 ) = delta( 1 ) / temp
              ! delta( 2 ) = delta( 2 ) / temp
           else
              ! now i=2
              b = -delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) )
              c = rho*z( 2_${ik}$ )*z( 2_${ik}$ )*delsq
              ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 )
              if( b>zero ) then
                 tau = ( b+sqrt( b*b+four*c ) ) / two
              else
                 tau = two*c / ( -b+sqrt( b*b+four*c ) )
              end if
              ! the following tau is dsigma - d( 2 )
              tau = tau / ( d( 2_${ik}$ )+sqrt( d( 2_${ik}$ )*d( 2_${ik}$ )+tau ) )
              dsigma = d( 2_${ik}$ ) + tau
              delta( 1_${ik}$ ) = -( del+tau )
              delta( 2_${ik}$ ) = -tau
              work( 1_${ik}$ ) = d( 1_${ik}$ ) + tau + d( 2_${ik}$ )
              work( 2_${ik}$ ) = two*d( 2_${ik}$ ) + tau
              ! delta( 1 ) = -z( 1 ) / ( del+tau )
              ! delta( 2 ) = -z( 2 ) / tau
              ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) )
              ! delta( 1 ) = delta( 1 ) / temp
              ! delta( 2 ) = delta( 2 ) / temp
           end if
           return
     end subroutine stdlib${ii}$_slasd5

     pure module subroutine stdlib${ii}$_dlasd5( i, d, z, delta, rho, dsigma, work )
     !! This subroutine computes the square root of the I-th eigenvalue
     !! of a positive symmetric rank-one modification of a 2-by-2 diagonal
     !! matrix
     !! diag( D ) * diag( D ) +  RHO * Z * transpose(Z) .
     !! The diagonal entries in the array D are assumed to satisfy
     !! 0 <= D(i) < D(j)  for  i < j .
     !! We also assume RHO > 0 and that the Euclidean norm of the vector
     !! Z is one.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: i
           real(dp), intent(out) :: dsigma
           real(dp), intent(in) :: rho
           ! Array Arguments 
           real(dp), intent(in) :: d(2_${ik}$), z(2_${ik}$)
           real(dp), intent(out) :: delta(2_${ik}$), work(2_${ik}$)
        ! =====================================================================
           
           ! Local Scalars 
           real(dp) :: b, c, del, delsq, tau, w
           ! Intrinsic Functions 
           ! Executable Statements 
           del = d( 2_${ik}$ ) - d( 1_${ik}$ )
           delsq = del*( d( 2_${ik}$ )+d( 1_${ik}$ ) )
           if( i==1_${ik}$ ) then
              w = one + four*rho*( z( 2_${ik}$ )*z( 2_${ik}$ ) / ( d( 1_${ik}$ )+three*d( 2_${ik}$ ) )-z( 1_${ik}$ )*z( 1_${ik}$ ) / ( &
                        three*d( 1_${ik}$ )+d( 2_${ik}$ ) ) ) / del
              if( w>zero ) then
                 b = delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) )
                 c = rho*z( 1_${ik}$ )*z( 1_${ik}$ )*delsq
                 ! b > zero, always
                 ! the following tau is dsigma * dsigma - d( 1 ) * d( 1 )
                 tau = two*c / ( b+sqrt( abs( b*b-four*c ) ) )
                 ! the following tau is dsigma - d( 1 )
                 tau = tau / ( d( 1_${ik}$ )+sqrt( d( 1_${ik}$ )*d( 1_${ik}$ )+tau ) )
                 dsigma = d( 1_${ik}$ ) + tau
                 delta( 1_${ik}$ ) = -tau
                 delta( 2_${ik}$ ) = del - tau
                 work( 1_${ik}$ ) = two*d( 1_${ik}$ ) + tau
                 work( 2_${ik}$ ) = ( d( 1_${ik}$ )+tau ) + d( 2_${ik}$ )
                 ! delta( 1 ) = -z( 1 ) / tau
                 ! delta( 2 ) = z( 2 ) / ( del-tau )
              else
                 b = -delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) )
                 c = rho*z( 2_${ik}$ )*z( 2_${ik}$ )*delsq
                 ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 )
                 if( b>zero ) then
                    tau = -two*c / ( b+sqrt( b*b+four*c ) )
                 else
                    tau = ( b-sqrt( b*b+four*c ) ) / two
                 end if
                 ! the following tau is dsigma - d( 2 )
                 tau = tau / ( d( 2_${ik}$ )+sqrt( abs( d( 2_${ik}$ )*d( 2_${ik}$ )+tau ) ) )
                 dsigma = d( 2_${ik}$ ) + tau
                 delta( 1_${ik}$ ) = -( del+tau )
                 delta( 2_${ik}$ ) = -tau
                 work( 1_${ik}$ ) = d( 1_${ik}$ ) + tau + d( 2_${ik}$ )
                 work( 2_${ik}$ ) = two*d( 2_${ik}$ ) + tau
                 ! delta( 1 ) = -z( 1 ) / ( del+tau )
                 ! delta( 2 ) = -z( 2 ) / tau
              end if
              ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) )
              ! delta( 1 ) = delta( 1 ) / temp
              ! delta( 2 ) = delta( 2 ) / temp
           else
              ! now i=2
              b = -delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) )
              c = rho*z( 2_${ik}$ )*z( 2_${ik}$ )*delsq
              ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 )
              if( b>zero ) then
                 tau = ( b+sqrt( b*b+four*c ) ) / two
              else
                 tau = two*c / ( -b+sqrt( b*b+four*c ) )
              end if
              ! the following tau is dsigma - d( 2 )
              tau = tau / ( d( 2_${ik}$ )+sqrt( d( 2_${ik}$ )*d( 2_${ik}$ )+tau ) )
              dsigma = d( 2_${ik}$ ) + tau
              delta( 1_${ik}$ ) = -( del+tau )
              delta( 2_${ik}$ ) = -tau
              work( 1_${ik}$ ) = d( 1_${ik}$ ) + tau + d( 2_${ik}$ )
              work( 2_${ik}$ ) = two*d( 2_${ik}$ ) + tau
              ! delta( 1 ) = -z( 1 ) / ( del+tau )
              ! delta( 2 ) = -z( 2 ) / tau
              ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) )
              ! delta( 1 ) = delta( 1 ) / temp
              ! delta( 2 ) = delta( 2 ) / temp
           end if
           return
     end subroutine stdlib${ii}$_dlasd5

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lasd5( i, d, z, delta, rho, dsigma, work )
     !! This subroutine computes the square root of the I-th eigenvalue
     !! of a positive symmetric rank-one modification of a 2-by-2 diagonal
     !! matrix
     !! diag( D ) * diag( D ) +  RHO * Z * transpose(Z) .
     !! The diagonal entries in the array D are assumed to satisfy
     !! 0 <= D(i) < D(j)  for  i < j .
     !! We also assume RHO > 0 and that the Euclidean norm of the vector
     !! Z is one.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: i
           real(${rk}$), intent(out) :: dsigma
           real(${rk}$), intent(in) :: rho
           ! Array Arguments 
           real(${rk}$), intent(in) :: d(2_${ik}$), z(2_${ik}$)
           real(${rk}$), intent(out) :: delta(2_${ik}$), work(2_${ik}$)
        ! =====================================================================
           
           ! Local Scalars 
           real(${rk}$) :: b, c, del, delsq, tau, w
           ! Intrinsic Functions 
           ! Executable Statements 
           del = d( 2_${ik}$ ) - d( 1_${ik}$ )
           delsq = del*( d( 2_${ik}$ )+d( 1_${ik}$ ) )
           if( i==1_${ik}$ ) then
              w = one + four*rho*( z( 2_${ik}$ )*z( 2_${ik}$ ) / ( d( 1_${ik}$ )+three*d( 2_${ik}$ ) )-z( 1_${ik}$ )*z( 1_${ik}$ ) / ( &
                        three*d( 1_${ik}$ )+d( 2_${ik}$ ) ) ) / del
              if( w>zero ) then
                 b = delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) )
                 c = rho*z( 1_${ik}$ )*z( 1_${ik}$ )*delsq
                 ! b > zero, always
                 ! the following tau is dsigma * dsigma - d( 1 ) * d( 1 )
                 tau = two*c / ( b+sqrt( abs( b*b-four*c ) ) )
                 ! the following tau is dsigma - d( 1 )
                 tau = tau / ( d( 1_${ik}$ )+sqrt( d( 1_${ik}$ )*d( 1_${ik}$ )+tau ) )
                 dsigma = d( 1_${ik}$ ) + tau
                 delta( 1_${ik}$ ) = -tau
                 delta( 2_${ik}$ ) = del - tau
                 work( 1_${ik}$ ) = two*d( 1_${ik}$ ) + tau
                 work( 2_${ik}$ ) = ( d( 1_${ik}$ )+tau ) + d( 2_${ik}$ )
                 ! delta( 1 ) = -z( 1 ) / tau
                 ! delta( 2 ) = z( 2 ) / ( del-tau )
              else
                 b = -delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) )
                 c = rho*z( 2_${ik}$ )*z( 2_${ik}$ )*delsq
                 ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 )
                 if( b>zero ) then
                    tau = -two*c / ( b+sqrt( b*b+four*c ) )
                 else
                    tau = ( b-sqrt( b*b+four*c ) ) / two
                 end if
                 ! the following tau is dsigma - d( 2 )
                 tau = tau / ( d( 2_${ik}$ )+sqrt( abs( d( 2_${ik}$ )*d( 2_${ik}$ )+tau ) ) )
                 dsigma = d( 2_${ik}$ ) + tau
                 delta( 1_${ik}$ ) = -( del+tau )
                 delta( 2_${ik}$ ) = -tau
                 work( 1_${ik}$ ) = d( 1_${ik}$ ) + tau + d( 2_${ik}$ )
                 work( 2_${ik}$ ) = two*d( 2_${ik}$ ) + tau
                 ! delta( 1 ) = -z( 1 ) / ( del+tau )
                 ! delta( 2 ) = -z( 2 ) / tau
              end if
              ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) )
              ! delta( 1 ) = delta( 1 ) / temp
              ! delta( 2 ) = delta( 2 ) / temp
           else
              ! now i=2
              b = -delsq + rho*( z( 1_${ik}$ )*z( 1_${ik}$ )+z( 2_${ik}$ )*z( 2_${ik}$ ) )
              c = rho*z( 2_${ik}$ )*z( 2_${ik}$ )*delsq
              ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 )
              if( b>zero ) then
                 tau = ( b+sqrt( b*b+four*c ) ) / two
              else
                 tau = two*c / ( -b+sqrt( b*b+four*c ) )
              end if
              ! the following tau is dsigma - d( 2 )
              tau = tau / ( d( 2_${ik}$ )+sqrt( d( 2_${ik}$ )*d( 2_${ik}$ )+tau ) )
              dsigma = d( 2_${ik}$ ) + tau
              delta( 1_${ik}$ ) = -( del+tau )
              delta( 2_${ik}$ ) = -tau
              work( 1_${ik}$ ) = d( 1_${ik}$ ) + tau + d( 2_${ik}$ )
              work( 2_${ik}$ ) = two*d( 2_${ik}$ ) + tau
              ! delta( 1 ) = -z( 1 ) / ( del+tau )
              ! delta( 2 ) = -z( 2 ) / tau
              ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) )
              ! delta( 1 ) = delta( 1 ) / temp
              ! delta( 2 ) = delta( 2 ) / temp
           end if
           return
     end subroutine stdlib${ii}$_${ri}$lasd5

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slasdq( uplo, sqre, n, ncvt, nru, ncc, d, e, vt, ldvt,u, ldu, c, ldc, &
     !! SLASDQ computes the singular value decomposition (SVD) of a real
     !! (upper or lower) bidiagonal matrix with diagonal D and offdiagonal
     !! E, accumulating the transformations if desired. Letting B denote
     !! the input bidiagonal matrix, the algorithm computes orthogonal
     !! matrices Q and P such that B = Q * S * P**T (P**T denotes the transpose
     !! of P). The singular values S are overwritten on D.
     !! The input matrix U  is changed to U  * Q  if desired.
     !! The input matrix VT is changed to P**T * VT if desired.
     !! The input matrix C  is changed to Q**T * C  if desired.
     !! See "Computing  Small Singular Values of Bidiagonal Matrices With
     !! Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
     !! LAPACK Working Note #3, for a detailed description of the algorithm.
               work, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldc, ldu, ldvt, n, ncc, ncvt, nru, sqre
           ! Array Arguments 
           real(sp), intent(inout) :: c(ldc,*), d(*), e(*), u(ldu,*), vt(ldvt,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: rotate
           integer(${ik}$) :: i, isub, iuplo, j, np1, sqre1
           real(sp) :: cs, r, smin, sn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           iuplo = 0_${ik}$
           if( stdlib_lsame( uplo, 'U' ) )iuplo = 1_${ik}$
           if( stdlib_lsame( uplo, 'L' ) )iuplo = 2_${ik}$
           if( iuplo==0_${ik}$ ) then
              info = -1_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ncvt<0_${ik}$ ) then
              info = -4_${ik}$
           else if( nru<0_${ik}$ ) then
              info = -5_${ik}$
           else if( ncc<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ( ncvt==0_${ik}$ .and. ldvt<1_${ik}$ ) .or.( ncvt>0_${ik}$ .and. ldvt<max( 1_${ik}$, n ) ) ) then
              info = -10_${ik}$
           else if( ldu<max( 1_${ik}$, nru ) ) then
              info = -12_${ik}$
           else if( ( ncc==0_${ik}$ .and. ldc<1_${ik}$ ) .or.( ncc>0_${ik}$ .and. ldc<max( 1_${ik}$, n ) ) ) then
              info = -14_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLASDQ', -info )
              return
           end if
           if( n==0 )return
           ! rotate is true if any singular vectors desired, false otherwise
           rotate = ( ncvt>0_${ik}$ ) .or. ( nru>0_${ik}$ ) .or. ( ncc>0_${ik}$ )
           np1 = n + 1_${ik}$
           sqre1 = sqre
           ! if matrix non-square upper bidiagonal, rotate to be lower
           ! bidiagonal.  the rotations are on the right.
           if( ( iuplo==1_${ik}$ ) .and. ( sqre1==1_${ik}$ ) ) then
              do i = 1, n - 1
                 call stdlib${ii}$_slartg( d( i ), e( i ), cs, sn, r )
                 d( i ) = r
                 e( i ) = sn*d( i+1 )
                 d( i+1 ) = cs*d( i+1 )
                 if( rotate ) then
                    work( i ) = cs
                    work( n+i ) = sn
                 end if
              end do
              call stdlib${ii}$_slartg( d( n ), e( n ), cs, sn, r )
              d( n ) = r
              e( n ) = zero
              if( rotate ) then
                 work( n ) = cs
                 work( n+n ) = sn
              end if
              iuplo = 2_${ik}$
              sqre1 = 0_${ik}$
              ! update singular vectors if desired.
              if( ncvt>0_${ik}$ )call stdlib${ii}$_slasr( 'L', 'V', 'F', np1, ncvt, work( 1_${ik}$ ),work( np1 ), vt, &
                        ldvt )
           end if
           ! if matrix lower bidiagonal, rotate to be upper bidiagonal
           ! by applying givens rotations on the left.
           if( iuplo==2_${ik}$ ) then
              do i = 1, n - 1
                 call stdlib${ii}$_slartg( d( i ), e( i ), cs, sn, r )
                 d( i ) = r
                 e( i ) = sn*d( i+1 )
                 d( i+1 ) = cs*d( i+1 )
                 if( rotate ) then
                    work( i ) = cs
                    work( n+i ) = sn
                 end if
              end do
              ! if matrix (n+1)-by-n lower bidiagonal, one additional
              ! rotation is needed.
              if( sqre1==1_${ik}$ ) then
                 call stdlib${ii}$_slartg( d( n ), e( n ), cs, sn, r )
                 d( n ) = r
                 if( rotate ) then
                    work( n ) = cs
                    work( n+n ) = sn
                 end if
              end if
              ! update singular vectors if desired.
              if( nru>0_${ik}$ ) then
                 if( sqre1==0_${ik}$ ) then
                    call stdlib${ii}$_slasr( 'R', 'V', 'F', nru, n, work( 1_${ik}$ ),work( np1 ), u, ldu )
                              
                 else
                    call stdlib${ii}$_slasr( 'R', 'V', 'F', nru, np1, work( 1_${ik}$ ),work( np1 ), u, ldu )
                              
                 end if
              end if
              if( ncc>0_${ik}$ ) then
                 if( sqre1==0_${ik}$ ) then
                    call stdlib${ii}$_slasr( 'L', 'V', 'F', n, ncc, work( 1_${ik}$ ),work( np1 ), c, ldc )
                              
                 else
                    call stdlib${ii}$_slasr( 'L', 'V', 'F', np1, ncc, work( 1_${ik}$ ),work( np1 ), c, ldc )
                              
                 end if
              end if
           end if
           ! call stdlib${ii}$_sbdsqr to compute the svd of the reduced real
           ! n-by-n upper bidiagonal matrix.
           call stdlib${ii}$_sbdsqr( 'U', n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c,ldc, work, info )
                     
           ! sort the singular values into ascending order (insertion sort on
           ! singular values, but only one transposition per singular vector)
           do i = 1, n
              ! scan for smallest d(i).
              isub = i
              smin = d( i )
              do j = i + 1, n
                 if( d( j )<smin ) then
                    isub = j
                    smin = d( j )
                 end if
              end do
              if( isub/=i ) then
                 ! swap singular values and vectors.
                 d( isub ) = d( i )
                 d( i ) = smin
                 if( ncvt>0_${ik}$ )call stdlib${ii}$_sswap( ncvt, vt( isub, 1_${ik}$ ), ldvt, vt( i, 1_${ik}$ ), ldvt )
                           
                 if( nru>0_${ik}$ )call stdlib${ii}$_sswap( nru, u( 1_${ik}$, isub ), 1_${ik}$, u( 1_${ik}$, i ), 1_${ik}$ )
                 if( ncc>0_${ik}$ )call stdlib${ii}$_sswap( ncc, c( isub, 1_${ik}$ ), ldc, c( i, 1_${ik}$ ), ldc )
              end if
           end do
           return
     end subroutine stdlib${ii}$_slasdq

     pure module subroutine stdlib${ii}$_dlasdq( uplo, sqre, n, ncvt, nru, ncc, d, e, vt, ldvt,u, ldu, c, ldc, &
     !! DLASDQ computes the singular value decomposition (SVD) of a real
     !! (upper or lower) bidiagonal matrix with diagonal D and offdiagonal
     !! E, accumulating the transformations if desired. Letting B denote
     !! the input bidiagonal matrix, the algorithm computes orthogonal
     !! matrices Q and P such that B = Q * S * P**T (P**T denotes the transpose
     !! of P). The singular values S are overwritten on D.
     !! The input matrix U  is changed to U  * Q  if desired.
     !! The input matrix VT is changed to P**T * VT if desired.
     !! The input matrix C  is changed to Q**T * C  if desired.
     !! See "Computing  Small Singular Values of Bidiagonal Matrices With
     !! Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
     !! LAPACK Working Note #3, for a detailed description of the algorithm.
               work, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldc, ldu, ldvt, n, ncc, ncvt, nru, sqre
           ! Array Arguments 
           real(dp), intent(inout) :: c(ldc,*), d(*), e(*), u(ldu,*), vt(ldvt,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: rotate
           integer(${ik}$) :: i, isub, iuplo, j, np1, sqre1
           real(dp) :: cs, r, smin, sn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           iuplo = 0_${ik}$
           if( stdlib_lsame( uplo, 'U' ) )iuplo = 1_${ik}$
           if( stdlib_lsame( uplo, 'L' ) )iuplo = 2_${ik}$
           if( iuplo==0_${ik}$ ) then
              info = -1_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ncvt<0_${ik}$ ) then
              info = -4_${ik}$
           else if( nru<0_${ik}$ ) then
              info = -5_${ik}$
           else if( ncc<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ( ncvt==0_${ik}$ .and. ldvt<1_${ik}$ ) .or.( ncvt>0_${ik}$ .and. ldvt<max( 1_${ik}$, n ) ) ) then
              info = -10_${ik}$
           else if( ldu<max( 1_${ik}$, nru ) ) then
              info = -12_${ik}$
           else if( ( ncc==0_${ik}$ .and. ldc<1_${ik}$ ) .or.( ncc>0_${ik}$ .and. ldc<max( 1_${ik}$, n ) ) ) then
              info = -14_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASDQ', -info )
              return
           end if
           if( n==0 )return
           ! rotate is true if any singular vectors desired, false otherwise
           rotate = ( ncvt>0_${ik}$ ) .or. ( nru>0_${ik}$ ) .or. ( ncc>0_${ik}$ )
           np1 = n + 1_${ik}$
           sqre1 = sqre
           ! if matrix non-square upper bidiagonal, rotate to be lower
           ! bidiagonal.  the rotations are on the right.
           if( ( iuplo==1_${ik}$ ) .and. ( sqre1==1_${ik}$ ) ) then
              do i = 1, n - 1
                 call stdlib${ii}$_dlartg( d( i ), e( i ), cs, sn, r )
                 d( i ) = r
                 e( i ) = sn*d( i+1 )
                 d( i+1 ) = cs*d( i+1 )
                 if( rotate ) then
                    work( i ) = cs
                    work( n+i ) = sn
                 end if
              end do
              call stdlib${ii}$_dlartg( d( n ), e( n ), cs, sn, r )
              d( n ) = r
              e( n ) = zero
              if( rotate ) then
                 work( n ) = cs
                 work( n+n ) = sn
              end if
              iuplo = 2_${ik}$
              sqre1 = 0_${ik}$
              ! update singular vectors if desired.
              if( ncvt>0_${ik}$ )call stdlib${ii}$_dlasr( 'L', 'V', 'F', np1, ncvt, work( 1_${ik}$ ),work( np1 ), vt, &
                        ldvt )
           end if
           ! if matrix lower bidiagonal, rotate to be upper bidiagonal
           ! by applying givens rotations on the left.
           if( iuplo==2_${ik}$ ) then
              do i = 1, n - 1
                 call stdlib${ii}$_dlartg( d( i ), e( i ), cs, sn, r )
                 d( i ) = r
                 e( i ) = sn*d( i+1 )
                 d( i+1 ) = cs*d( i+1 )
                 if( rotate ) then
                    work( i ) = cs
                    work( n+i ) = sn
                 end if
              end do
              ! if matrix (n+1)-by-n lower bidiagonal, one additional
              ! rotation is needed.
              if( sqre1==1_${ik}$ ) then
                 call stdlib${ii}$_dlartg( d( n ), e( n ), cs, sn, r )
                 d( n ) = r
                 if( rotate ) then
                    work( n ) = cs
                    work( n+n ) = sn
                 end if
              end if
              ! update singular vectors if desired.
              if( nru>0_${ik}$ ) then
                 if( sqre1==0_${ik}$ ) then
                    call stdlib${ii}$_dlasr( 'R', 'V', 'F', nru, n, work( 1_${ik}$ ),work( np1 ), u, ldu )
                              
                 else
                    call stdlib${ii}$_dlasr( 'R', 'V', 'F', nru, np1, work( 1_${ik}$ ),work( np1 ), u, ldu )
                              
                 end if
              end if
              if( ncc>0_${ik}$ ) then
                 if( sqre1==0_${ik}$ ) then
                    call stdlib${ii}$_dlasr( 'L', 'V', 'F', n, ncc, work( 1_${ik}$ ),work( np1 ), c, ldc )
                              
                 else
                    call stdlib${ii}$_dlasr( 'L', 'V', 'F', np1, ncc, work( 1_${ik}$ ),work( np1 ), c, ldc )
                              
                 end if
              end if
           end if
           ! call stdlib${ii}$_dbdsqr to compute the svd of the reduced real
           ! n-by-n upper bidiagonal matrix.
           call stdlib${ii}$_dbdsqr( 'U', n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c,ldc, work, info )
                     
           ! sort the singular values into ascending order (insertion sort on
           ! singular values, but only one transposition per singular vector)
           do i = 1, n
              ! scan for smallest d(i).
              isub = i
              smin = d( i )
              do j = i + 1, n
                 if( d( j )<smin ) then
                    isub = j
                    smin = d( j )
                 end if
              end do
              if( isub/=i ) then
                 ! swap singular values and vectors.
                 d( isub ) = d( i )
                 d( i ) = smin
                 if( ncvt>0_${ik}$ )call stdlib${ii}$_dswap( ncvt, vt( isub, 1_${ik}$ ), ldvt, vt( i, 1_${ik}$ ), ldvt )
                           
                 if( nru>0_${ik}$ )call stdlib${ii}$_dswap( nru, u( 1_${ik}$, isub ), 1_${ik}$, u( 1_${ik}$, i ), 1_${ik}$ )
                 if( ncc>0_${ik}$ )call stdlib${ii}$_dswap( ncc, c( isub, 1_${ik}$ ), ldc, c( i, 1_${ik}$ ), ldc )
              end if
           end do
           return
     end subroutine stdlib${ii}$_dlasdq

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lasdq( uplo, sqre, n, ncvt, nru, ncc, d, e, vt, ldvt,u, ldu, c, ldc, &
     !! DLASDQ: computes the singular value decomposition (SVD) of a real
     !! (upper or lower) bidiagonal matrix with diagonal D and offdiagonal
     !! E, accumulating the transformations if desired. Letting B denote
     !! the input bidiagonal matrix, the algorithm computes orthogonal
     !! matrices Q and P such that B = Q * S * P**T (P**T denotes the transpose
     !! of P). The singular values S are overwritten on D.
     !! The input matrix U  is changed to U  * Q  if desired.
     !! The input matrix VT is changed to P**T * VT if desired.
     !! The input matrix C  is changed to Q**T * C  if desired.
     !! See "Computing  Small Singular Values of Bidiagonal Matrices With
     !! Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
     !! LAPACK Working Note #3, for a detailed description of the algorithm.
               work, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: uplo
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldc, ldu, ldvt, n, ncc, ncvt, nru, sqre
           ! Array Arguments 
           real(${rk}$), intent(inout) :: c(ldc,*), d(*), e(*), u(ldu,*), vt(ldvt,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: rotate
           integer(${ik}$) :: i, isub, iuplo, j, np1, sqre1
           real(${rk}$) :: cs, r, smin, sn
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           iuplo = 0_${ik}$
           if( stdlib_lsame( uplo, 'U' ) )iuplo = 1_${ik}$
           if( stdlib_lsame( uplo, 'L' ) )iuplo = 2_${ik}$
           if( iuplo==0_${ik}$ ) then
              info = -1_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ncvt<0_${ik}$ ) then
              info = -4_${ik}$
           else if( nru<0_${ik}$ ) then
              info = -5_${ik}$
           else if( ncc<0_${ik}$ ) then
              info = -6_${ik}$
           else if( ( ncvt==0_${ik}$ .and. ldvt<1_${ik}$ ) .or.( ncvt>0_${ik}$ .and. ldvt<max( 1_${ik}$, n ) ) ) then
              info = -10_${ik}$
           else if( ldu<max( 1_${ik}$, nru ) ) then
              info = -12_${ik}$
           else if( ( ncc==0_${ik}$ .and. ldc<1_${ik}$ ) .or.( ncc>0_${ik}$ .and. ldc<max( 1_${ik}$, n ) ) ) then
              info = -14_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASDQ', -info )
              return
           end if
           if( n==0 )return
           ! rotate is true if any singular vectors desired, false otherwise
           rotate = ( ncvt>0_${ik}$ ) .or. ( nru>0_${ik}$ ) .or. ( ncc>0_${ik}$ )
           np1 = n + 1_${ik}$
           sqre1 = sqre
           ! if matrix non-square upper bidiagonal, rotate to be lower
           ! bidiagonal.  the rotations are on the right.
           if( ( iuplo==1_${ik}$ ) .and. ( sqre1==1_${ik}$ ) ) then
              do i = 1, n - 1
                 call stdlib${ii}$_${ri}$lartg( d( i ), e( i ), cs, sn, r )
                 d( i ) = r
                 e( i ) = sn*d( i+1 )
                 d( i+1 ) = cs*d( i+1 )
                 if( rotate ) then
                    work( i ) = cs
                    work( n+i ) = sn
                 end if
              end do
              call stdlib${ii}$_${ri}$lartg( d( n ), e( n ), cs, sn, r )
              d( n ) = r
              e( n ) = zero
              if( rotate ) then
                 work( n ) = cs
                 work( n+n ) = sn
              end if
              iuplo = 2_${ik}$
              sqre1 = 0_${ik}$
              ! update singular vectors if desired.
              if( ncvt>0_${ik}$ )call stdlib${ii}$_${ri}$lasr( 'L', 'V', 'F', np1, ncvt, work( 1_${ik}$ ),work( np1 ), vt, &
                        ldvt )
           end if
           ! if matrix lower bidiagonal, rotate to be upper bidiagonal
           ! by applying givens rotations on the left.
           if( iuplo==2_${ik}$ ) then
              do i = 1, n - 1
                 call stdlib${ii}$_${ri}$lartg( d( i ), e( i ), cs, sn, r )
                 d( i ) = r
                 e( i ) = sn*d( i+1 )
                 d( i+1 ) = cs*d( i+1 )
                 if( rotate ) then
                    work( i ) = cs
                    work( n+i ) = sn
                 end if
              end do
              ! if matrix (n+1)-by-n lower bidiagonal, one additional
              ! rotation is needed.
              if( sqre1==1_${ik}$ ) then
                 call stdlib${ii}$_${ri}$lartg( d( n ), e( n ), cs, sn, r )
                 d( n ) = r
                 if( rotate ) then
                    work( n ) = cs
                    work( n+n ) = sn
                 end if
              end if
              ! update singular vectors if desired.
              if( nru>0_${ik}$ ) then
                 if( sqre1==0_${ik}$ ) then
                    call stdlib${ii}$_${ri}$lasr( 'R', 'V', 'F', nru, n, work( 1_${ik}$ ),work( np1 ), u, ldu )
                              
                 else
                    call stdlib${ii}$_${ri}$lasr( 'R', 'V', 'F', nru, np1, work( 1_${ik}$ ),work( np1 ), u, ldu )
                              
                 end if
              end if
              if( ncc>0_${ik}$ ) then
                 if( sqre1==0_${ik}$ ) then
                    call stdlib${ii}$_${ri}$lasr( 'L', 'V', 'F', n, ncc, work( 1_${ik}$ ),work( np1 ), c, ldc )
                              
                 else
                    call stdlib${ii}$_${ri}$lasr( 'L', 'V', 'F', np1, ncc, work( 1_${ik}$ ),work( np1 ), c, ldc )
                              
                 end if
              end if
           end if
           ! call stdlib${ii}$_${ri}$bdsqr to compute the svd of the reduced real
           ! n-by-n upper bidiagonal matrix.
           call stdlib${ii}$_${ri}$bdsqr( 'U', n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c,ldc, work, info )
                     
           ! sort the singular values into ascending order (insertion sort on
           ! singular values, but only one transposition per singular vector)
           do i = 1, n
              ! scan for smallest d(i).
              isub = i
              smin = d( i )
              do j = i + 1, n
                 if( d( j )<smin ) then
                    isub = j
                    smin = d( j )
                 end if
              end do
              if( isub/=i ) then
                 ! swap singular values and vectors.
                 d( isub ) = d( i )
                 d( i ) = smin
                 if( ncvt>0_${ik}$ )call stdlib${ii}$_${ri}$swap( ncvt, vt( isub, 1_${ik}$ ), ldvt, vt( i, 1_${ik}$ ), ldvt )
                           
                 if( nru>0_${ik}$ )call stdlib${ii}$_${ri}$swap( nru, u( 1_${ik}$, isub ), 1_${ik}$, u( 1_${ik}$, i ), 1_${ik}$ )
                 if( ncc>0_${ik}$ )call stdlib${ii}$_${ri}$swap( ncc, c( isub, 1_${ik}$ ), ldc, c( i, 1_${ik}$ ), ldc )
              end if
           end do
           return
     end subroutine stdlib${ii}$_${ri}$lasdq

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slasda( icompq, smlsiz, n, sqre, d, e, u, ldu, vt, k,difl, difr, z, &
     !! Using a divide and conquer approach, SLASDA: computes the singular
     !! value decomposition (SVD) of a real upper bidiagonal N-by-M matrix
     !! B with diagonal D and offdiagonal E, where M = N + SQRE. The
     !! algorithm computes the singular values in the SVD B = U * S * VT.
     !! The orthogonal matrices U and VT are optionally computed in
     !! compact form.
     !! A related subroutine, SLASD0, computes the singular values and
     !! the singular vectors in explicit form.
               poles, givptr, givcol, ldgcol,perm, givnum, c, s, work, iwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: icompq, ldgcol, ldu, n, smlsiz, sqre
           integer(${ik}$), intent(out) :: info
           ! Array Arguments 
           integer(${ik}$), intent(out) :: givcol(ldgcol,*), givptr(*), iwork(*), k(*), perm(ldgcol,&
                     *)
           real(sp), intent(out) :: c(*), difl(ldu,*), difr(ldu,*), givnum(ldu,*), poles(ldu,*), &
                     s(*), u(ldu,*), vt(ldu,*), work(*), z(ldu,*)
           real(sp), intent(inout) :: d(*), e(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, i1, ic, idxq, idxqi, im1, inode, itemp, iwk, j, lf, ll, lvl, lvl2, &
           m, ncc, nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, nru, nwork1, &
                     nwork2, smlszp, sqrei, vf, vfi, vl, vli
           real(sp) :: alpha, beta
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then
              info = -1_${ik}$
           else if( smlsiz<3_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -4_${ik}$
           else if( ldu<( n+sqre ) ) then
              info = -8_${ik}$
           else if( ldgcol<n ) then
              info = -17_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLASDA', -info )
              return
           end if
           m = n + sqre
           ! if the input matrix is too small, call stdlib${ii}$_slasdq to find the svd.
           if( n<=smlsiz ) then
              if( icompq==0_${ik}$ ) then
                 call stdlib${ii}$_slasdq( 'U', sqre, n, 0_${ik}$, 0_${ik}$, 0_${ik}$, d, e, vt, ldu, u, ldu,u, ldu, work, &
                           info )
              else
                 call stdlib${ii}$_slasdq( 'U', sqre, n, m, n, 0_${ik}$, d, e, vt, ldu, u, ldu,u, ldu, work, &
                           info )
              end if
              return
           end if
           ! book-keeping and  set up the computation tree.
           inode = 1_${ik}$
           ndiml = inode + n
           ndimr = ndiml + n
           idxq = ndimr + n
           iwk = idxq + n
           ncc = 0_${ik}$
           nru = 0_${ik}$
           smlszp = smlsiz + 1_${ik}$
           vf = 1_${ik}$
           vl = vf + m
           nwork1 = vl + m
           nwork2 = nwork1 + smlszp*smlszp
           call stdlib${ii}$_slasdt( n, nlvl, nd, iwork( inode ), iwork( ndiml ),iwork( ndimr ), smlsiz &
                     )
           ! for the nodes on bottom level of the tree, solve
           ! their subproblems by stdlib${ii}$_slasdq.
           ndb1 = ( nd+1 ) / 2_${ik}$
           loop_30: do i = ndb1, nd
              ! ic : center row of each node
              ! nl : number of rows of left  subproblem
              ! nr : number of rows of right subproblem
              ! nlf: starting row of the left   subproblem
              ! nrf: starting row of the right  subproblem
              i1 = i - 1_${ik}$
              ic = iwork( inode+i1 )
              nl = iwork( ndiml+i1 )
              nlp1 = nl + 1_${ik}$
              nr = iwork( ndimr+i1 )
              nlf = ic - nl
              nrf = ic + 1_${ik}$
              idxqi = idxq + nlf - 2_${ik}$
              vfi = vf + nlf - 1_${ik}$
              vli = vl + nlf - 1_${ik}$
              sqrei = 1_${ik}$
              if( icompq==0_${ik}$ ) then
                 call stdlib${ii}$_slaset( 'A', nlp1, nlp1, zero, one, work( nwork1 ),smlszp )
                 call stdlib${ii}$_slasdq( 'U', sqrei, nl, nlp1, nru, ncc, d( nlf ),e( nlf ), work( &
                 nwork1 ), smlszp,work( nwork2 ), nl, work( nwork2 ), nl,work( nwork2 ), info )
                           
                 itemp = nwork1 + nl*smlszp
                 call stdlib${ii}$_scopy( nlp1, work( nwork1 ), 1_${ik}$, work( vfi ), 1_${ik}$ )
                 call stdlib${ii}$_scopy( nlp1, work( itemp ), 1_${ik}$, work( vli ), 1_${ik}$ )
              else
                 call stdlib${ii}$_slaset( 'A', nl, nl, zero, one, u( nlf, 1_${ik}$ ), ldu )
                 call stdlib${ii}$_slaset( 'A', nlp1, nlp1, zero, one, vt( nlf, 1_${ik}$ ), ldu )
                 call stdlib${ii}$_slasdq( 'U', sqrei, nl, nlp1, nl, ncc, d( nlf ),e( nlf ), vt( nlf, 1_${ik}$ &
                           ), ldu, u( nlf, 1_${ik}$ ), ldu,u( nlf, 1_${ik}$ ), ldu, work( nwork1 ), info )
                 call stdlib${ii}$_scopy( nlp1, vt( nlf, 1_${ik}$ ), 1_${ik}$, work( vfi ), 1_${ik}$ )
                 call stdlib${ii}$_scopy( nlp1, vt( nlf, nlp1 ), 1_${ik}$, work( vli ), 1_${ik}$ )
              end if
              if( info/=0_${ik}$ ) then
                 return
              end if
              do j = 1, nl
                 iwork( idxqi+j ) = j
              end do
              if( ( i==nd ) .and. ( sqre==0_${ik}$ ) ) then
                 sqrei = 0_${ik}$
              else
                 sqrei = 1_${ik}$
              end if
              idxqi = idxqi + nlp1
              vfi = vfi + nlp1
              vli = vli + nlp1
              nrp1 = nr + sqrei
              if( icompq==0_${ik}$ ) then
                 call stdlib${ii}$_slaset( 'A', nrp1, nrp1, zero, one, work( nwork1 ),smlszp )
                 call stdlib${ii}$_slasdq( 'U', sqrei, nr, nrp1, nru, ncc, d( nrf ),e( nrf ), work( &
                 nwork1 ), smlszp,work( nwork2 ), nr, work( nwork2 ), nr,work( nwork2 ), info )
                           
                 itemp = nwork1 + ( nrp1-1 )*smlszp
                 call stdlib${ii}$_scopy( nrp1, work( nwork1 ), 1_${ik}$, work( vfi ), 1_${ik}$ )
                 call stdlib${ii}$_scopy( nrp1, work( itemp ), 1_${ik}$, work( vli ), 1_${ik}$ )
              else
                 call stdlib${ii}$_slaset( 'A', nr, nr, zero, one, u( nrf, 1_${ik}$ ), ldu )
                 call stdlib${ii}$_slaset( 'A', nrp1, nrp1, zero, one, vt( nrf, 1_${ik}$ ), ldu )
                 call stdlib${ii}$_slasdq( 'U', sqrei, nr, nrp1, nr, ncc, d( nrf ),e( nrf ), vt( nrf, 1_${ik}$ &
                           ), ldu, u( nrf, 1_${ik}$ ), ldu,u( nrf, 1_${ik}$ ), ldu, work( nwork1 ), info )
                 call stdlib${ii}$_scopy( nrp1, vt( nrf, 1_${ik}$ ), 1_${ik}$, work( vfi ), 1_${ik}$ )
                 call stdlib${ii}$_scopy( nrp1, vt( nrf, nrp1 ), 1_${ik}$, work( vli ), 1_${ik}$ )
              end if
              if( info/=0_${ik}$ ) then
                 return
              end if
              do j = 1, nr
                 iwork( idxqi+j ) = j
              end do
           end do loop_30
           ! now conquer each subproblem bottom-up.
           j = 2_${ik}$**nlvl
           loop_50: do lvl = nlvl, 1, -1
              lvl2 = lvl*2_${ik}$ - 1_${ik}$
              ! find the first node lf and last node ll on
              ! the current level lvl.
              if( lvl==1_${ik}$ ) then
                 lf = 1_${ik}$
                 ll = 1_${ik}$
              else
                 lf = 2_${ik}$**( lvl-1 )
                 ll = 2_${ik}$*lf - 1_${ik}$
              end if
              loop_40: do i = lf, ll
                 im1 = i - 1_${ik}$
                 ic = iwork( inode+im1 )
                 nl = iwork( ndiml+im1 )
                 nr = iwork( ndimr+im1 )
                 nlf = ic - nl
                 nrf = ic + 1_${ik}$
                 if( i==ll ) then
                    sqrei = sqre
                 else
                    sqrei = 1_${ik}$
                 end if
                 vfi = vf + nlf - 1_${ik}$
                 vli = vl + nlf - 1_${ik}$
                 idxqi = idxq + nlf - 1_${ik}$
                 alpha = d( ic )
                 beta = e( ic )
                 if( icompq==0_${ik}$ ) then
                    call stdlib${ii}$_slasd6( icompq, nl, nr, sqrei, d( nlf ),work( vfi ), work( vli ), &
                    alpha, beta,iwork( idxqi ), perm, givptr( 1_${ik}$ ), givcol,ldgcol, givnum, ldu, &
                    poles, difl, difr, z,k( 1_${ik}$ ), c( 1_${ik}$ ), s( 1_${ik}$ ), work( nwork1 ),iwork( iwk ), &
                              info )
                 else
                    j = j - 1_${ik}$
                    call stdlib${ii}$_slasd6( icompq, nl, nr, sqrei, d( nlf ),work( vfi ), work( vli ), &
                    alpha, beta,iwork( idxqi ), perm( nlf, lvl ),givptr( j ), givcol( nlf, lvl2 ),&
                     ldgcol,givnum( nlf, lvl2 ), ldu,poles( nlf, lvl2 ), difl( nlf, lvl ),difr( &
                     nlf, lvl2 ), z( nlf, lvl ), k( j ),c( j ), s( j ), work( nwork1 ),iwork( iwk &
                               ), info )
                 end if
                 if( info/=0_${ik}$ ) then
                    return
                 end if
              end do loop_40
           end do loop_50
           return
     end subroutine stdlib${ii}$_slasda

     pure module subroutine stdlib${ii}$_dlasda( icompq, smlsiz, n, sqre, d, e, u, ldu, vt, k,difl, difr, z, &
     !! Using a divide and conquer approach, DLASDA: computes the singular
     !! value decomposition (SVD) of a real upper bidiagonal N-by-M matrix
     !! B with diagonal D and offdiagonal E, where M = N + SQRE. The
     !! algorithm computes the singular values in the SVD B = U * S * VT.
     !! The orthogonal matrices U and VT are optionally computed in
     !! compact form.
     !! A related subroutine, DLASD0, computes the singular values and
     !! the singular vectors in explicit form.
               poles, givptr, givcol, ldgcol,perm, givnum, c, s, work, iwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: icompq, ldgcol, ldu, n, smlsiz, sqre
           integer(${ik}$), intent(out) :: info
           ! Array Arguments 
           integer(${ik}$), intent(out) :: givcol(ldgcol,*), givptr(*), iwork(*), k(*), perm(ldgcol,&
                     *)
           real(dp), intent(out) :: c(*), difl(ldu,*), difr(ldu,*), givnum(ldu,*), poles(ldu,*), &
                     s(*), u(ldu,*), vt(ldu,*), work(*), z(ldu,*)
           real(dp), intent(inout) :: d(*), e(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, i1, ic, idxq, idxqi, im1, inode, itemp, iwk, j, lf, ll, lvl, lvl2, &
           m, ncc, nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, nru, nwork1, &
                     nwork2, smlszp, sqrei, vf, vfi, vl, vli
           real(dp) :: alpha, beta
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then
              info = -1_${ik}$
           else if( smlsiz<3_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -4_${ik}$
           else if( ldu<( n+sqre ) ) then
              info = -8_${ik}$
           else if( ldgcol<n ) then
              info = -17_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASDA', -info )
              return
           end if
           m = n + sqre
           ! if the input matrix is too small, call stdlib${ii}$_dlasdq to find the svd.
           if( n<=smlsiz ) then
              if( icompq==0_${ik}$ ) then
                 call stdlib${ii}$_dlasdq( 'U', sqre, n, 0_${ik}$, 0_${ik}$, 0_${ik}$, d, e, vt, ldu, u, ldu,u, ldu, work, &
                           info )
              else
                 call stdlib${ii}$_dlasdq( 'U', sqre, n, m, n, 0_${ik}$, d, e, vt, ldu, u, ldu,u, ldu, work, &
                           info )
              end if
              return
           end if
           ! book-keeping and  set up the computation tree.
           inode = 1_${ik}$
           ndiml = inode + n
           ndimr = ndiml + n
           idxq = ndimr + n
           iwk = idxq + n
           ncc = 0_${ik}$
           nru = 0_${ik}$
           smlszp = smlsiz + 1_${ik}$
           vf = 1_${ik}$
           vl = vf + m
           nwork1 = vl + m
           nwork2 = nwork1 + smlszp*smlszp
           call stdlib${ii}$_dlasdt( n, nlvl, nd, iwork( inode ), iwork( ndiml ),iwork( ndimr ), smlsiz &
                     )
           ! for the nodes on bottom level of the tree, solve
           ! their subproblems by stdlib${ii}$_dlasdq.
           ndb1 = ( nd+1 ) / 2_${ik}$
           loop_30: do i = ndb1, nd
              ! ic : center row of each node
              ! nl : number of rows of left  subproblem
              ! nr : number of rows of right subproblem
              ! nlf: starting row of the left   subproblem
              ! nrf: starting row of the right  subproblem
              i1 = i - 1_${ik}$
              ic = iwork( inode+i1 )
              nl = iwork( ndiml+i1 )
              nlp1 = nl + 1_${ik}$
              nr = iwork( ndimr+i1 )
              nlf = ic - nl
              nrf = ic + 1_${ik}$
              idxqi = idxq + nlf - 2_${ik}$
              vfi = vf + nlf - 1_${ik}$
              vli = vl + nlf - 1_${ik}$
              sqrei = 1_${ik}$
              if( icompq==0_${ik}$ ) then
                 call stdlib${ii}$_dlaset( 'A', nlp1, nlp1, zero, one, work( nwork1 ),smlszp )
                 call stdlib${ii}$_dlasdq( 'U', sqrei, nl, nlp1, nru, ncc, d( nlf ),e( nlf ), work( &
                 nwork1 ), smlszp,work( nwork2 ), nl, work( nwork2 ), nl,work( nwork2 ), info )
                           
                 itemp = nwork1 + nl*smlszp
                 call stdlib${ii}$_dcopy( nlp1, work( nwork1 ), 1_${ik}$, work( vfi ), 1_${ik}$ )
                 call stdlib${ii}$_dcopy( nlp1, work( itemp ), 1_${ik}$, work( vli ), 1_${ik}$ )
              else
                 call stdlib${ii}$_dlaset( 'A', nl, nl, zero, one, u( nlf, 1_${ik}$ ), ldu )
                 call stdlib${ii}$_dlaset( 'A', nlp1, nlp1, zero, one, vt( nlf, 1_${ik}$ ), ldu )
                 call stdlib${ii}$_dlasdq( 'U', sqrei, nl, nlp1, nl, ncc, d( nlf ),e( nlf ), vt( nlf, 1_${ik}$ &
                           ), ldu, u( nlf, 1_${ik}$ ), ldu,u( nlf, 1_${ik}$ ), ldu, work( nwork1 ), info )
                 call stdlib${ii}$_dcopy( nlp1, vt( nlf, 1_${ik}$ ), 1_${ik}$, work( vfi ), 1_${ik}$ )
                 call stdlib${ii}$_dcopy( nlp1, vt( nlf, nlp1 ), 1_${ik}$, work( vli ), 1_${ik}$ )
              end if
              if( info/=0_${ik}$ ) then
                 return
              end if
              do j = 1, nl
                 iwork( idxqi+j ) = j
              end do
              if( ( i==nd ) .and. ( sqre==0_${ik}$ ) ) then
                 sqrei = 0_${ik}$
              else
                 sqrei = 1_${ik}$
              end if
              idxqi = idxqi + nlp1
              vfi = vfi + nlp1
              vli = vli + nlp1
              nrp1 = nr + sqrei
              if( icompq==0_${ik}$ ) then
                 call stdlib${ii}$_dlaset( 'A', nrp1, nrp1, zero, one, work( nwork1 ),smlszp )
                 call stdlib${ii}$_dlasdq( 'U', sqrei, nr, nrp1, nru, ncc, d( nrf ),e( nrf ), work( &
                 nwork1 ), smlszp,work( nwork2 ), nr, work( nwork2 ), nr,work( nwork2 ), info )
                           
                 itemp = nwork1 + ( nrp1-1 )*smlszp
                 call stdlib${ii}$_dcopy( nrp1, work( nwork1 ), 1_${ik}$, work( vfi ), 1_${ik}$ )
                 call stdlib${ii}$_dcopy( nrp1, work( itemp ), 1_${ik}$, work( vli ), 1_${ik}$ )
              else
                 call stdlib${ii}$_dlaset( 'A', nr, nr, zero, one, u( nrf, 1_${ik}$ ), ldu )
                 call stdlib${ii}$_dlaset( 'A', nrp1, nrp1, zero, one, vt( nrf, 1_${ik}$ ), ldu )
                 call stdlib${ii}$_dlasdq( 'U', sqrei, nr, nrp1, nr, ncc, d( nrf ),e( nrf ), vt( nrf, 1_${ik}$ &
                           ), ldu, u( nrf, 1_${ik}$ ), ldu,u( nrf, 1_${ik}$ ), ldu, work( nwork1 ), info )
                 call stdlib${ii}$_dcopy( nrp1, vt( nrf, 1_${ik}$ ), 1_${ik}$, work( vfi ), 1_${ik}$ )
                 call stdlib${ii}$_dcopy( nrp1, vt( nrf, nrp1 ), 1_${ik}$, work( vli ), 1_${ik}$ )
              end if
              if( info/=0_${ik}$ ) then
                 return
              end if
              do j = 1, nr
                 iwork( idxqi+j ) = j
              end do
           end do loop_30
           ! now conquer each subproblem bottom-up.
           j = 2_${ik}$**nlvl
           loop_50: do lvl = nlvl, 1, -1
              lvl2 = lvl*2_${ik}$ - 1_${ik}$
              ! find the first node lf and last node ll on
              ! the current level lvl.
              if( lvl==1_${ik}$ ) then
                 lf = 1_${ik}$
                 ll = 1_${ik}$
              else
                 lf = 2_${ik}$**( lvl-1 )
                 ll = 2_${ik}$*lf - 1_${ik}$
              end if
              loop_40: do i = lf, ll
                 im1 = i - 1_${ik}$
                 ic = iwork( inode+im1 )
                 nl = iwork( ndiml+im1 )
                 nr = iwork( ndimr+im1 )
                 nlf = ic - nl
                 nrf = ic + 1_${ik}$
                 if( i==ll ) then
                    sqrei = sqre
                 else
                    sqrei = 1_${ik}$
                 end if
                 vfi = vf + nlf - 1_${ik}$
                 vli = vl + nlf - 1_${ik}$
                 idxqi = idxq + nlf - 1_${ik}$
                 alpha = d( ic )
                 beta = e( ic )
                 if( icompq==0_${ik}$ ) then
                    call stdlib${ii}$_dlasd6( icompq, nl, nr, sqrei, d( nlf ),work( vfi ), work( vli ), &
                    alpha, beta,iwork( idxqi ), perm, givptr( 1_${ik}$ ), givcol,ldgcol, givnum, ldu, &
                    poles, difl, difr, z,k( 1_${ik}$ ), c( 1_${ik}$ ), s( 1_${ik}$ ), work( nwork1 ),iwork( iwk ), &
                              info )
                 else
                    j = j - 1_${ik}$
                    call stdlib${ii}$_dlasd6( icompq, nl, nr, sqrei, d( nlf ),work( vfi ), work( vli ), &
                    alpha, beta,iwork( idxqi ), perm( nlf, lvl ),givptr( j ), givcol( nlf, lvl2 ),&
                     ldgcol,givnum( nlf, lvl2 ), ldu,poles( nlf, lvl2 ), difl( nlf, lvl ),difr( &
                     nlf, lvl2 ), z( nlf, lvl ), k( j ),c( j ), s( j ), work( nwork1 ),iwork( iwk &
                               ), info )
                 end if
                 if( info/=0_${ik}$ ) then
                    return
                 end if
              end do loop_40
           end do loop_50
           return
     end subroutine stdlib${ii}$_dlasda

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lasda( icompq, smlsiz, n, sqre, d, e, u, ldu, vt, k,difl, difr, z, &
     !! Using a divide and conquer approach, DLASDA: computes the singular
     !! value decomposition (SVD) of a real upper bidiagonal N-by-M matrix
     !! B with diagonal D and offdiagonal E, where M = N + SQRE. The
     !! algorithm computes the singular values in the SVD B = U * S * VT.
     !! The orthogonal matrices U and VT are optionally computed in
     !! compact form.
     !! A related subroutine, DLASD0, computes the singular values and
     !! the singular vectors in explicit form.
               poles, givptr, givcol, ldgcol,perm, givnum, c, s, work, iwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: icompq, ldgcol, ldu, n, smlsiz, sqre
           integer(${ik}$), intent(out) :: info
           ! Array Arguments 
           integer(${ik}$), intent(out) :: givcol(ldgcol,*), givptr(*), iwork(*), k(*), perm(ldgcol,&
                     *)
           real(${rk}$), intent(out) :: c(*), difl(ldu,*), difr(ldu,*), givnum(ldu,*), poles(ldu,*), &
                     s(*), u(ldu,*), vt(ldu,*), work(*), z(ldu,*)
           real(${rk}$), intent(inout) :: d(*), e(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, i1, ic, idxq, idxqi, im1, inode, itemp, iwk, j, lf, ll, lvl, lvl2, &
           m, ncc, nd, ndb1, ndiml, ndimr, nl, nlf, nlp1, nlvl, nr, nrf, nrp1, nru, nwork1, &
                     nwork2, smlszp, sqrei, vf, vfi, vl, vli
           real(${rk}$) :: alpha, beta
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then
              info = -1_${ik}$
           else if( smlsiz<3_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -4_${ik}$
           else if( ldu<( n+sqre ) ) then
              info = -8_${ik}$
           else if( ldgcol<n ) then
              info = -17_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASDA', -info )
              return
           end if
           m = n + sqre
           ! if the input matrix is too small, call stdlib${ii}$_${ri}$lasdq to find the svd.
           if( n<=smlsiz ) then
              if( icompq==0_${ik}$ ) then
                 call stdlib${ii}$_${ri}$lasdq( 'U', sqre, n, 0_${ik}$, 0_${ik}$, 0_${ik}$, d, e, vt, ldu, u, ldu,u, ldu, work, &
                           info )
              else
                 call stdlib${ii}$_${ri}$lasdq( 'U', sqre, n, m, n, 0_${ik}$, d, e, vt, ldu, u, ldu,u, ldu, work, &
                           info )
              end if
              return
           end if
           ! book-keeping and  set up the computation tree.
           inode = 1_${ik}$
           ndiml = inode + n
           ndimr = ndiml + n
           idxq = ndimr + n
           iwk = idxq + n
           ncc = 0_${ik}$
           nru = 0_${ik}$
           smlszp = smlsiz + 1_${ik}$
           vf = 1_${ik}$
           vl = vf + m
           nwork1 = vl + m
           nwork2 = nwork1 + smlszp*smlszp
           call stdlib${ii}$_${ri}$lasdt( n, nlvl, nd, iwork( inode ), iwork( ndiml ),iwork( ndimr ), smlsiz &
                     )
           ! for the nodes on bottom level of the tree, solve
           ! their subproblems by stdlib${ii}$_${ri}$lasdq.
           ndb1 = ( nd+1 ) / 2_${ik}$
           loop_30: do i = ndb1, nd
              ! ic : center row of each node
              ! nl : number of rows of left  subproblem
              ! nr : number of rows of right subproblem
              ! nlf: starting row of the left   subproblem
              ! nrf: starting row of the right  subproblem
              i1 = i - 1_${ik}$
              ic = iwork( inode+i1 )
              nl = iwork( ndiml+i1 )
              nlp1 = nl + 1_${ik}$
              nr = iwork( ndimr+i1 )
              nlf = ic - nl
              nrf = ic + 1_${ik}$
              idxqi = idxq + nlf - 2_${ik}$
              vfi = vf + nlf - 1_${ik}$
              vli = vl + nlf - 1_${ik}$
              sqrei = 1_${ik}$
              if( icompq==0_${ik}$ ) then
                 call stdlib${ii}$_${ri}$laset( 'A', nlp1, nlp1, zero, one, work( nwork1 ),smlszp )
                 call stdlib${ii}$_${ri}$lasdq( 'U', sqrei, nl, nlp1, nru, ncc, d( nlf ),e( nlf ), work( &
                 nwork1 ), smlszp,work( nwork2 ), nl, work( nwork2 ), nl,work( nwork2 ), info )
                           
                 itemp = nwork1 + nl*smlszp
                 call stdlib${ii}$_${ri}$copy( nlp1, work( nwork1 ), 1_${ik}$, work( vfi ), 1_${ik}$ )
                 call stdlib${ii}$_${ri}$copy( nlp1, work( itemp ), 1_${ik}$, work( vli ), 1_${ik}$ )
              else
                 call stdlib${ii}$_${ri}$laset( 'A', nl, nl, zero, one, u( nlf, 1_${ik}$ ), ldu )
                 call stdlib${ii}$_${ri}$laset( 'A', nlp1, nlp1, zero, one, vt( nlf, 1_${ik}$ ), ldu )
                 call stdlib${ii}$_${ri}$lasdq( 'U', sqrei, nl, nlp1, nl, ncc, d( nlf ),e( nlf ), vt( nlf, 1_${ik}$ &
                           ), ldu, u( nlf, 1_${ik}$ ), ldu,u( nlf, 1_${ik}$ ), ldu, work( nwork1 ), info )
                 call stdlib${ii}$_${ri}$copy( nlp1, vt( nlf, 1_${ik}$ ), 1_${ik}$, work( vfi ), 1_${ik}$ )
                 call stdlib${ii}$_${ri}$copy( nlp1, vt( nlf, nlp1 ), 1_${ik}$, work( vli ), 1_${ik}$ )
              end if
              if( info/=0_${ik}$ ) then
                 return
              end if
              do j = 1, nl
                 iwork( idxqi+j ) = j
              end do
              if( ( i==nd ) .and. ( sqre==0_${ik}$ ) ) then
                 sqrei = 0_${ik}$
              else
                 sqrei = 1_${ik}$
              end if
              idxqi = idxqi + nlp1
              vfi = vfi + nlp1
              vli = vli + nlp1
              nrp1 = nr + sqrei
              if( icompq==0_${ik}$ ) then
                 call stdlib${ii}$_${ri}$laset( 'A', nrp1, nrp1, zero, one, work( nwork1 ),smlszp )
                 call stdlib${ii}$_${ri}$lasdq( 'U', sqrei, nr, nrp1, nru, ncc, d( nrf ),e( nrf ), work( &
                 nwork1 ), smlszp,work( nwork2 ), nr, work( nwork2 ), nr,work( nwork2 ), info )
                           
                 itemp = nwork1 + ( nrp1-1 )*smlszp
                 call stdlib${ii}$_${ri}$copy( nrp1, work( nwork1 ), 1_${ik}$, work( vfi ), 1_${ik}$ )
                 call stdlib${ii}$_${ri}$copy( nrp1, work( itemp ), 1_${ik}$, work( vli ), 1_${ik}$ )
              else
                 call stdlib${ii}$_${ri}$laset( 'A', nr, nr, zero, one, u( nrf, 1_${ik}$ ), ldu )
                 call stdlib${ii}$_${ri}$laset( 'A', nrp1, nrp1, zero, one, vt( nrf, 1_${ik}$ ), ldu )
                 call stdlib${ii}$_${ri}$lasdq( 'U', sqrei, nr, nrp1, nr, ncc, d( nrf ),e( nrf ), vt( nrf, 1_${ik}$ &
                           ), ldu, u( nrf, 1_${ik}$ ), ldu,u( nrf, 1_${ik}$ ), ldu, work( nwork1 ), info )
                 call stdlib${ii}$_${ri}$copy( nrp1, vt( nrf, 1_${ik}$ ), 1_${ik}$, work( vfi ), 1_${ik}$ )
                 call stdlib${ii}$_${ri}$copy( nrp1, vt( nrf, nrp1 ), 1_${ik}$, work( vli ), 1_${ik}$ )
              end if
              if( info/=0_${ik}$ ) then
                 return
              end if
              do j = 1, nr
                 iwork( idxqi+j ) = j
              end do
           end do loop_30
           ! now conquer each subproblem bottom-up.
           j = 2_${ik}$**nlvl
           loop_50: do lvl = nlvl, 1, -1
              lvl2 = lvl*2_${ik}$ - 1_${ik}$
              ! find the first node lf and last node ll on
              ! the current level lvl.
              if( lvl==1_${ik}$ ) then
                 lf = 1_${ik}$
                 ll = 1_${ik}$
              else
                 lf = 2_${ik}$**( lvl-1 )
                 ll = 2_${ik}$*lf - 1_${ik}$
              end if
              loop_40: do i = lf, ll
                 im1 = i - 1_${ik}$
                 ic = iwork( inode+im1 )
                 nl = iwork( ndiml+im1 )
                 nr = iwork( ndimr+im1 )
                 nlf = ic - nl
                 nrf = ic + 1_${ik}$
                 if( i==ll ) then
                    sqrei = sqre
                 else
                    sqrei = 1_${ik}$
                 end if
                 vfi = vf + nlf - 1_${ik}$
                 vli = vl + nlf - 1_${ik}$
                 idxqi = idxq + nlf - 1_${ik}$
                 alpha = d( ic )
                 beta = e( ic )
                 if( icompq==0_${ik}$ ) then
                    call stdlib${ii}$_${ri}$lasd6( icompq, nl, nr, sqrei, d( nlf ),work( vfi ), work( vli ), &
                    alpha, beta,iwork( idxqi ), perm, givptr( 1_${ik}$ ), givcol,ldgcol, givnum, ldu, &
                    poles, difl, difr, z,k( 1_${ik}$ ), c( 1_${ik}$ ), s( 1_${ik}$ ), work( nwork1 ),iwork( iwk ), &
                              info )
                 else
                    j = j - 1_${ik}$
                    call stdlib${ii}$_${ri}$lasd6( icompq, nl, nr, sqrei, d( nlf ),work( vfi ), work( vli ), &
                    alpha, beta,iwork( idxqi ), perm( nlf, lvl ),givptr( j ), givcol( nlf, lvl2 ),&
                     ldgcol,givnum( nlf, lvl2 ), ldu,poles( nlf, lvl2 ), difl( nlf, lvl ),difr( &
                     nlf, lvl2 ), z( nlf, lvl ), k( j ),c( j ), s( j ), work( nwork1 ),iwork( iwk &
                               ), info )
                 end if
                 if( info/=0_${ik}$ ) then
                    return
                 end if
              end do loop_40
           end do loop_50
           return
     end subroutine stdlib${ii}$_${ri}$lasda

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slasd6( icompq, nl, nr, sqre, d, vf, vl, alpha, beta,idxq, perm, &
     !! SLASD6 computes the SVD of an updated upper bidiagonal matrix B
     !! obtained by merging two smaller ones by appending a row. This
     !! routine is used only for the problem which requires all singular
     !! values and optionally singular vector matrices in factored form.
     !! B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE.
     !! A related subroutine, SLASD1, handles the case in which all singular
     !! values and singular vectors of the bidiagonal matrix are desired.
     !! SLASD6 computes the SVD as follows:
     !! ( D1(in)    0    0       0 )
     !! B = U(in) * (   Z1**T   a   Z2**T    b ) * VT(in)
     !! (   0       0   D2(in)   0 )
     !! = U(out) * ( D(out) 0) * VT(out)
     !! where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M
     !! with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
     !! elsewhere; and the entry b is empty if SQRE = 0.
     !! The singular values of B can be computed using D1, D2, the first
     !! components of all the right singular vectors of the lower block, and
     !! the last components of all the right singular vectors of the upper
     !! block. These components are stored and updated in VF and VL,
     !! respectively, in SLASD6. Hence U and VT are not explicitly
     !! referenced.
     !! The singular values are stored in D. The algorithm consists of two
     !! stages:
     !! The first stage consists of deflating the size of the problem
     !! when there are multiple singular values or if there is a zero
     !! in the Z vector. For each such occurrence the dimension of the
     !! secular equation problem is reduced by one. This stage is
     !! performed by the routine SLASD7.
     !! The second stage consists of calculating the updated
     !! singular values. This is done by finding the roots of the
     !! secular equation via the routine SLASD4 (as called by SLASD8).
     !! This routine also updates VF and VL and computes the distances
     !! between the updated singular values and the old singular
     !! values.
     !! SLASD6 is called from SLASDA.
     givptr, givcol, ldgcol, givnum,ldgnum, poles, difl, difr, z, k, c, s, work,iwork, info )
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: givptr, info, k
           integer(${ik}$), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre
           real(sp), intent(inout) :: alpha, beta
           real(sp), intent(out) :: c, s
           ! Array Arguments 
           integer(${ik}$), intent(out) :: givcol(ldgcol,*), iwork(*), perm(*)
           integer(${ik}$), intent(inout) :: idxq(*)
           real(sp), intent(inout) :: d(*), vf(*), vl(*)
           real(sp), intent(out) :: difl(*), difr(*), givnum(ldgnum,*), poles(ldgnum,*), work(*), &
                     z(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, idx, idxc, idxp, isigma, ivfw, ivlw, iw, m, n, n1, n2
           real(sp) :: orgnrm
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           n = nl + nr + 1_${ik}$
           m = n + sqre
           if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then
              info = -1_${ik}$
           else if( nl<1_${ik}$ ) then
              info = -2_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -3_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -4_${ik}$
           else if( ldgcol<n ) then
              info = -14_${ik}$
           else if( ldgnum<n ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLASD6', -info )
              return
           end if
           ! the following values are for bookkeeping purposes only.  they are
           ! integer pointers which indicate the portion of the workspace
           ! used by a particular array in stdlib${ii}$_slasd7 and stdlib${ii}$_slasd8.
           isigma = 1_${ik}$
           iw = isigma + n
           ivfw = iw + m
           ivlw = ivfw + m
           idx = 1_${ik}$
           idxc = idx + n
           idxp = idxc + n
           ! scale.
           orgnrm = max( abs( alpha ), abs( beta ) )
           d( nl+1 ) = zero
           do i = 1, n
              if( abs( d( i ) )>orgnrm ) then
                 orgnrm = abs( d( i ) )
              end if
           end do
           call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, orgnrm, one, n, 1_${ik}$, d, n, info )
           alpha = alpha / orgnrm
           beta = beta / orgnrm
           ! sort and deflate singular values.
           call stdlib${ii}$_slasd7( icompq, nl, nr, sqre, k, d, z, work( iw ), vf,work( ivfw ), vl, &
           work( ivlw ), alpha, beta,work( isigma ), iwork( idx ), iwork( idxp ), idxq,perm, &
                     givptr, givcol, ldgcol, givnum, ldgnum, c, s,info )
           ! solve secular equation, compute difl, difr, and update vf, vl.
           call stdlib${ii}$_slasd8( icompq, k, d, z, vf, vl, difl, difr, ldgnum,work( isigma ), work( &
                     iw ), info )
           ! report the possible convergence failure.
           if( info/=0_${ik}$ ) then
              return
           end if
           ! save the poles if icompq = 1.
           if( icompq==1_${ik}$ ) then
              call stdlib${ii}$_scopy( k, d, 1_${ik}$, poles( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              call stdlib${ii}$_scopy( k, work( isigma ), 1_${ik}$, poles( 1_${ik}$, 2_${ik}$ ), 1_${ik}$ )
           end if
           ! unscale.
           call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, orgnrm, n, 1_${ik}$, d, n, info )
           ! prepare the idxq sorting permutation.
           n1 = k
           n2 = n - k
           call stdlib${ii}$_slamrg( n1, n2, d, 1_${ik}$, -1_${ik}$, idxq )
           return
     end subroutine stdlib${ii}$_slasd6

     pure module subroutine stdlib${ii}$_dlasd6( icompq, nl, nr, sqre, d, vf, vl, alpha, beta,idxq, perm, &
     !! DLASD6 computes the SVD of an updated upper bidiagonal matrix B
     !! obtained by merging two smaller ones by appending a row. This
     !! routine is used only for the problem which requires all singular
     !! values and optionally singular vector matrices in factored form.
     !! B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE.
     !! A related subroutine, DLASD1, handles the case in which all singular
     !! values and singular vectors of the bidiagonal matrix are desired.
     !! DLASD6 computes the SVD as follows:
     !! ( D1(in)    0    0       0 )
     !! B = U(in) * (   Z1**T   a   Z2**T    b ) * VT(in)
     !! (   0       0   D2(in)   0 )
     !! = U(out) * ( D(out) 0) * VT(out)
     !! where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M
     !! with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
     !! elsewhere; and the entry b is empty if SQRE = 0.
     !! The singular values of B can be computed using D1, D2, the first
     !! components of all the right singular vectors of the lower block, and
     !! the last components of all the right singular vectors of the upper
     !! block. These components are stored and updated in VF and VL,
     !! respectively, in DLASD6. Hence U and VT are not explicitly
     !! referenced.
     !! The singular values are stored in D. The algorithm consists of two
     !! stages:
     !! The first stage consists of deflating the size of the problem
     !! when there are multiple singular values or if there is a zero
     !! in the Z vector. For each such occurrence the dimension of the
     !! secular equation problem is reduced by one. This stage is
     !! performed by the routine DLASD7.
     !! The second stage consists of calculating the updated
     !! singular values. This is done by finding the roots of the
     !! secular equation via the routine DLASD4 (as called by DLASD8).
     !! This routine also updates VF and VL and computes the distances
     !! between the updated singular values and the old singular
     !! values.
     !! DLASD6 is called from DLASDA.
     givptr, givcol, ldgcol, givnum,ldgnum, poles, difl, difr, z, k, c, s, work,iwork, info )
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: givptr, info, k
           integer(${ik}$), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre
           real(dp), intent(inout) :: alpha, beta
           real(dp), intent(out) :: c, s
           ! Array Arguments 
           integer(${ik}$), intent(out) :: givcol(ldgcol,*), iwork(*), perm(*)
           integer(${ik}$), intent(inout) :: idxq(*)
           real(dp), intent(inout) :: d(*), vf(*), vl(*)
           real(dp), intent(out) :: difl(*), difr(*), givnum(ldgnum,*), poles(ldgnum,*), work(*), &
                     z(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, idx, idxc, idxp, isigma, ivfw, ivlw, iw, m, n, n1, n2
           real(dp) :: orgnrm
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           n = nl + nr + 1_${ik}$
           m = n + sqre
           if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then
              info = -1_${ik}$
           else if( nl<1_${ik}$ ) then
              info = -2_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -3_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -4_${ik}$
           else if( ldgcol<n ) then
              info = -14_${ik}$
           else if( ldgnum<n ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD6', -info )
              return
           end if
           ! the following values are for bookkeeping purposes only.  they are
           ! integer pointers which indicate the portion of the workspace
           ! used by a particular array in stdlib${ii}$_dlasd7 and stdlib${ii}$_dlasd8.
           isigma = 1_${ik}$
           iw = isigma + n
           ivfw = iw + m
           ivlw = ivfw + m
           idx = 1_${ik}$
           idxc = idx + n
           idxp = idxc + n
           ! scale.
           orgnrm = max( abs( alpha ), abs( beta ) )
           d( nl+1 ) = zero
           do i = 1, n
              if( abs( d( i ) )>orgnrm ) then
                 orgnrm = abs( d( i ) )
              end if
           end do
           call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, orgnrm, one, n, 1_${ik}$, d, n, info )
           alpha = alpha / orgnrm
           beta = beta / orgnrm
           ! sort and deflate singular values.
           call stdlib${ii}$_dlasd7( icompq, nl, nr, sqre, k, d, z, work( iw ), vf,work( ivfw ), vl, &
           work( ivlw ), alpha, beta,work( isigma ), iwork( idx ), iwork( idxp ), idxq,perm, &
                     givptr, givcol, ldgcol, givnum, ldgnum, c, s,info )
           ! solve secular equation, compute difl, difr, and update vf, vl.
           call stdlib${ii}$_dlasd8( icompq, k, d, z, vf, vl, difl, difr, ldgnum,work( isigma ), work( &
                     iw ), info )
           ! report the possible convergence failure.
           if( info/=0_${ik}$ ) then
              return
           end if
           ! save the poles if icompq = 1.
           if( icompq==1_${ik}$ ) then
              call stdlib${ii}$_dcopy( k, d, 1_${ik}$, poles( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              call stdlib${ii}$_dcopy( k, work( isigma ), 1_${ik}$, poles( 1_${ik}$, 2_${ik}$ ), 1_${ik}$ )
           end if
           ! unscale.
           call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, orgnrm, n, 1_${ik}$, d, n, info )
           ! prepare the idxq sorting permutation.
           n1 = k
           n2 = n - k
           call stdlib${ii}$_dlamrg( n1, n2, d, 1_${ik}$, -1_${ik}$, idxq )
           return
     end subroutine stdlib${ii}$_dlasd6

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lasd6( icompq, nl, nr, sqre, d, vf, vl, alpha, beta,idxq, perm, &
     !! DLASD6: computes the SVD of an updated upper bidiagonal matrix B
     !! obtained by merging two smaller ones by appending a row. This
     !! routine is used only for the problem which requires all singular
     !! values and optionally singular vector matrices in factored form.
     !! B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE.
     !! A related subroutine, DLASD1, handles the case in which all singular
     !! values and singular vectors of the bidiagonal matrix are desired.
     !! DLASD6 computes the SVD as follows:
     !! ( D1(in)    0    0       0 )
     !! B = U(in) * (   Z1**T   a   Z2**T    b ) * VT(in)
     !! (   0       0   D2(in)   0 )
     !! = U(out) * ( D(out) 0) * VT(out)
     !! where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M
     !! with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
     !! elsewhere; and the entry b is empty if SQRE = 0.
     !! The singular values of B can be computed using D1, D2, the first
     !! components of all the right singular vectors of the lower block, and
     !! the last components of all the right singular vectors of the upper
     !! block. These components are stored and updated in VF and VL,
     !! respectively, in DLASD6. Hence U and VT are not explicitly
     !! referenced.
     !! The singular values are stored in D. The algorithm consists of two
     !! stages:
     !! The first stage consists of deflating the size of the problem
     !! when there are multiple singular values or if there is a zero
     !! in the Z vector. For each such occurrence the dimension of the
     !! secular equation problem is reduced by one. This stage is
     !! performed by the routine DLASD7.
     !! The second stage consists of calculating the updated
     !! singular values. This is done by finding the roots of the
     !! secular equation via the routine DLASD4 (as called by DLASD8).
     !! This routine also updates VF and VL and computes the distances
     !! between the updated singular values and the old singular
     !! values.
     !! DLASD6 is called from DLASDA.
     givptr, givcol, ldgcol, givnum,ldgnum, poles, difl, difr, z, k, c, s, work,iwork, info )
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: givptr, info, k
           integer(${ik}$), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre
           real(${rk}$), intent(inout) :: alpha, beta
           real(${rk}$), intent(out) :: c, s
           ! Array Arguments 
           integer(${ik}$), intent(out) :: givcol(ldgcol,*), iwork(*), perm(*)
           integer(${ik}$), intent(inout) :: idxq(*)
           real(${rk}$), intent(inout) :: d(*), vf(*), vl(*)
           real(${rk}$), intent(out) :: difl(*), difr(*), givnum(ldgnum,*), poles(ldgnum,*), work(*), &
                     z(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, idx, idxc, idxp, isigma, ivfw, ivlw, iw, m, n, n1, n2
           real(${rk}$) :: orgnrm
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           n = nl + nr + 1_${ik}$
           m = n + sqre
           if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then
              info = -1_${ik}$
           else if( nl<1_${ik}$ ) then
              info = -2_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -3_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -4_${ik}$
           else if( ldgcol<n ) then
              info = -14_${ik}$
           else if( ldgnum<n ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD6', -info )
              return
           end if
           ! the following values are for bookkeeping purposes only.  they are
           ! integer pointers which indicate the portion of the workspace
           ! used by a particular array in stdlib${ii}$_${ri}$lasd7 and stdlib${ii}$_${ri}$lasd8.
           isigma = 1_${ik}$
           iw = isigma + n
           ivfw = iw + m
           ivlw = ivfw + m
           idx = 1_${ik}$
           idxc = idx + n
           idxp = idxc + n
           ! scale.
           orgnrm = max( abs( alpha ), abs( beta ) )
           d( nl+1 ) = zero
           do i = 1, n
              if( abs( d( i ) )>orgnrm ) then
                 orgnrm = abs( d( i ) )
              end if
           end do
           call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, orgnrm, one, n, 1_${ik}$, d, n, info )
           alpha = alpha / orgnrm
           beta = beta / orgnrm
           ! sort and deflate singular values.
           call stdlib${ii}$_${ri}$lasd7( icompq, nl, nr, sqre, k, d, z, work( iw ), vf,work( ivfw ), vl, &
           work( ivlw ), alpha, beta,work( isigma ), iwork( idx ), iwork( idxp ), idxq,perm, &
                     givptr, givcol, ldgcol, givnum, ldgnum, c, s,info )
           ! solve secular equation, compute difl, difr, and update vf, vl.
           call stdlib${ii}$_${ri}$lasd8( icompq, k, d, z, vf, vl, difl, difr, ldgnum,work( isigma ), work( &
                     iw ), info )
           ! report the possible convergence failure.
           if( info/=0_${ik}$ ) then
              return
           end if
           ! save the poles if icompq = 1.
           if( icompq==1_${ik}$ ) then
              call stdlib${ii}$_${ri}$copy( k, d, 1_${ik}$, poles( 1_${ik}$, 1_${ik}$ ), 1_${ik}$ )
              call stdlib${ii}$_${ri}$copy( k, work( isigma ), 1_${ik}$, poles( 1_${ik}$, 2_${ik}$ ), 1_${ik}$ )
           end if
           ! unscale.
           call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, orgnrm, n, 1_${ik}$, d, n, info )
           ! prepare the idxq sorting permutation.
           n1 = k
           n2 = n - k
           call stdlib${ii}$_${ri}$lamrg( n1, n2, d, 1_${ik}$, -1_${ik}$, idxq )
           return
     end subroutine stdlib${ii}$_${ri}$lasd6

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slasd7( icompq, nl, nr, sqre, k, d, z, zw, vf, vfw, vl,vlw, alpha, &
     !! SLASD7 merges the two sets of singular values together into a single
     !! sorted set. Then it tries to deflate the size of the problem. There
     !! are two ways in which deflation can occur:  when two or more singular
     !! values are close together or if there is a tiny entry in the Z
     !! vector. For each such occurrence the order of the related
     !! secular equation problem is reduced by one.
     !! SLASD7 is called from SLASD6.
     beta, dsigma, idx, idxp, idxq,perm, givptr, givcol, ldgcol, givnum, ldgnum,c, s, info )
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: givptr, info, k
           integer(${ik}$), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre
           real(sp), intent(in) :: alpha, beta
           real(sp), intent(out) :: c, s
           ! Array Arguments 
           integer(${ik}$), intent(out) :: givcol(ldgcol,*), idx(*), idxp(*), perm(*)
           integer(${ik}$), intent(inout) :: idxq(*)
           real(sp), intent(inout) :: d(*), vf(*), vl(*)
           real(sp), intent(out) :: dsigma(*), givnum(ldgnum,*), vfw(*), vlw(*), z(*), zw(*)
                     
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2
           real(sp) :: eps, hlftol, tau, tol, z1
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           n = nl + nr + 1_${ik}$
           m = n + sqre
           if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then
              info = -1_${ik}$
           else if( nl<1_${ik}$ ) then
              info = -2_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -3_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -4_${ik}$
           else if( ldgcol<n ) then
              info = -22_${ik}$
           else if( ldgnum<n ) then
              info = -24_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLASD7', -info )
              return
           end if
           nlp1 = nl + 1_${ik}$
           nlp2 = nl + 2_${ik}$
           if( icompq==1_${ik}$ ) then
              givptr = 0_${ik}$
           end if
           ! generate the first part of the vector z and move the singular
           ! values in the first part of d one position backward.
           z1 = alpha*vl( nlp1 )
           vl( nlp1 ) = zero
           tau = vf( nlp1 )
           do i = nl, 1, -1
              z( i+1 ) = alpha*vl( i )
              vl( i ) = zero
              vf( i+1 ) = vf( i )
              d( i+1 ) = d( i )
              idxq( i+1 ) = idxq( i ) + 1_${ik}$
           end do
           vf( 1_${ik}$ ) = tau
           ! generate the second part of the vector z.
           do i = nlp2, m
              z( i ) = beta*vf( i )
              vf( i ) = zero
           end do
           ! sort the singular values into increasing order
           do i = nlp2, n
              idxq( i ) = idxq( i ) + nlp1
           end do
           ! dsigma, idxc, idxc, and zw are used as storage space.
           do i = 2, n
              dsigma( i ) = d( idxq( i ) )
              zw( i ) = z( idxq( i ) )
              vfw( i ) = vf( idxq( i ) )
              vlw( i ) = vl( idxq( i ) )
           end do
           call stdlib${ii}$_slamrg( nl, nr, dsigma( 2_${ik}$ ), 1_${ik}$, 1_${ik}$, idx( 2_${ik}$ ) )
           do i = 2, n
              idxi = 1_${ik}$ + idx( i )
              d( i ) = dsigma( idxi )
              z( i ) = zw( idxi )
              vf( i ) = vfw( idxi )
              vl( i ) = vlw( idxi )
           end do
           ! calculate the allowable deflation tolerance
           eps = stdlib${ii}$_slamch( 'EPSILON' )
           tol = max( abs( alpha ), abs( beta ) )
           tol = eight*eight*eps*max( abs( d( n ) ), tol )
           ! there are 2 kinds of deflation -- first a value in the z-vector
           ! is small, second two (or more) singular values are very close
           ! together (their difference is small).
           ! if the value in the z-vector is small, we simply permute the
           ! array so that the corresponding singular value is moved to the
           ! end.
           ! if two values in the d-vector are close, we perform a two-sided
           ! rotation designed to make one of the corresponding z-vector
           ! entries zero, and then permute the array so that the deflated
           ! singular value is moved to the end.
           ! if there are multiple singular values then the problem deflates.
           ! here the number of equal singular values are found.  as each equal
           ! singular value is found, an elementary reflector is computed to
           ! rotate the corresponding singular subspace so that the
           ! corresponding components of z are zero in this new basis.
           k = 1_${ik}$
           k2 = n + 1_${ik}$
           do j = 2, n
              if( abs( z( j ) )<=tol ) then
                 ! deflate due to small z component.
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = j
                 if( j==n )go to 100
              else
                 jprev = j
                 go to 70
              end if
           end do
           70 continue
           j = jprev
           80 continue
           j = j + 1_${ik}$
           if( j>n )go to 90
           if( abs( z( j ) )<=tol ) then
              ! deflate due to small z component.
              k2 = k2 - 1_${ik}$
              idxp( k2 ) = j
           else
              ! check if singular values are close enough to allow deflation.
              if( abs( d( j )-d( jprev ) )<=tol ) then
                 ! deflation is possible.
                 s = z( jprev )
                 c = z( j )
                 ! find sqrt(a**2+b**2) without overflow or
                 ! destructive underflow.
                 tau = stdlib${ii}$_slapy2( c, s )
                 z( j ) = tau
                 z( jprev ) = zero
                 c = c / tau
                 s = -s / tau
                 ! record the appropriate givens rotation
                 if( icompq==1_${ik}$ ) then
                    givptr = givptr + 1_${ik}$
                    idxjp = idxq( idx( jprev )+1_${ik}$ )
                    idxj = idxq( idx( j )+1_${ik}$ )
                    if( idxjp<=nlp1 ) then
                       idxjp = idxjp - 1_${ik}$
                    end if
                    if( idxj<=nlp1 ) then
                       idxj = idxj - 1_${ik}$
                    end if
                    givcol( givptr, 2_${ik}$ ) = idxjp
                    givcol( givptr, 1_${ik}$ ) = idxj
                    givnum( givptr, 2_${ik}$ ) = c
                    givnum( givptr, 1_${ik}$ ) = s
                 end if
                 call stdlib${ii}$_srot( 1_${ik}$, vf( jprev ), 1_${ik}$, vf( j ), 1_${ik}$, c, s )
                 call stdlib${ii}$_srot( 1_${ik}$, vl( jprev ), 1_${ik}$, vl( j ), 1_${ik}$, c, s )
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = jprev
                 jprev = j
              else
                 k = k + 1_${ik}$
                 zw( k ) = z( jprev )
                 dsigma( k ) = d( jprev )
                 idxp( k ) = jprev
                 jprev = j
              end if
           end if
           go to 80
           90 continue
           ! record the last singular value.
           k = k + 1_${ik}$
           zw( k ) = z( jprev )
           dsigma( k ) = d( jprev )
           idxp( k ) = jprev
           100 continue
           ! sort the singular values into dsigma. the singular values which
           ! were not deflated go into the first k slots of dsigma, except
           ! that dsigma(1) is treated separately.
           do j = 2, n
              jp = idxp( j )
              dsigma( j ) = d( jp )
              vfw( j ) = vf( jp )
              vlw( j ) = vl( jp )
           end do
           if( icompq==1_${ik}$ ) then
              do j = 2, n
                 jp = idxp( j )
                 perm( j ) = idxq( idx( jp )+1_${ik}$ )
                 if( perm( j )<=nlp1 ) then
                    perm( j ) = perm( j ) - 1_${ik}$
                 end if
              end do
           end if
           ! the deflated singular values go back into the last n - k slots of
           ! d.
           call stdlib${ii}$_scopy( n-k, dsigma( k+1 ), 1_${ik}$, d( k+1 ), 1_${ik}$ )
           ! determine dsigma(1), dsigma(2), z(1), vf(1), vl(1), vf(m), and
           ! vl(m).
           dsigma( 1_${ik}$ ) = zero
           hlftol = tol / two
           if( abs( dsigma( 2_${ik}$ ) )<=hlftol )dsigma( 2_${ik}$ ) = hlftol
           if( m>n ) then
              z( 1_${ik}$ ) = stdlib${ii}$_slapy2( z1, z( m ) )
              if( z( 1_${ik}$ )<=tol ) then
                 c = one
                 s = zero
                 z( 1_${ik}$ ) = tol
              else
                 c = z1 / z( 1_${ik}$ )
                 s = -z( m ) / z( 1_${ik}$ )
              end if
              call stdlib${ii}$_srot( 1_${ik}$, vf( m ), 1_${ik}$, vf( 1_${ik}$ ), 1_${ik}$, c, s )
              call stdlib${ii}$_srot( 1_${ik}$, vl( m ), 1_${ik}$, vl( 1_${ik}$ ), 1_${ik}$, c, s )
           else
              if( abs( z1 )<=tol ) then
                 z( 1_${ik}$ ) = tol
              else
                 z( 1_${ik}$ ) = z1
              end if
           end if
           ! restore z, vf, and vl.
           call stdlib${ii}$_scopy( k-1, zw( 2_${ik}$ ), 1_${ik}$, z( 2_${ik}$ ), 1_${ik}$ )
           call stdlib${ii}$_scopy( n-1, vfw( 2_${ik}$ ), 1_${ik}$, vf( 2_${ik}$ ), 1_${ik}$ )
           call stdlib${ii}$_scopy( n-1, vlw( 2_${ik}$ ), 1_${ik}$, vl( 2_${ik}$ ), 1_${ik}$ )
           return
     end subroutine stdlib${ii}$_slasd7

     pure module subroutine stdlib${ii}$_dlasd7( icompq, nl, nr, sqre, k, d, z, zw, vf, vfw, vl,vlw, alpha, &
     !! DLASD7 merges the two sets of singular values together into a single
     !! sorted set. Then it tries to deflate the size of the problem. There
     !! are two ways in which deflation can occur:  when two or more singular
     !! values are close together or if there is a tiny entry in the Z
     !! vector. For each such occurrence the order of the related
     !! secular equation problem is reduced by one.
     !! DLASD7 is called from DLASD6.
     beta, dsigma, idx, idxp, idxq,perm, givptr, givcol, ldgcol, givnum, ldgnum,c, s, info )
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: givptr, info, k
           integer(${ik}$), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre
           real(dp), intent(in) :: alpha, beta
           real(dp), intent(out) :: c, s
           ! Array Arguments 
           integer(${ik}$), intent(out) :: givcol(ldgcol,*), idx(*), idxp(*), perm(*)
           integer(${ik}$), intent(inout) :: idxq(*)
           real(dp), intent(inout) :: d(*), vf(*), vl(*)
           real(dp), intent(out) :: dsigma(*), givnum(ldgnum,*), vfw(*), vlw(*), z(*), zw(*)
                     
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2
           real(dp) :: eps, hlftol, tau, tol, z1
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           n = nl + nr + 1_${ik}$
           m = n + sqre
           if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then
              info = -1_${ik}$
           else if( nl<1_${ik}$ ) then
              info = -2_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -3_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -4_${ik}$
           else if( ldgcol<n ) then
              info = -22_${ik}$
           else if( ldgnum<n ) then
              info = -24_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD7', -info )
              return
           end if
           nlp1 = nl + 1_${ik}$
           nlp2 = nl + 2_${ik}$
           if( icompq==1_${ik}$ ) then
              givptr = 0_${ik}$
           end if
           ! generate the first part of the vector z and move the singular
           ! values in the first part of d one position backward.
           z1 = alpha*vl( nlp1 )
           vl( nlp1 ) = zero
           tau = vf( nlp1 )
           do i = nl, 1, -1
              z( i+1 ) = alpha*vl( i )
              vl( i ) = zero
              vf( i+1 ) = vf( i )
              d( i+1 ) = d( i )
              idxq( i+1 ) = idxq( i ) + 1_${ik}$
           end do
           vf( 1_${ik}$ ) = tau
           ! generate the second part of the vector z.
           do i = nlp2, m
              z( i ) = beta*vf( i )
              vf( i ) = zero
           end do
           ! sort the singular values into increasing order
           do i = nlp2, n
              idxq( i ) = idxq( i ) + nlp1
           end do
           ! dsigma, idxc, idxc, and zw are used as storage space.
           do i = 2, n
              dsigma( i ) = d( idxq( i ) )
              zw( i ) = z( idxq( i ) )
              vfw( i ) = vf( idxq( i ) )
              vlw( i ) = vl( idxq( i ) )
           end do
           call stdlib${ii}$_dlamrg( nl, nr, dsigma( 2_${ik}$ ), 1_${ik}$, 1_${ik}$, idx( 2_${ik}$ ) )
           do i = 2, n
              idxi = 1_${ik}$ + idx( i )
              d( i ) = dsigma( idxi )
              z( i ) = zw( idxi )
              vf( i ) = vfw( idxi )
              vl( i ) = vlw( idxi )
           end do
           ! calculate the allowable deflation tolerance
           eps = stdlib${ii}$_dlamch( 'EPSILON' )
           tol = max( abs( alpha ), abs( beta ) )
           tol = eight*eight*eps*max( abs( d( n ) ), tol )
           ! there are 2 kinds of deflation -- first a value in the z-vector
           ! is small, second two (or more) singular values are very close
           ! together (their difference is small).
           ! if the value in the z-vector is small, we simply permute the
           ! array so that the corresponding singular value is moved to the
           ! end.
           ! if two values in the d-vector are close, we perform a two-sided
           ! rotation designed to make one of the corresponding z-vector
           ! entries zero, and then permute the array so that the deflated
           ! singular value is moved to the end.
           ! if there are multiple singular values then the problem deflates.
           ! here the number of equal singular values are found.  as each equal
           ! singular value is found, an elementary reflector is computed to
           ! rotate the corresponding singular subspace so that the
           ! corresponding components of z are zero in this new basis.
           k = 1_${ik}$
           k2 = n + 1_${ik}$
           do j = 2, n
              if( abs( z( j ) )<=tol ) then
                 ! deflate due to small z component.
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = j
                 if( j==n )go to 100
              else
                 jprev = j
                 go to 70
              end if
           end do
           70 continue
           j = jprev
           80 continue
           j = j + 1_${ik}$
           if( j>n )go to 90
           if( abs( z( j ) )<=tol ) then
              ! deflate due to small z component.
              k2 = k2 - 1_${ik}$
              idxp( k2 ) = j
           else
              ! check if singular values are close enough to allow deflation.
              if( abs( d( j )-d( jprev ) )<=tol ) then
                 ! deflation is possible.
                 s = z( jprev )
                 c = z( j )
                 ! find sqrt(a**2+b**2) without overflow or
                 ! destructive underflow.
                 tau = stdlib${ii}$_dlapy2( c, s )
                 z( j ) = tau
                 z( jprev ) = zero
                 c = c / tau
                 s = -s / tau
                 ! record the appropriate givens rotation
                 if( icompq==1_${ik}$ ) then
                    givptr = givptr + 1_${ik}$
                    idxjp = idxq( idx( jprev )+1_${ik}$ )
                    idxj = idxq( idx( j )+1_${ik}$ )
                    if( idxjp<=nlp1 ) then
                       idxjp = idxjp - 1_${ik}$
                    end if
                    if( idxj<=nlp1 ) then
                       idxj = idxj - 1_${ik}$
                    end if
                    givcol( givptr, 2_${ik}$ ) = idxjp
                    givcol( givptr, 1_${ik}$ ) = idxj
                    givnum( givptr, 2_${ik}$ ) = c
                    givnum( givptr, 1_${ik}$ ) = s
                 end if
                 call stdlib${ii}$_drot( 1_${ik}$, vf( jprev ), 1_${ik}$, vf( j ), 1_${ik}$, c, s )
                 call stdlib${ii}$_drot( 1_${ik}$, vl( jprev ), 1_${ik}$, vl( j ), 1_${ik}$, c, s )
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = jprev
                 jprev = j
              else
                 k = k + 1_${ik}$
                 zw( k ) = z( jprev )
                 dsigma( k ) = d( jprev )
                 idxp( k ) = jprev
                 jprev = j
              end if
           end if
           go to 80
           90 continue
           ! record the last singular value.
           k = k + 1_${ik}$
           zw( k ) = z( jprev )
           dsigma( k ) = d( jprev )
           idxp( k ) = jprev
           100 continue
           ! sort the singular values into dsigma. the singular values which
           ! were not deflated go into the first k slots of dsigma, except
           ! that dsigma(1) is treated separately.
           do j = 2, n
              jp = idxp( j )
              dsigma( j ) = d( jp )
              vfw( j ) = vf( jp )
              vlw( j ) = vl( jp )
           end do
           if( icompq==1_${ik}$ ) then
              do j = 2, n
                 jp = idxp( j )
                 perm( j ) = idxq( idx( jp )+1_${ik}$ )
                 if( perm( j )<=nlp1 ) then
                    perm( j ) = perm( j ) - 1_${ik}$
                 end if
              end do
           end if
           ! the deflated singular values go back into the last n - k slots of
           ! d.
           call stdlib${ii}$_dcopy( n-k, dsigma( k+1 ), 1_${ik}$, d( k+1 ), 1_${ik}$ )
           ! determine dsigma(1), dsigma(2), z(1), vf(1), vl(1), vf(m), and
           ! vl(m).
           dsigma( 1_${ik}$ ) = zero
           hlftol = tol / two
           if( abs( dsigma( 2_${ik}$ ) )<=hlftol )dsigma( 2_${ik}$ ) = hlftol
           if( m>n ) then
              z( 1_${ik}$ ) = stdlib${ii}$_dlapy2( z1, z( m ) )
              if( z( 1_${ik}$ )<=tol ) then
                 c = one
                 s = zero
                 z( 1_${ik}$ ) = tol
              else
                 c = z1 / z( 1_${ik}$ )
                 s = -z( m ) / z( 1_${ik}$ )
              end if
              call stdlib${ii}$_drot( 1_${ik}$, vf( m ), 1_${ik}$, vf( 1_${ik}$ ), 1_${ik}$, c, s )
              call stdlib${ii}$_drot( 1_${ik}$, vl( m ), 1_${ik}$, vl( 1_${ik}$ ), 1_${ik}$, c, s )
           else
              if( abs( z1 )<=tol ) then
                 z( 1_${ik}$ ) = tol
              else
                 z( 1_${ik}$ ) = z1
              end if
           end if
           ! restore z, vf, and vl.
           call stdlib${ii}$_dcopy( k-1, zw( 2_${ik}$ ), 1_${ik}$, z( 2_${ik}$ ), 1_${ik}$ )
           call stdlib${ii}$_dcopy( n-1, vfw( 2_${ik}$ ), 1_${ik}$, vf( 2_${ik}$ ), 1_${ik}$ )
           call stdlib${ii}$_dcopy( n-1, vlw( 2_${ik}$ ), 1_${ik}$, vl( 2_${ik}$ ), 1_${ik}$ )
           return
     end subroutine stdlib${ii}$_dlasd7

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lasd7( icompq, nl, nr, sqre, k, d, z, zw, vf, vfw, vl,vlw, alpha, &
     !! DLASD7: merges the two sets of singular values together into a single
     !! sorted set. Then it tries to deflate the size of the problem. There
     !! are two ways in which deflation can occur:  when two or more singular
     !! values are close together or if there is a tiny entry in the Z
     !! vector. For each such occurrence the order of the related
     !! secular equation problem is reduced by one.
     !! DLASD7 is called from DLASD6.
     beta, dsigma, idx, idxp, idxq,perm, givptr, givcol, ldgcol, givnum, ldgnum,c, s, info )
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(out) :: givptr, info, k
           integer(${ik}$), intent(in) :: icompq, ldgcol, ldgnum, nl, nr, sqre
           real(${rk}$), intent(in) :: alpha, beta
           real(${rk}$), intent(out) :: c, s
           ! Array Arguments 
           integer(${ik}$), intent(out) :: givcol(ldgcol,*), idx(*), idxp(*), perm(*)
           integer(${ik}$), intent(inout) :: idxq(*)
           real(${rk}$), intent(inout) :: d(*), vf(*), vl(*)
           real(${rk}$), intent(out) :: dsigma(*), givnum(ldgnum,*), vfw(*), vlw(*), z(*), zw(*)
                     
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, idxi, idxj, idxjp, j, jp, jprev, k2, m, n, nlp1, nlp2
           real(${rk}$) :: eps, hlftol, tau, tol, z1
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           n = nl + nr + 1_${ik}$
           m = n + sqre
           if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then
              info = -1_${ik}$
           else if( nl<1_${ik}$ ) then
              info = -2_${ik}$
           else if( nr<1_${ik}$ ) then
              info = -3_${ik}$
           else if( ( sqre<0_${ik}$ ) .or. ( sqre>1_${ik}$ ) ) then
              info = -4_${ik}$
           else if( ldgcol<n ) then
              info = -22_${ik}$
           else if( ldgnum<n ) then
              info = -24_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD7', -info )
              return
           end if
           nlp1 = nl + 1_${ik}$
           nlp2 = nl + 2_${ik}$
           if( icompq==1_${ik}$ ) then
              givptr = 0_${ik}$
           end if
           ! generate the first part of the vector z and move the singular
           ! values in the first part of d one position backward.
           z1 = alpha*vl( nlp1 )
           vl( nlp1 ) = zero
           tau = vf( nlp1 )
           do i = nl, 1, -1
              z( i+1 ) = alpha*vl( i )
              vl( i ) = zero
              vf( i+1 ) = vf( i )
              d( i+1 ) = d( i )
              idxq( i+1 ) = idxq( i ) + 1_${ik}$
           end do
           vf( 1_${ik}$ ) = tau
           ! generate the second part of the vector z.
           do i = nlp2, m
              z( i ) = beta*vf( i )
              vf( i ) = zero
           end do
           ! sort the singular values into increasing order
           do i = nlp2, n
              idxq( i ) = idxq( i ) + nlp1
           end do
           ! dsigma, idxc, idxc, and zw are used as storage space.
           do i = 2, n
              dsigma( i ) = d( idxq( i ) )
              zw( i ) = z( idxq( i ) )
              vfw( i ) = vf( idxq( i ) )
              vlw( i ) = vl( idxq( i ) )
           end do
           call stdlib${ii}$_${ri}$lamrg( nl, nr, dsigma( 2_${ik}$ ), 1_${ik}$, 1_${ik}$, idx( 2_${ik}$ ) )
           do i = 2, n
              idxi = 1_${ik}$ + idx( i )
              d( i ) = dsigma( idxi )
              z( i ) = zw( idxi )
              vf( i ) = vfw( idxi )
              vl( i ) = vlw( idxi )
           end do
           ! calculate the allowable deflation tolerance
           eps = stdlib${ii}$_${ri}$lamch( 'EPSILON' )
           tol = max( abs( alpha ), abs( beta ) )
           tol = eight*eight*eps*max( abs( d( n ) ), tol )
           ! there are 2 kinds of deflation -- first a value in the z-vector
           ! is small, second two (or more) singular values are very close
           ! together (their difference is small).
           ! if the value in the z-vector is small, we simply permute the
           ! array so that the corresponding singular value is moved to the
           ! end.
           ! if two values in the d-vector are close, we perform a two-sided
           ! rotation designed to make one of the corresponding z-vector
           ! entries zero, and then permute the array so that the deflated
           ! singular value is moved to the end.
           ! if there are multiple singular values then the problem deflates.
           ! here the number of equal singular values are found.  as each equal
           ! singular value is found, an elementary reflector is computed to
           ! rotate the corresponding singular subspace so that the
           ! corresponding components of z are zero in this new basis.
           k = 1_${ik}$
           k2 = n + 1_${ik}$
           do j = 2, n
              if( abs( z( j ) )<=tol ) then
                 ! deflate due to small z component.
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = j
                 if( j==n )go to 100
              else
                 jprev = j
                 go to 70
              end if
           end do
           70 continue
           j = jprev
           80 continue
           j = j + 1_${ik}$
           if( j>n )go to 90
           if( abs( z( j ) )<=tol ) then
              ! deflate due to small z component.
              k2 = k2 - 1_${ik}$
              idxp( k2 ) = j
           else
              ! check if singular values are close enough to allow deflation.
              if( abs( d( j )-d( jprev ) )<=tol ) then
                 ! deflation is possible.
                 s = z( jprev )
                 c = z( j )
                 ! find sqrt(a**2+b**2) without overflow or
                 ! destructive underflow.
                 tau = stdlib${ii}$_${ri}$lapy2( c, s )
                 z( j ) = tau
                 z( jprev ) = zero
                 c = c / tau
                 s = -s / tau
                 ! record the appropriate givens rotation
                 if( icompq==1_${ik}$ ) then
                    givptr = givptr + 1_${ik}$
                    idxjp = idxq( idx( jprev )+1_${ik}$ )
                    idxj = idxq( idx( j )+1_${ik}$ )
                    if( idxjp<=nlp1 ) then
                       idxjp = idxjp - 1_${ik}$
                    end if
                    if( idxj<=nlp1 ) then
                       idxj = idxj - 1_${ik}$
                    end if
                    givcol( givptr, 2_${ik}$ ) = idxjp
                    givcol( givptr, 1_${ik}$ ) = idxj
                    givnum( givptr, 2_${ik}$ ) = c
                    givnum( givptr, 1_${ik}$ ) = s
                 end if
                 call stdlib${ii}$_${ri}$rot( 1_${ik}$, vf( jprev ), 1_${ik}$, vf( j ), 1_${ik}$, c, s )
                 call stdlib${ii}$_${ri}$rot( 1_${ik}$, vl( jprev ), 1_${ik}$, vl( j ), 1_${ik}$, c, s )
                 k2 = k2 - 1_${ik}$
                 idxp( k2 ) = jprev
                 jprev = j
              else
                 k = k + 1_${ik}$
                 zw( k ) = z( jprev )
                 dsigma( k ) = d( jprev )
                 idxp( k ) = jprev
                 jprev = j
              end if
           end if
           go to 80
           90 continue
           ! record the last singular value.
           k = k + 1_${ik}$
           zw( k ) = z( jprev )
           dsigma( k ) = d( jprev )
           idxp( k ) = jprev
           100 continue
           ! sort the singular values into dsigma. the singular values which
           ! were not deflated go into the first k slots of dsigma, except
           ! that dsigma(1) is treated separately.
           do j = 2, n
              jp = idxp( j )
              dsigma( j ) = d( jp )
              vfw( j ) = vf( jp )
              vlw( j ) = vl( jp )
           end do
           if( icompq==1_${ik}$ ) then
              do j = 2, n
                 jp = idxp( j )
                 perm( j ) = idxq( idx( jp )+1_${ik}$ )
                 if( perm( j )<=nlp1 ) then
                    perm( j ) = perm( j ) - 1_${ik}$
                 end if
              end do
           end if
           ! the deflated singular values go back into the last n - k slots of
           ! d.
           call stdlib${ii}$_${ri}$copy( n-k, dsigma( k+1 ), 1_${ik}$, d( k+1 ), 1_${ik}$ )
           ! determine dsigma(1), dsigma(2), z(1), vf(1), vl(1), vf(m), and
           ! vl(m).
           dsigma( 1_${ik}$ ) = zero
           hlftol = tol / two
           if( abs( dsigma( 2_${ik}$ ) )<=hlftol )dsigma( 2_${ik}$ ) = hlftol
           if( m>n ) then
              z( 1_${ik}$ ) = stdlib${ii}$_${ri}$lapy2( z1, z( m ) )
              if( z( 1_${ik}$ )<=tol ) then
                 c = one
                 s = zero
                 z( 1_${ik}$ ) = tol
              else
                 c = z1 / z( 1_${ik}$ )
                 s = -z( m ) / z( 1_${ik}$ )
              end if
              call stdlib${ii}$_${ri}$rot( 1_${ik}$, vf( m ), 1_${ik}$, vf( 1_${ik}$ ), 1_${ik}$, c, s )
              call stdlib${ii}$_${ri}$rot( 1_${ik}$, vl( m ), 1_${ik}$, vl( 1_${ik}$ ), 1_${ik}$, c, s )
           else
              if( abs( z1 )<=tol ) then
                 z( 1_${ik}$ ) = tol
              else
                 z( 1_${ik}$ ) = z1
              end if
           end if
           ! restore z, vf, and vl.
           call stdlib${ii}$_${ri}$copy( k-1, zw( 2_${ik}$ ), 1_${ik}$, z( 2_${ik}$ ), 1_${ik}$ )
           call stdlib${ii}$_${ri}$copy( n-1, vfw( 2_${ik}$ ), 1_${ik}$, vf( 2_${ik}$ ), 1_${ik}$ )
           call stdlib${ii}$_${ri}$copy( n-1, vlw( 2_${ik}$ ), 1_${ik}$, vl( 2_${ik}$ ), 1_${ik}$ )
           return
     end subroutine stdlib${ii}$_${ri}$lasd7

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slasd8( icompq, k, d, z, vf, vl, difl, difr, lddifr,dsigma, work, &
     !! SLASD8 finds the square roots of the roots of the secular equation,
     !! as defined by the values in DSIGMA and Z. It makes the appropriate
     !! calls to SLASD4, and stores, for each  element in D, the distance
     !! to its two nearest poles (elements in DSIGMA). It also updates
     !! the arrays VF and VL, the first and last components of all the
     !! right singular vectors of the original bidiagonal matrix.
     !! SLASD8 is called from SLASD6.
               info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: icompq, k, lddifr
           integer(${ik}$), intent(out) :: info
           ! Array Arguments 
           real(sp), intent(out) :: d(*), difl(*), difr(lddifr,*), work(*)
           real(sp), intent(inout) :: dsigma(*), vf(*), vl(*), z(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, iwk1, iwk2, iwk2i, iwk3, iwk3i, j
           real(sp) :: diflj, difrj, dj, dsigj, dsigjp, rho, temp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then
              info = -1_${ik}$
           else if( k<1_${ik}$ ) then
              info = -2_${ik}$
           else if( lddifr<k ) then
              info = -9_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLASD8', -info )
              return
           end if
           ! quick return if possible
           if( k==1_${ik}$ ) then
              d( 1_${ik}$ ) = abs( z( 1_${ik}$ ) )
              difl( 1_${ik}$ ) = d( 1_${ik}$ )
              if( icompq==1_${ik}$ ) then
                 difl( 2_${ik}$ ) = one
                 difr( 1_${ik}$, 2_${ik}$ ) = one
              end if
              return
           end if
           ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can
           ! be computed with high relative accuracy (barring over/underflow).
           ! this is a problem on machines without a guard digit in
           ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2).
           ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i),
           ! which on any of these machines zeros out the bottommost
           ! bit of dsigma(i) if it is 1; this makes the subsequent
           ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation
           ! occurs. on binary machines with a guard digit (almost all
           ! machines) it does not change dsigma(i) at all. on hexadecimal
           ! and decimal machines with a guard digit, it slightly
           ! changes the bottommost bits of dsigma(i). it does not account
           ! for hexadecimal or decimal machines without guard digits
           ! (we know of none). we use a subroutine call to compute
           ! 2*dlambda(i) to prevent optimizing compilers from eliminating
           ! this code.
           do i = 1, k
              dsigma( i ) = stdlib${ii}$_slamc3( dsigma( i ), dsigma( i ) ) - dsigma( i )
           end do
           ! book keeping.
           iwk1 = 1_${ik}$
           iwk2 = iwk1 + k
           iwk3 = iwk2 + k
           iwk2i = iwk2 - 1_${ik}$
           iwk3i = iwk3 - 1_${ik}$
           ! normalize z.
           rho = stdlib${ii}$_snrm2( k, z, 1_${ik}$ )
           call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, rho, one, k, 1_${ik}$, z, k, info )
           rho = rho*rho
           ! initialize work(iwk3).
           call stdlib${ii}$_slaset( 'A', k, 1_${ik}$, one, one, work( iwk3 ), k )
           ! compute the updated singular values, the arrays difl, difr,
           ! and the updated z.
           do j = 1, k
              call stdlib${ii}$_slasd4( k, j, dsigma, z, work( iwk1 ), rho, d( j ),work( iwk2 ), info )
                        
              ! if the root finder fails, report the convergence failure.
              if( info/=0_${ik}$ ) then
                 return
              end if
              work( iwk3i+j ) = work( iwk3i+j )*work( j )*work( iwk2i+j )
              difl( j ) = -work( j )
              difr( j, 1_${ik}$ ) = -work( j+1 )
              do i = 1, j - 1
                 work( iwk3i+i ) = work( iwk3i+i )*work( i )*work( iwk2i+i ) / ( dsigma( i )-&
                           dsigma( j ) ) / ( dsigma( i )+dsigma( j ) )
              end do
              do i = j + 1, k
                 work( iwk3i+i ) = work( iwk3i+i )*work( i )*work( iwk2i+i ) / ( dsigma( i )-&
                           dsigma( j ) ) / ( dsigma( i )+dsigma( j ) )
              end do
           end do
           ! compute updated z.
           do i = 1, k
              z( i ) = sign( sqrt( abs( work( iwk3i+i ) ) ), z( i ) )
           end do
           ! update vf and vl.
           do j = 1, k
              diflj = difl( j )
              dj = d( j )
              dsigj = -dsigma( j )
              if( j<k ) then
                 difrj = -difr( j, 1_${ik}$ )
                 dsigjp = -dsigma( j+1 )
              end if
              work( j ) = -z( j ) / diflj / ( dsigma( j )+dj )
              do i = 1, j - 1
                 work( i ) = z( i ) / ( stdlib${ii}$_slamc3( dsigma( i ), dsigj )-diflj )/ ( dsigma( i )&
                           +dj )
              end do
              do i = j + 1, k
                 work( i ) = z( i ) / ( stdlib${ii}$_slamc3( dsigma( i ), dsigjp )+difrj )/ ( dsigma( i &
                           )+dj )
              end do
              temp = stdlib${ii}$_snrm2( k, work, 1_${ik}$ )
              work( iwk2i+j ) = stdlib${ii}$_sdot( k, work, 1_${ik}$, vf, 1_${ik}$ ) / temp
              work( iwk3i+j ) = stdlib${ii}$_sdot( k, work, 1_${ik}$, vl, 1_${ik}$ ) / temp
              if( icompq==1_${ik}$ ) then
                 difr( j, 2_${ik}$ ) = temp
              end if
           end do
           call stdlib${ii}$_scopy( k, work( iwk2 ), 1_${ik}$, vf, 1_${ik}$ )
           call stdlib${ii}$_scopy( k, work( iwk3 ), 1_${ik}$, vl, 1_${ik}$ )
           return
     end subroutine stdlib${ii}$_slasd8

     pure module subroutine stdlib${ii}$_dlasd8( icompq, k, d, z, vf, vl, difl, difr, lddifr,dsigma, work, &
     !! DLASD8 finds the square roots of the roots of the secular equation,
     !! as defined by the values in DSIGMA and Z. It makes the appropriate
     !! calls to DLASD4, and stores, for each  element in D, the distance
     !! to its two nearest poles (elements in DSIGMA). It also updates
     !! the arrays VF and VL, the first and last components of all the
     !! right singular vectors of the original bidiagonal matrix.
     !! DLASD8 is called from DLASD6.
               info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: icompq, k, lddifr
           integer(${ik}$), intent(out) :: info
           ! Array Arguments 
           real(dp), intent(out) :: d(*), difl(*), difr(lddifr,*), work(*)
           real(dp), intent(inout) :: dsigma(*), vf(*), vl(*), z(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, iwk1, iwk2, iwk2i, iwk3, iwk3i, j
           real(dp) :: diflj, difrj, dj, dsigj, dsigjp, rho, temp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then
              info = -1_${ik}$
           else if( k<1_${ik}$ ) then
              info = -2_${ik}$
           else if( lddifr<k ) then
              info = -9_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD8', -info )
              return
           end if
           ! quick return if possible
           if( k==1_${ik}$ ) then
              d( 1_${ik}$ ) = abs( z( 1_${ik}$ ) )
              difl( 1_${ik}$ ) = d( 1_${ik}$ )
              if( icompq==1_${ik}$ ) then
                 difl( 2_${ik}$ ) = one
                 difr( 1_${ik}$, 2_${ik}$ ) = one
              end if
              return
           end if
           ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can
           ! be computed with high relative accuracy (barring over/underflow).
           ! this is a problem on machines without a guard digit in
           ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2).
           ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i),
           ! which on any of these machines zeros out the bottommost
           ! bit of dsigma(i) if it is 1; this makes the subsequent
           ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation
           ! occurs. on binary machines with a guard digit (almost all
           ! machines) it does not change dsigma(i) at all. on hexadecimal
           ! and decimal machines with a guard digit, it slightly
           ! changes the bottommost bits of dsigma(i). it does not account
           ! for hexadecimal or decimal machines without guard digits
           ! (we know of none). we use a subroutine call to compute
           ! 2*dlambda(i) to prevent optimizing compilers from eliminating
           ! this code.
           do i = 1, k
              dsigma( i ) = stdlib${ii}$_dlamc3( dsigma( i ), dsigma( i ) ) - dsigma( i )
           end do
           ! book keeping.
           iwk1 = 1_${ik}$
           iwk2 = iwk1 + k
           iwk3 = iwk2 + k
           iwk2i = iwk2 - 1_${ik}$
           iwk3i = iwk3 - 1_${ik}$
           ! normalize z.
           rho = stdlib${ii}$_dnrm2( k, z, 1_${ik}$ )
           call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, rho, one, k, 1_${ik}$, z, k, info )
           rho = rho*rho
           ! initialize work(iwk3).
           call stdlib${ii}$_dlaset( 'A', k, 1_${ik}$, one, one, work( iwk3 ), k )
           ! compute the updated singular values, the arrays difl, difr,
           ! and the updated z.
           do j = 1, k
              call stdlib${ii}$_dlasd4( k, j, dsigma, z, work( iwk1 ), rho, d( j ),work( iwk2 ), info )
                        
              ! if the root finder fails, report the convergence failure.
              if( info/=0_${ik}$ ) then
                 return
              end if
              work( iwk3i+j ) = work( iwk3i+j )*work( j )*work( iwk2i+j )
              difl( j ) = -work( j )
              difr( j, 1_${ik}$ ) = -work( j+1 )
              do i = 1, j - 1
                 work( iwk3i+i ) = work( iwk3i+i )*work( i )*work( iwk2i+i ) / ( dsigma( i )-&
                           dsigma( j ) ) / ( dsigma( i )+dsigma( j ) )
              end do
              do i = j + 1, k
                 work( iwk3i+i ) = work( iwk3i+i )*work( i )*work( iwk2i+i ) / ( dsigma( i )-&
                           dsigma( j ) ) / ( dsigma( i )+dsigma( j ) )
              end do
           end do
           ! compute updated z.
           do i = 1, k
              z( i ) = sign( sqrt( abs( work( iwk3i+i ) ) ), z( i ) )
           end do
           ! update vf and vl.
           do j = 1, k
              diflj = difl( j )
              dj = d( j )
              dsigj = -dsigma( j )
              if( j<k ) then
                 difrj = -difr( j, 1_${ik}$ )
                 dsigjp = -dsigma( j+1 )
              end if
              work( j ) = -z( j ) / diflj / ( dsigma( j )+dj )
              do i = 1, j - 1
                 work( i ) = z( i ) / ( stdlib${ii}$_dlamc3( dsigma( i ), dsigj )-diflj )/ ( dsigma( i )&
                           +dj )
              end do
              do i = j + 1, k
                 work( i ) = z( i ) / ( stdlib${ii}$_dlamc3( dsigma( i ), dsigjp )+difrj )/ ( dsigma( i &
                           )+dj )
              end do
              temp = stdlib${ii}$_dnrm2( k, work, 1_${ik}$ )
              work( iwk2i+j ) = stdlib${ii}$_ddot( k, work, 1_${ik}$, vf, 1_${ik}$ ) / temp
              work( iwk3i+j ) = stdlib${ii}$_ddot( k, work, 1_${ik}$, vl, 1_${ik}$ ) / temp
              if( icompq==1_${ik}$ ) then
                 difr( j, 2_${ik}$ ) = temp
              end if
           end do
           call stdlib${ii}$_dcopy( k, work( iwk2 ), 1_${ik}$, vf, 1_${ik}$ )
           call stdlib${ii}$_dcopy( k, work( iwk3 ), 1_${ik}$, vl, 1_${ik}$ )
           return
     end subroutine stdlib${ii}$_dlasd8

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lasd8( icompq, k, d, z, vf, vl, difl, difr, lddifr,dsigma, work, &
     !! DLASD8: finds the square roots of the roots of the secular equation,
     !! as defined by the values in DSIGMA and Z. It makes the appropriate
     !! calls to DLASD4, and stores, for each  element in D, the distance
     !! to its two nearest poles (elements in DSIGMA). It also updates
     !! the arrays VF and VL, the first and last components of all the
     !! right singular vectors of the original bidiagonal matrix.
     !! DLASD8 is called from DLASD6.
               info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: icompq, k, lddifr
           integer(${ik}$), intent(out) :: info
           ! Array Arguments 
           real(${rk}$), intent(out) :: d(*), difl(*), difr(lddifr,*), work(*)
           real(${rk}$), intent(inout) :: dsigma(*), vf(*), vl(*), z(*)
        ! =====================================================================
           
           ! Local Scalars 
           integer(${ik}$) :: i, iwk1, iwk2, iwk2i, iwk3, iwk3i, j
           real(${rk}$) :: diflj, difrj, dj, dsigj, dsigjp, rho, temp
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input parameters.
           info = 0_${ik}$
           if( ( icompq<0_${ik}$ ) .or. ( icompq>1_${ik}$ ) ) then
              info = -1_${ik}$
           else if( k<1_${ik}$ ) then
              info = -2_${ik}$
           else if( lddifr<k ) then
              info = -9_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLASD8', -info )
              return
           end if
           ! quick return if possible
           if( k==1_${ik}$ ) then
              d( 1_${ik}$ ) = abs( z( 1_${ik}$ ) )
              difl( 1_${ik}$ ) = d( 1_${ik}$ )
              if( icompq==1_${ik}$ ) then
                 difl( 2_${ik}$ ) = one
                 difr( 1_${ik}$, 2_${ik}$ ) = one
              end if
              return
           end if
           ! modify values dsigma(i) to make sure all dsigma(i)-dsigma(j) can
           ! be computed with high relative accuracy (barring over/underflow).
           ! this is a problem on machines without a guard digit in
           ! add/subtract (cray xmp, cray ymp, cray c 90 and cray 2).
           ! the following code replaces dsigma(i) by 2*dsigma(i)-dsigma(i),
           ! which on any of these machines zeros out the bottommost
           ! bit of dsigma(i) if it is 1; this makes the subsequent
           ! subtractions dsigma(i)-dsigma(j) unproblematic when cancellation
           ! occurs. on binary machines with a guard digit (almost all
           ! machines) it does not change dsigma(i) at all. on hexadecimal
           ! and decimal machines with a guard digit, it slightly
           ! changes the bottommost bits of dsigma(i). it does not account
           ! for hexadecimal or decimal machines without guard digits
           ! (we know of none). we use a subroutine call to compute
           ! 2*dlambda(i) to prevent optimizing compilers from eliminating
           ! this code.
           do i = 1, k
              dsigma( i ) = stdlib${ii}$_${ri}$lamc3( dsigma( i ), dsigma( i ) ) - dsigma( i )
           end do
           ! book keeping.
           iwk1 = 1_${ik}$
           iwk2 = iwk1 + k
           iwk3 = iwk2 + k
           iwk2i = iwk2 - 1_${ik}$
           iwk3i = iwk3 - 1_${ik}$
           ! normalize z.
           rho = stdlib${ii}$_${ri}$nrm2( k, z, 1_${ik}$ )
           call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, rho, one, k, 1_${ik}$, z, k, info )
           rho = rho*rho
           ! initialize work(iwk3).
           call stdlib${ii}$_${ri}$laset( 'A', k, 1_${ik}$, one, one, work( iwk3 ), k )
           ! compute the updated singular values, the arrays difl, difr,
           ! and the updated z.
           do j = 1, k
              call stdlib${ii}$_${ri}$lasd4( k, j, dsigma, z, work( iwk1 ), rho, d( j ),work( iwk2 ), info )
                        
              ! if the root finder fails, report the convergence failure.
              if( info/=0_${ik}$ ) then
                 return
              end if
              work( iwk3i+j ) = work( iwk3i+j )*work( j )*work( iwk2i+j )
              difl( j ) = -work( j )
              difr( j, 1_${ik}$ ) = -work( j+1 )
              do i = 1, j - 1
                 work( iwk3i+i ) = work( iwk3i+i )*work( i )*work( iwk2i+i ) / ( dsigma( i )-&
                           dsigma( j ) ) / ( dsigma( i )+dsigma( j ) )
              end do
              do i = j + 1, k
                 work( iwk3i+i ) = work( iwk3i+i )*work( i )*work( iwk2i+i ) / ( dsigma( i )-&
                           dsigma( j ) ) / ( dsigma( i )+dsigma( j ) )
              end do
           end do
           ! compute updated z.
           do i = 1, k
              z( i ) = sign( sqrt( abs( work( iwk3i+i ) ) ), z( i ) )
           end do
           ! update vf and vl.
           do j = 1, k
              diflj = difl( j )
              dj = d( j )
              dsigj = -dsigma( j )
              if( j<k ) then
                 difrj = -difr( j, 1_${ik}$ )
                 dsigjp = -dsigma( j+1 )
              end if
              work( j ) = -z( j ) / diflj / ( dsigma( j )+dj )
              do i = 1, j - 1
                 work( i ) = z( i ) / ( stdlib${ii}$_${ri}$lamc3( dsigma( i ), dsigj )-diflj )/ ( dsigma( i )&
                           +dj )
              end do
              do i = j + 1, k
                 work( i ) = z( i ) / ( stdlib${ii}$_${ri}$lamc3( dsigma( i ), dsigjp )+difrj )/ ( dsigma( i &
                           )+dj )
              end do
              temp = stdlib${ii}$_${ri}$nrm2( k, work, 1_${ik}$ )
              work( iwk2i+j ) = stdlib${ii}$_${ri}$dot( k, work, 1_${ik}$, vf, 1_${ik}$ ) / temp
              work( iwk3i+j ) = stdlib${ii}$_${ri}$dot( k, work, 1_${ik}$, vl, 1_${ik}$ ) / temp
              if( icompq==1_${ik}$ ) then
                 difr( j, 2_${ik}$ ) = temp
              end if
           end do
           call stdlib${ii}$_${ri}$copy( k, work( iwk2 ), 1_${ik}$, vf, 1_${ik}$ )
           call stdlib${ii}$_${ri}$copy( k, work( iwk3 ), 1_${ik}$, vl, 1_${ik}$ )
           return
     end subroutine stdlib${ii}$_${ri}$lasd8

#:endif
#:endfor


#:endfor
end submodule stdlib_lapack_eigv_svd_bidiag_dc